reiser4-for-4.14.1.patch 2.4 MB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404104051040610407104081040910410104111041210413104141041510416104171041810419104201042110422104231042410425104261042710428104291043010431104321043310434104351043610437104381043910440104411044210443104441044510446104471044810449104501045110452104531045410455104561045710458104591046010461104621046310464104651046610467104681046910470104711047210473104741047510476104771047810479104801048110482104831048410485104861048710488104891049010491104921049310494104951049610497104981049910500105011050210503105041050510506105071050810509105101051110512105131051410515105161051710518105191052010521105221052310524105251052610527105281052910530105311053210533105341053510536105371053810539105401054110542105431054410545105461054710548105491055010551105521055310554105551055610557105581055910560105611056210563105641056510566105671056810569105701057110572105731057410575105761057710578105791058010581105821058310584105851058610587105881058910590105911059210593105941059510596105971059810599106001060110602106031060410605106061060710608106091061010611106121061310614106151061610617106181061910620106211
06221062310624106251062610627106281062910630106311063210633106341063510636106371063810639106401064110642106431064410645106461064710648106491065010651106521065310654106551065610657106581065910660106611066210663106641066510666106671066810669106701067110672106731067410675106761067710678106791068010681106821068310684106851068610687106881068910690106911069210693106941069510696106971069810699107001070110702107031070410705107061070710708107091071010711107121071310714107151071610717107181071910720107211072210723107241072510726107271072810729107301073110732107331073410735107361073710738107391074010741107421074310744107451074610747107481074910750107511075210753107541075510756107571075810759107601076110762107631076410765107661076710768107691077010771107721077310774107751077610777107781077910780107811078210783107841078510786107871078810789107901079110792107931079410795107961079710798107991080010801108021080310804108051080610807108081080910810108111081210813108141081510816108171081810819108201082110822108231082410825108261082710828108291083010831108321083310834108351083610837108381083910840108411084210843108441084510846108471084810849108501085110852108531085410855108561085710858108591086010861108621086310864108651086610867108681086910870108711087210873108741087510876108771087810879108801088110882108831088410885108861088710888108891089010891108921089310894108951089610897108981089910900109011090210903109041090510906109071090810909109101091110912109131091410915109161091710918109191092010921109221092310924109251092610927109281092910930109311093210933109341093510936109371093810939109401094110942109431094410945109461094710948109491095010951109521095310954109551095610957109581095910960109611096210963109641096510966109671096810969109701097110972109731097410975109761097710978109791098010981109821098310984109851098610987109881098910990109911099210993109941099510996109971099810999110001100111002110031100411005110061100711008110091101011011110121101311014110151101611017110181101911020110211
10221102311024110251102611027110281102911030110311103211033110341103511036110371103811039110401104111042110431104411045110461104711048110491105011051110521105311054110551105611057110581105911060110611106211063110641106511066110671106811069110701107111072110731107411075110761107711078110791108011081110821108311084110851108611087110881108911090110911109211093110941109511096110971109811099111001110111102111031110411105111061110711108111091111011111111121111311114111151111611117111181111911120111211112211123111241112511126111271112811129111301113111132111331113411135111361113711138111391114011141111421114311144111451114611147111481114911150111511115211153111541115511156111571115811159111601116111162111631116411165111661116711168111691117011171111721117311174111751117611177111781117911180111811118211183111841118511186111871118811189111901119111192111931119411195111961119711198111991120011201112021120311204112051120611207112081120911210112111121211213112141121511216112171121811219112201122111222112231122411225112261122711228112291123011231112321123311234112351123611237112381123911240112411124211243112441124511246112471124811249112501125111252112531125411255112561125711258112591126011261112621126311264112651126611267112681126911270112711127211273112741127511276112771127811279112801128111282112831128411285112861128711288112891129011291112921129311294112951129611297112981129911300113011130211303113041130511306113071130811309113101131111312113131131411315113161131711318113191132011321113221132311324113251132611327113281132911330113311133211333113341133511336113371133811339113401134111342113431134411345113461134711348113491135011351113521135311354113551135611357113581135911360113611136211363113641136511366113671136811369113701137111372113731137411375113761137711378113791138011381113821138311384113851138611387113881138911390113911139211393113941139511396113971139811399114001140111402114031140411405114061140711408114091141011411114121141311414114151141611417114181141911420114211
14221142311424114251142611427114281142911430114311143211433114341143511436114371143811439114401144111442114431144411445114461144711448114491145011451114521145311454114551145611457114581145911460114611146211463114641146511466114671146811469114701147111472114731147411475114761147711478114791148011481114821148311484114851148611487114881148911490114911149211493114941149511496114971149811499115001150111502115031150411505115061150711508115091151011511115121151311514115151151611517115181151911520115211152211523115241152511526115271152811529115301153111532115331153411535115361153711538115391154011541115421154311544115451154611547115481154911550115511155211553115541155511556115571155811559115601156111562115631156411565115661156711568115691157011571115721157311574115751157611577115781157911580115811158211583115841158511586115871158811589115901159111592115931159411595115961159711598115991160011601116021160311604116051160611607116081160911610116111161211613116141161511616116171161811619116201162111622116231162411625116261162711628116291163011631116321163311634116351163611637116381163911640116411164211643116441164511646116471164811649116501165111652116531165411655116561165711658116591166011661116621166311664116651166611667116681166911670116711167211673116741167511676116771167811679116801168111682116831168411685116861168711688116891169011691116921169311694116951169611697116981169911700117011170211703117041170511706117071170811709117101171111712117131171411715117161171711718117191172011721117221172311724117251172611727117281172911730117311173211733117341173511736117371173811739117401174111742117431174411745117461174711748117491175011751117521175311754117551175611757117581175911760117611176211763117641176511766117671176811769117701177111772117731177411775117761177711778117791178011781117821178311784117851178611787117881178911790117911179211793117941179511796117971179811799118001180111802118031180411805118061180711808118091181011811118121181311814118151181611817118181181911820118211
18221182311824118251182611827118281182911830118311183211833118341183511836118371183811839118401184111842118431184411845118461184711848118491185011851118521185311854118551185611857118581185911860118611186211863118641186511866118671186811869118701187111872118731187411875118761187711878118791188011881118821188311884118851188611887118881188911890118911189211893118941189511896118971189811899119001190111902119031190411905119061190711908119091191011911119121191311914119151191611917119181191911920119211192211923119241192511926119271192811929119301193111932119331193411935119361193711938119391194011941119421194311944119451194611947119481194911950119511195211953119541195511956119571195811959119601196111962119631196411965119661196711968119691197011971119721197311974119751197611977119781197911980119811198211983119841198511986119871198811989119901199111992119931199411995119961199711998119991200012001120021200312004120051200612007120081200912010120111201212013120141201512016120171201812019120201202112022120231202412025120261202712028120291203012031120321203312034120351203612037120381203912040120411204212043120441204512046120471204812049120501205112052120531205412055120561205712058120591206012061120621206312064120651206612067120681206912070120711207212073120741207512076120771207812079120801208112082120831208412085120861208712088120891209012091120921209312094120951209612097120981209912100121011210212103121041210512106121071210812109121101211112112121131211412115121161211712118121191212012121121221212312124121251212612127121281212912130121311213212133121341213512136121371213812139121401214112142121431214412145121461214712148121491215012151121521215312154121551215612157121581215912160121611216212163121641216512166121671216812169121701217112172121731217412175121761217712178121791218012181121821218312184121851218612187121881218912190121911219212193121941219512196121971219812199122001220112202122031220412205122061220712208122091221012211122121221312214122151221612217122181221912220122211
22221222312224122251222612227122281222912230122311223212233122341223512236122371223812239122401224112242122431224412245122461224712248122491225012251122521225312254122551225612257122581225912260122611226212263122641226512266122671226812269122701227112272122731227412275122761227712278122791228012281122821228312284122851228612287122881228912290122911229212293122941229512296122971229812299123001230112302123031230412305123061230712308123091231012311123121231312314123151231612317123181231912320123211232212323123241232512326123271232812329123301233112332123331233412335123361233712338123391234012341123421234312344123451234612347123481234912350123511235212353123541235512356123571235812359123601236112362123631236412365123661236712368123691237012371123721237312374123751237612377123781237912380123811238212383123841238512386123871238812389123901239112392123931239412395123961239712398123991240012401124021240312404124051240612407124081240912410124111241212413124141241512416124171241812419124201242112422124231242412425124261242712428124291243012431124321243312434124351243612437124381243912440124411244212443124441244512446124471244812449124501245112452124531245412455124561245712458124591246012461124621246312464124651246612467124681246912470124711247212473124741247512476124771247812479124801248112482124831248412485124861248712488124891249012491124921249312494124951249612497124981249912500125011250212503125041250512506125071250812509125101251112512125131251412515125161251712518125191252012521125221252312524125251252612527125281252912530125311253212533125341253512536125371253812539125401254112542125431254412545125461254712548125491255012551125521255312554125551255612557125581255912560125611256212563125641256512566125671256812569125701257112572125731257412575125761257712578125791258012581125821258312584125851258612587125881258912590125911259212593125941259512596125971259812599126001260112602126031260412605126061260712608126091261012611126121261312614126151261612617126181261912620126211
26221262312624126251262612627126281262912630126311263212633126341263512636126371263812639126401264112642126431264412645126461264712648126491265012651126521265312654126551265612657126581265912660126611266212663126641266512666126671266812669126701267112672126731267412675126761267712678126791268012681126821268312684126851268612687126881268912690126911269212693126941269512696126971269812699127001270112702127031270412705127061270712708127091271012711127121271312714127151271612717127181271912720127211272212723127241272512726127271272812729127301273112732127331273412735127361273712738127391274012741127421274312744127451274612747127481274912750127511275212753127541275512756127571275812759127601276112762127631276412765127661276712768127691277012771127721277312774127751277612777127781277912780127811278212783127841278512786127871278812789127901279112792127931279412795127961279712798127991280012801128021280312804128051280612807128081280912810128111281212813128141281512816128171281812819128201282112822128231282412825128261282712828128291283012831128321283312834128351283612837128381283912840128411284212843128441284512846128471284812849128501285112852128531285412855128561285712858128591286012861128621286312864128651286612867128681286912870128711287212873128741287512876128771287812879128801288112882128831288412885128861288712888128891289012891128921289312894128951289612897128981289912900129011290212903129041290512906129071290812909129101291112912129131291412915129161291712918129191292012921129221292312924129251292612927129281292912930129311293212933129341293512936129371293812939129401294112942129431294412945129461294712948129491295012951129521295312954129551295612957129581295912960129611296212963129641296512966129671296812969129701297112972129731297412975129761297712978129791298012981129821298312984129851298612987129881298912990129911299212993129941299512996129971299812999130001300113002130031300413005130061300713008130091301013011130121301313014130151301613017130181301913020130211
30221302313024130251302613027130281302913030130311303213033130341303513036130371303813039130401304113042130431304413045130461304713048130491305013051130521305313054130551305613057130581305913060130611306213063130641306513066130671306813069130701307113072130731307413075130761307713078130791308013081130821308313084130851308613087130881308913090130911309213093130941309513096130971309813099131001310113102131031310413105131061310713108131091311013111131121311313114131151311613117131181311913120131211312213123131241312513126131271312813129131301313113132131331313413135131361313713138131391314013141131421314313144131451314613147131481314913150131511315213153131541315513156131571315813159131601316113162131631316413165131661316713168131691317013171131721317313174131751317613177131781317913180131811318213183131841318513186131871318813189131901319113192131931319413195131961319713198131991320013201132021320313204132051320613207132081320913210132111321213213132141321513216132171321813219132201322113222132231322413225132261322713228132291323013231132321323313234132351323613237132381323913240132411324213243132441324513246132471324813249132501325113252132531325413255132561325713258132591326013261132621326313264132651326613267132681326913270132711327213273132741327513276132771327813279132801328113282132831328413285132861328713288132891329013291132921329313294132951329613297132981329913300133011330213303133041330513306133071330813309133101331113312133131331413315133161331713318133191332013321133221332313324133251332613327133281332913330133311333213333133341333513336133371333813339133401334113342133431334413345133461334713348133491335013351133521335313354133551335613357133581335913360133611336213363133641336513366133671336813369133701337113372133731337413375133761337713378133791338013381133821338313384133851338613387133881338913390133911339213393133941339513396133971339813399134001340113402134031340413405134061340713408134091341013411134121341313414134151341613417134181341913420134211
34221342313424134251342613427134281342913430134311343213433134341343513436134371343813439134401344113442134431344413445134461344713448134491345013451134521345313454134551345613457134581345913460134611346213463134641346513466134671346813469134701347113472134731347413475134761347713478134791348013481134821348313484134851348613487134881348913490134911349213493134941349513496134971349813499135001350113502135031350413505135061350713508135091351013511135121351313514135151351613517135181351913520135211352213523135241352513526135271352813529135301353113532135331353413535135361353713538135391354013541135421354313544135451354613547135481354913550135511355213553135541355513556135571355813559135601356113562135631356413565135661356713568135691357013571135721357313574135751357613577135781357913580135811358213583135841358513586135871358813589135901359113592135931359413595135961359713598135991360013601136021360313604136051360613607136081360913610136111361213613136141361513616136171361813619136201362113622136231362413625136261362713628136291363013631136321363313634136351363613637136381363913640136411364213643136441364513646136471364813649136501365113652136531365413655136561365713658136591366013661136621366313664136651366613667136681366913670136711367213673136741367513676136771367813679136801368113682136831368413685136861368713688136891369013691136921369313694136951369613697136981369913700137011370213703137041370513706137071370813709137101371113712137131371413715137161371713718137191372013721137221372313724137251372613727137281372913730137311373213733137341373513736137371373813739137401374113742137431374413745137461374713748137491375013751137521375313754137551375613757137581375913760137611376213763137641376513766137671376813769137701377113772137731377413775137761377713778137791378013781137821378313784137851378613787137881378913790137911379213793137941379513796137971379813799138001380113802138031380413805138061380713808138091381013811138121381313814138151381613817138181381913820138211
38221382313824138251382613827138281382913830138311383213833138341383513836138371383813839138401384113842138431384413845138461384713848138491385013851138521385313854138551385613857138581385913860138611386213863138641386513866138671386813869138701387113872138731387413875138761387713878138791388013881138821388313884138851388613887138881388913890138911389213893138941389513896138971389813899139001390113902139031390413905139061390713908139091391013911139121391313914139151391613917139181391913920139211392213923139241392513926139271392813929139301393113932139331393413935139361393713938139391394013941139421394313944139451394613947139481394913950139511395213953139541395513956139571395813959139601396113962139631396413965139661396713968139691397013971139721397313974139751397613977139781397913980139811398213983139841398513986139871398813989139901399113992139931399413995139961399713998139991400014001140021400314004140051400614007140081400914010140111401214013140141401514016140171401814019140201402114022140231402414025140261402714028140291403014031140321403314034140351403614037140381403914040140411404214043140441404514046140471404814049140501405114052140531405414055140561405714058140591406014061140621406314064140651406614067140681406914070140711407214073140741407514076140771407814079140801408114082140831408414085140861408714088140891409014091140921409314094140951409614097140981409914100141011410214103141041410514106141071410814109141101411114112141131411414115141161411714118141191412014121141221412314124141251412614127141281412914130141311413214133141341413514136141371413814139141401414114142141431414414145141461414714148141491415014151141521415314154141551415614157141581415914160141611416214163141641416514166141671416814169141701417114172141731417414175141761417714178141791418014181141821418314184141851418614187141881418914190141911419214193141941419514196141971419814199142001420114202142031420414205142061420714208142091421014211142121421314214142151421614217142181421914220142211
42221422314224142251422614227142281422914230142311423214233142341423514236142371423814239142401424114242142431424414245142461424714248142491425014251142521425314254142551425614257142581425914260142611426214263142641426514266142671426814269142701427114272142731427414275142761427714278142791428014281142821428314284142851428614287142881428914290142911429214293142941429514296142971429814299143001430114302143031430414305143061430714308143091431014311143121431314314143151431614317143181431914320143211432214323143241432514326143271432814329143301433114332143331433414335143361433714338143391434014341143421434314344143451434614347143481434914350143511435214353143541435514356143571435814359143601436114362143631436414365143661436714368143691437014371143721437314374143751437614377143781437914380143811438214383143841438514386143871438814389143901439114392143931439414395143961439714398143991440014401144021440314404144051440614407144081440914410144111441214413144141441514416144171441814419144201442114422144231442414425144261442714428144291443014431144321443314434144351443614437144381443914440144411444214443144441444514446144471444814449144501445114452144531445414455144561445714458144591446014461144621446314464144651446614467144681446914470144711447214473144741447514476144771447814479144801448114482144831448414485144861448714488144891449014491144921449314494144951449614497144981449914500145011450214503145041450514506145071450814509145101451114512145131451414515145161451714518145191452014521145221452314524145251452614527145281452914530145311453214533145341453514536145371453814539145401454114542145431454414545145461454714548145491455014551145521455314554145551455614557145581455914560145611456214563145641456514566145671456814569145701457114572145731457414575145761457714578145791458014581145821458314584145851458614587145881458914590145911459214593145941459514596145971459814599146001460114602146031460414605146061460714608146091461014611146121461314614146151461614617146181461914620146211
46221462314624146251462614627146281462914630146311463214633146341463514636146371463814639146401464114642146431464414645146461464714648146491465014651146521465314654146551465614657146581465914660146611466214663146641466514666146671466814669146701467114672146731467414675146761467714678146791468014681146821468314684146851468614687146881468914690146911469214693146941469514696146971469814699147001470114702147031470414705147061470714708147091471014711147121471314714147151471614717147181471914720147211472214723147241472514726147271472814729147301473114732147331473414735147361473714738147391474014741147421474314744147451474614747147481474914750147511475214753147541475514756147571475814759147601476114762147631476414765147661476714768147691477014771147721477314774147751477614777147781477914780147811478214783147841478514786147871478814789147901479114792147931479414795147961479714798147991480014801148021480314804148051480614807148081480914810148111481214813148141481514816148171481814819148201482114822148231482414825148261482714828148291483014831148321483314834148351483614837148381483914840148411484214843148441484514846148471484814849148501485114852148531485414855148561485714858148591486014861148621486314864148651486614867148681486914870148711487214873148741487514876148771487814879148801488114882148831488414885148861488714888148891489014891148921489314894148951489614897148981489914900149011490214903149041490514906149071490814909149101491114912149131491414915149161491714918149191492014921149221492314924149251492614927149281492914930149311493214933149341493514936149371493814939149401494114942149431494414945149461494714948149491495014951149521495314954149551495614957149581495914960149611496214963149641496514966149671496814969149701497114972149731497414975149761497714978149791498014981149821498314984149851498614987149881498914990149911499214993149941499514996149971499814999150001500115002150031500415005150061500715008150091501015011150121501315014150151501615017150181501915020150211
50221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211
54221542315424154251542615427154281542915430154311543215433154341543515436154371543815439154401544115442154431544415445154461544715448154491545015451154521545315454154551545615457154581545915460154611546215463154641546515466154671546815469154701547115472154731547415475154761547715478154791548015481154821548315484154851548615487154881548915490154911549215493154941549515496154971549815499155001550115502155031550415505155061550715508155091551015511155121551315514155151551615517155181551915520155211552215523155241552515526155271552815529155301553115532155331553415535155361553715538155391554015541155421554315544155451554615547155481554915550155511555215553155541555515556155571555815559155601556115562155631556415565155661556715568155691557015571155721557315574155751557615577155781557915580155811558215583155841558515586155871558815589155901559115592155931559415595155961559715598155991560015601156021560315604156051560615607156081560915610156111561215613156141561515616156171561815619156201562115622156231562415625156261562715628156291563015631156321563315634156351563615637156381563915640156411564215643156441564515646156471564815649156501565115652156531565415655156561565715658156591566015661156621566315664156651566615667156681566915670156711567215673156741567515676156771567815679156801568115682156831568415685156861568715688156891569015691156921569315694156951569615697156981569915700157011570215703157041570515706157071570815709157101571115712157131571415715157161571715718157191572015721157221572315724157251572615727157281572915730157311573215733157341573515736157371573815739157401574115742157431574415745157461574715748157491575015751157521575315754157551575615757157581575915760157611576215763157641576515766157671576815769157701577115772157731577415775157761577715778157791578015781157821578315784157851578615787157881578915790157911579215793157941579515796157971579815799158001580115802158031580415805158061580715808158091581015811158121581315814158151581615817158181581915820158211
58221582315824158251582615827158281582915830158311583215833158341583515836158371583815839158401584115842158431584415845158461584715848158491585015851158521585315854158551585615857158581585915860158611586215863158641586515866158671586815869158701587115872158731587415875158761587715878158791588015881158821588315884158851588615887158881588915890158911589215893158941589515896158971589815899159001590115902159031590415905159061590715908159091591015911159121591315914159151591615917159181591915920159211592215923159241592515926159271592815929159301593115932159331593415935159361593715938159391594015941159421594315944159451594615947159481594915950159511595215953159541595515956159571595815959159601596115962159631596415965159661596715968159691597015971159721597315974159751597615977159781597915980159811598215983159841598515986159871598815989159901599115992159931599415995159961599715998159991600016001160021600316004160051600616007160081600916010160111601216013160141601516016160171601816019160201602116022160231602416025160261602716028160291603016031160321603316034160351603616037160381603916040160411604216043160441604516046160471604816049160501605116052160531605416055160561605716058160591606016061160621606316064160651606616067160681606916070160711607216073160741607516076160771607816079160801608116082160831608416085160861608716088160891609016091160921609316094160951609616097160981609916100161011610216103161041610516106161071610816109161101611116112161131611416115161161611716118161191612016121161221612316124161251612616127161281612916130161311613216133161341613516136161371613816139161401614116142161431614416145161461614716148161491615016151161521615316154161551615616157161581615916160161611616216163161641616516166161671616816169161701617116172161731617416175161761617716178161791618016181161821618316184161851618616187161881618916190161911619216193161941619516196161971619816199162001620116202162031620416205162061620716208162091621016211162121621316214162151621616217162181621916220162211
62221622316224162251622616227162281622916230162311623216233162341623516236162371623816239162401624116242162431624416245162461624716248162491625016251162521625316254162551625616257162581625916260162611626216263162641626516266162671626816269162701627116272162731627416275162761627716278162791628016281162821628316284162851628616287162881628916290162911629216293162941629516296162971629816299163001630116302163031630416305163061630716308163091631016311163121631316314163151631616317163181631916320163211632216323163241632516326163271632816329163301633116332163331633416335163361633716338163391634016341163421634316344163451634616347163481634916350163511635216353163541635516356163571635816359163601636116362163631636416365163661636716368163691637016371163721637316374163751637616377163781637916380163811638216383163841638516386163871638816389163901639116392163931639416395163961639716398163991640016401164021640316404164051640616407164081640916410164111641216413164141641516416164171641816419164201642116422164231642416425164261642716428164291643016431164321643316434164351643616437164381643916440164411644216443164441644516446164471644816449164501645116452164531645416455164561645716458164591646016461164621646316464164651646616467164681646916470164711647216473164741647516476164771647816479164801648116482164831648416485164861648716488164891649016491164921649316494164951649616497164981649916500165011650216503165041650516506165071650816509165101651116512165131651416515165161651716518165191652016521165221652316524165251652616527165281652916530165311653216533165341653516536165371653816539165401654116542165431654416545165461654716548165491655016551165521655316554165551655616557165581655916560165611656216563165641656516566165671656816569165701657116572165731657416575165761657716578165791658016581165821658316584165851658616587165881658916590165911659216593165941659516596165971659816599166001660116602166031660416605166061660716608166091661016611166121661316614166151661616617166181661916620166211
66221662316624166251662616627166281662916630166311663216633166341663516636166371663816639166401664116642166431664416645166461664716648166491665016651166521665316654166551665616657166581665916660166611666216663166641666516666166671666816669166701667116672166731667416675166761667716678166791668016681166821668316684166851668616687166881668916690166911669216693166941669516696166971669816699167001670116702167031670416705167061670716708167091671016711167121671316714167151671616717167181671916720167211672216723167241672516726167271672816729167301673116732167331673416735167361673716738167391674016741167421674316744167451674616747167481674916750167511675216753167541675516756167571675816759167601676116762167631676416765167661676716768167691677016771167721677316774167751677616777167781677916780167811678216783167841678516786167871678816789167901679116792167931679416795167961679716798167991680016801168021680316804168051680616807168081680916810168111681216813168141681516816168171681816819168201682116822168231682416825168261682716828168291683016831168321683316834168351683616837168381683916840168411684216843168441684516846168471684816849168501685116852168531685416855168561685716858168591686016861168621686316864168651686616867168681686916870168711687216873168741687516876168771687816879168801688116882168831688416885168861688716888168891689016891168921689316894168951689616897168981689916900169011690216903169041690516906169071690816909169101691116912169131691416915169161691716918169191692016921169221692316924169251692616927169281692916930169311693216933169341693516936169371693816939169401694116942169431694416945169461694716948169491695016951169521695316954169551695616957169581695916960169611696216963169641696516966169671696816969169701697116972169731697416975169761697716978169791698016981169821698316984169851698616987169881698916990169911699216993169941699516996169971699816999170001700117002170031700417005170061700717008170091701017011170121701317014170151701617017170181701917020170211
70221702317024170251702617027170281702917030170311703217033170341703517036170371703817039170401704117042170431704417045170461704717048170491705017051170521705317054170551705617057170581705917060170611706217063170641706517066170671706817069170701707117072170731707417075170761707717078170791708017081170821708317084170851708617087170881708917090170911709217093170941709517096170971709817099171001710117102171031710417105171061710717108171091711017111171121711317114171151711617117171181711917120171211712217123171241712517126171271712817129171301713117132171331713417135171361713717138171391714017141171421714317144171451714617147171481714917150171511715217153171541715517156171571715817159171601716117162171631716417165171661716717168171691717017171171721717317174171751717617177171781717917180171811718217183171841718517186171871718817189171901719117192171931719417195171961719717198171991720017201172021720317204172051720617207172081720917210172111721217213172141721517216172171721817219172201722117222172231722417225172261722717228172291723017231172321723317234172351723617237172381723917240172411724217243172441724517246172471724817249172501725117252172531725417255172561725717258172591726017261172621726317264172651726617267172681726917270172711727217273172741727517276172771727817279172801728117282172831728417285172861728717288172891729017291172921729317294172951729617297172981729917300173011730217303173041730517306173071730817309173101731117312173131731417315173161731717318173191732017321173221732317324173251732617327173281732917330173311733217333173341733517336173371733817339173401734117342173431734417345173461734717348173491735017351173521735317354173551735617357173581735917360173611736217363173641736517366173671736817369173701737117372173731737417375173761737717378173791738017381173821738317384173851738617387173881738917390173911739217393173941739517396173971739817399174001740117402174031740417405174061740717408174091741017411174121741317414174151741617417174181741917420174211
74221742317424174251742617427174281742917430174311743217433174341743517436174371743817439174401744117442174431744417445174461744717448174491745017451174521745317454174551745617457174581745917460174611746217463174641746517466174671746817469174701747117472174731747417475174761747717478174791748017481174821748317484174851748617487174881748917490174911749217493174941749517496174971749817499175001750117502175031750417505175061750717508175091751017511175121751317514175151751617517175181751917520175211752217523175241752517526175271752817529175301753117532175331753417535175361753717538175391754017541175421754317544175451754617547175481754917550175511755217553175541755517556175571755817559175601756117562175631756417565175661756717568175691757017571175721757317574175751757617577175781757917580175811758217583175841758517586175871758817589175901759117592175931759417595175961759717598175991760017601176021760317604176051760617607176081760917610176111761217613176141761517616176171761817619176201762117622176231762417625176261762717628176291763017631176321763317634176351763617637176381763917640176411764217643176441764517646176471764817649176501765117652176531765417655176561765717658176591766017661176621766317664176651766617667176681766917670176711767217673176741767517676176771767817679176801768117682176831768417685176861768717688176891769017691176921769317694176951769617697176981769917700177011770217703177041770517706177071770817709177101771117712177131771417715177161771717718177191772017721177221772317724177251772617727177281772917730177311773217733177341773517736177371773817739177401774117742177431774417745177461774717748177491775017751177521775317754177551775617757177581775917760177611776217763177641776517766177671776817769177701777117772177731777417775177761777717778177791778017781177821778317784177851778617787177881778917790177911779217793177941779517796177971779817799178001780117802178031780417805178061780717808178091781017811178121781317814178151781617817178181781917820178211
78221782317824178251782617827178281782917830178311783217833178341783517836178371783817839178401784117842178431784417845178461784717848178491785017851178521785317854178551785617857178581785917860178611786217863178641786517866178671786817869178701787117872178731787417875178761787717878178791788017881178821788317884178851788617887178881788917890178911789217893178941789517896178971789817899179001790117902179031790417905179061790717908179091791017911179121791317914179151791617917179181791917920179211792217923179241792517926179271792817929179301793117932179331793417935179361793717938179391794017941179421794317944179451794617947179481794917950179511795217953179541795517956179571795817959179601796117962179631796417965179661796717968179691797017971179721797317974179751797617977179781797917980179811798217983179841798517986179871798817989179901799117992179931799417995179961799717998179991800018001180021800318004180051800618007180081800918010180111801218013180141801518016180171801818019180201802118022180231802418025180261802718028180291803018031180321803318034180351803618037180381803918040180411804218043180441804518046180471804818049180501805118052180531805418055180561805718058180591806018061180621806318064180651806618067180681806918070180711807218073180741807518076180771807818079180801808118082180831808418085180861808718088180891809018091180921809318094180951809618097180981809918100181011810218103181041810518106181071810818109181101811118112181131811418115181161811718118181191812018121181221812318124181251812618127181281812918130181311813218133181341813518136181371813818139181401814118142181431814418145181461814718148181491815018151181521815318154181551815618157181581815918160181611816218163181641816518166181671816818169181701817118172181731817418175181761817718178181791818018181181821818318184181851818618187181881818918190181911819218193181941819518196181971819818199182001820118202182031820418205182061820718208182091821018211182121821318214182151821618217182181821918220182211
82221822318224182251822618227182281822918230182311823218233182341823518236182371823818239182401824118242182431824418245182461824718248182491825018251182521825318254182551825618257182581825918260182611826218263182641826518266182671826818269182701827118272182731827418275182761827718278182791828018281182821828318284182851828618287182881828918290182911829218293182941829518296182971829818299183001830118302183031830418305183061830718308183091831018311183121831318314183151831618317183181831918320183211832218323183241832518326183271832818329183301833118332183331833418335183361833718338183391834018341183421834318344183451834618347183481834918350183511835218353183541835518356183571835818359183601836118362183631836418365183661836718368183691837018371183721837318374183751837618377183781837918380183811838218383183841838518386183871838818389183901839118392183931839418395183961839718398183991840018401184021840318404184051840618407184081840918410184111841218413184141841518416184171841818419184201842118422184231842418425184261842718428184291843018431184321843318434184351843618437184381843918440184411844218443184441844518446184471844818449184501845118452184531845418455184561845718458184591846018461184621846318464184651846618467184681846918470184711847218473184741847518476184771847818479184801848118482184831848418485184861848718488184891849018491184921849318494184951849618497184981849918500185011850218503185041850518506185071850818509185101851118512185131851418515185161851718518185191852018521185221852318524185251852618527185281852918530185311853218533185341853518536185371853818539185401854118542185431854418545185461854718548185491855018551185521855318554185551855618557185581855918560185611856218563185641856518566185671856818569185701857118572185731857418575185761857718578185791858018581185821858318584185851858618587185881858918590185911859218593185941859518596185971859818599186001860118602186031860418605186061860718608186091861018611186121861318614186151861618617186181861918620186211
86221862318624186251862618627186281862918630186311863218633186341863518636186371863818639186401864118642186431864418645186461864718648186491865018651186521865318654186551865618657186581865918660186611866218663186641866518666186671866818669186701867118672186731867418675186761867718678186791868018681186821868318684186851868618687186881868918690186911869218693186941869518696186971869818699187001870118702187031870418705187061870718708187091871018711187121871318714187151871618717187181871918720187211872218723187241872518726187271872818729187301873118732187331873418735187361873718738187391874018741187421874318744187451874618747187481874918750187511875218753187541875518756187571875818759187601876118762187631876418765187661876718768187691877018771187721877318774187751877618777187781877918780187811878218783187841878518786187871878818789187901879118792187931879418795187961879718798187991880018801188021880318804188051880618807188081880918810188111881218813188141881518816188171881818819188201882118822188231882418825188261882718828188291883018831188321883318834188351883618837188381883918840188411884218843188441884518846188471884818849188501885118852188531885418855188561885718858188591886018861188621886318864188651886618867188681886918870188711887218873188741887518876188771887818879188801888118882188831888418885188861888718888188891889018891188921889318894188951889618897188981889918900189011890218903189041890518906189071890818909189101891118912189131891418915189161891718918189191892018921189221892318924189251892618927189281892918930189311893218933189341893518936189371893818939189401894118942189431894418945189461894718948189491895018951189521895318954189551895618957189581895918960189611896218963189641896518966189671896818969189701897118972189731897418975189761897718978189791898018981189821898318984189851898618987189881898918990189911899218993189941899518996189971899818999190001900119002190031900419005190061900719008190091901019011190121901319014190151901619017190181901919020190211
90221902319024190251902619027190281902919030190311903219033190341903519036190371903819039190401904119042190431904419045190461904719048190491905019051190521905319054190551905619057190581905919060190611906219063190641906519066190671906819069190701907119072190731907419075190761907719078190791908019081190821908319084190851908619087190881908919090190911909219093190941909519096190971909819099191001910119102191031910419105191061910719108191091911019111191121911319114191151911619117191181911919120191211912219123191241912519126191271912819129191301913119132191331913419135191361913719138191391914019141191421914319144191451914619147191481914919150191511915219153191541915519156191571915819159191601916119162191631916419165191661916719168191691917019171191721917319174191751917619177191781917919180191811918219183191841918519186191871918819189191901919119192191931919419195191961919719198191991920019201192021920319204192051920619207192081920919210192111921219213192141921519216192171921819219192201922119222192231922419225192261922719228192291923019231192321923319234192351923619237192381923919240192411924219243192441924519246192471924819249192501925119252192531925419255192561925719258192591926019261192621926319264192651926619267192681926919270192711927219273192741927519276192771927819279192801928119282192831928419285192861928719288192891929019291192921929319294192951929619297192981929919300193011930219303193041930519306193071930819309193101931119312193131931419315193161931719318193191932019321193221932319324193251932619327193281932919330193311933219333193341933519336193371933819339193401934119342193431934419345193461934719348193491935019351193521935319354193551935619357193581935919360193611936219363193641936519366193671936819369193701937119372193731937419375193761937719378193791938019381193821938319384193851938619387193881938919390193911939219393193941939519396193971939819399194001940119402194031940419405194061940719408194091941019411194121941319414194151941619417194181941919420194211
94221942319424194251942619427194281942919430194311943219433194341943519436194371943819439194401944119442194431944419445194461944719448194491945019451194521945319454194551945619457194581945919460194611946219463194641946519466194671946819469194701947119472194731947419475194761947719478194791948019481194821948319484194851948619487194881948919490194911949219493194941949519496194971949819499195001950119502195031950419505195061950719508195091951019511195121951319514195151951619517195181951919520195211952219523195241952519526195271952819529195301953119532195331953419535195361953719538195391954019541195421954319544195451954619547195481954919550195511955219553195541955519556195571955819559195601956119562195631956419565195661956719568195691957019571195721957319574195751957619577195781957919580195811958219583195841958519586195871958819589195901959119592195931959419595195961959719598195991960019601196021960319604196051960619607196081960919610196111961219613196141961519616196171961819619196201962119622196231962419625196261962719628196291963019631196321963319634196351963619637196381963919640196411964219643196441964519646196471964819649196501965119652196531965419655196561965719658196591966019661196621966319664196651966619667196681966919670196711967219673196741967519676196771967819679196801968119682196831968419685196861968719688196891969019691196921969319694196951969619697196981969919700197011970219703197041970519706197071970819709197101971119712197131971419715197161971719718197191972019721197221972319724197251972619727197281972919730197311973219733197341973519736197371973819739197401974119742197431974419745197461974719748197491975019751197521975319754197551975619757197581975919760197611976219763197641976519766197671976819769197701977119772197731977419775197761977719778197791978019781197821978319784197851978619787197881978919790197911979219793197941979519796197971979819799198001980119802198031980419805198061980719808198091981019811198121981319814198151981619817198181981919820198211
98221982319824198251982619827198281982919830198311983219833198341983519836198371983819839198401984119842198431984419845198461984719848198491985019851198521985319854198551985619857198581985919860198611986219863198641986519866198671986819869198701987119872198731987419875198761987719878198791988019881198821988319884198851988619887198881988919890198911989219893198941989519896198971989819899199001990119902199031990419905199061990719908199091991019911199121991319914199151991619917199181991919920199211992219923199241992519926199271992819929199301993119932199331993419935199361993719938199391994019941199421994319944199451994619947199481994919950199511995219953199541995519956199571995819959199601996119962199631996419965199661996719968199691997019971199721997319974199751997619977199781997919980199811998219983199841998519986199871998819989199901999119992199931999419995199961999719998199992000020001200022000320004200052000620007200082000920010200112001220013200142001520016200172001820019200202002120022200232002420025200262002720028200292003020031200322003320034200352003620037200382003920040200412004220043200442004520046200472004820049200502005120052200532005420055200562005720058200592006020061200622006320064200652006620067200682006920070200712007220073200742007520076200772007820079200802008120082200832008420085200862008720088200892009020091200922009320094200952009620097200982009920100201012010220103201042010520106201072010820109201102011120112201132011420115201162011720118201192012020121201222012320124201252012620127201282012920130201312013220133201342013520136201372013820139201402014120142201432014420145201462014720148201492015020151201522015320154201552015620157201582015920160201612016220163201642016520166201672016820169201702017120172201732017420175201762017720178201792018020181201822018320184201852018620187201882018920190201912019220193201942019520196201972019820199202002020120202202032020420205202062020720208202092021020211202122021320214202152021620217202182021920220202212
02222022320224202252022620227202282022920230202312023220233202342023520236202372023820239202402024120242202432024420245202462024720248202492025020251202522025320254202552025620257202582025920260202612026220263202642026520266202672026820269202702027120272202732027420275202762027720278202792028020281202822028320284202852028620287202882028920290202912029220293202942029520296202972029820299203002030120302203032030420305203062030720308203092031020311203122031320314203152031620317203182031920320203212032220323203242032520326203272032820329203302033120332203332033420335203362033720338203392034020341203422034320344203452034620347203482034920350203512035220353203542035520356203572035820359203602036120362203632036420365203662036720368203692037020371203722037320374203752037620377203782037920380203812038220383203842038520386203872038820389203902039120392203932039420395203962039720398203992040020401204022040320404204052040620407204082040920410204112041220413204142041520416204172041820419204202042120422204232042420425204262042720428204292043020431204322043320434204352043620437204382043920440204412044220443204442044520446204472044820449204502045120452204532045420455204562045720458204592046020461204622046320464204652046620467204682046920470204712047220473204742047520476204772047820479204802048120482204832048420485204862048720488204892049020491204922049320494204952049620497204982049920500205012050220503205042050520506205072050820509205102051120512205132051420515205162051720518205192052020521205222052320524205252052620527205282052920530205312053220533205342053520536205372053820539205402054120542205432054420545205462054720548205492055020551205522055320554205552055620557205582055920560205612056220563205642056520566205672056820569205702057120572205732057420575205762057720578205792058020581205822058320584205852058620587205882058920590205912059220593205942059520596205972059820599206002060120602206032060420605206062060720608206092061020611206122061320614206152061620617206182061920620206212
06222062320624206252062620627206282062920630206312063220633206342063520636206372063820639206402064120642206432064420645206462064720648206492065020651206522065320654206552065620657206582065920660206612066220663206642066520666206672066820669206702067120672206732067420675206762067720678206792068020681206822068320684206852068620687206882068920690206912069220693206942069520696206972069820699207002070120702207032070420705207062070720708207092071020711207122071320714207152071620717207182071920720207212072220723207242072520726207272072820729207302073120732207332073420735207362073720738207392074020741207422074320744207452074620747207482074920750207512075220753207542075520756207572075820759207602076120762207632076420765207662076720768207692077020771207722077320774207752077620777207782077920780207812078220783207842078520786207872078820789207902079120792207932079420795207962079720798207992080020801208022080320804208052080620807208082080920810208112081220813208142081520816208172081820819208202082120822208232082420825208262082720828208292083020831208322083320834208352083620837208382083920840208412084220843208442084520846208472084820849208502085120852208532085420855208562085720858208592086020861208622086320864208652086620867208682086920870208712087220873208742087520876208772087820879208802088120882208832088420885208862088720888208892089020891208922089320894208952089620897208982089920900209012090220903209042090520906209072090820909209102091120912209132091420915209162091720918209192092020921209222092320924209252092620927209282092920930209312093220933209342093520936209372093820939209402094120942209432094420945209462094720948209492095020951209522095320954209552095620957209582095920960209612096220963209642096520966209672096820969209702097120972209732097420975209762097720978209792098020981209822098320984209852098620987209882098920990209912099220993209942099520996209972099820999210002100121002210032100421005210062100721008210092101021011210122101321014210152101621017210182101921020210212
10222102321024210252102621027210282102921030210312103221033210342103521036210372103821039210402104121042210432104421045210462104721048210492105021051210522105321054210552105621057210582105921060210612106221063210642106521066210672106821069210702107121072210732107421075210762107721078210792108021081210822108321084210852108621087210882108921090210912109221093210942109521096210972109821099211002110121102211032110421105211062110721108211092111021111211122111321114211152111621117211182111921120211212112221123211242112521126211272112821129211302113121132211332113421135211362113721138211392114021141211422114321144211452114621147211482114921150211512115221153211542115521156211572115821159211602116121162211632116421165211662116721168211692117021171211722117321174211752117621177211782117921180211812118221183211842118521186211872118821189211902119121192211932119421195211962119721198211992120021201212022120321204212052120621207212082120921210212112121221213212142121521216212172121821219212202122121222212232122421225212262122721228212292123021231212322123321234212352123621237212382123921240212412124221243212442124521246212472124821249212502125121252212532125421255212562125721258212592126021261212622126321264212652126621267212682126921270212712127221273212742127521276212772127821279212802128121282212832128421285212862128721288212892129021291212922129321294212952129621297212982129921300213012130221303213042130521306213072130821309213102131121312213132131421315213162131721318213192132021321213222132321324213252132621327213282132921330213312133221333213342133521336213372133821339213402134121342213432134421345213462134721348213492135021351213522135321354213552135621357213582135921360213612136221363213642136521366213672136821369213702137121372213732137421375213762137721378213792138021381213822138321384213852138621387213882138921390213912139221393213942139521396213972139821399214002140121402214032140421405214062140721408214092141021411214122141321414214152141621417214182141921420214212
14222142321424214252142621427214282142921430214312143221433214342143521436214372143821439214402144121442214432144421445214462144721448214492145021451214522145321454214552145621457214582145921460214612146221463214642146521466214672146821469214702147121472214732147421475214762147721478214792148021481214822148321484214852148621487214882148921490214912149221493214942149521496214972149821499215002150121502215032150421505215062150721508215092151021511215122151321514215152151621517215182151921520215212152221523215242152521526215272152821529215302153121532215332153421535215362153721538215392154021541215422154321544215452154621547215482154921550215512155221553215542155521556215572155821559215602156121562215632156421565215662156721568215692157021571215722157321574215752157621577215782157921580215812158221583215842158521586215872158821589215902159121592215932159421595215962159721598215992160021601216022160321604216052160621607216082160921610216112161221613216142161521616216172161821619216202162121622216232162421625216262162721628216292163021631216322163321634216352163621637216382163921640216412164221643216442164521646216472164821649216502165121652216532165421655216562165721658216592166021661216622166321664216652166621667216682166921670216712167221673216742167521676216772167821679216802168121682216832168421685216862168721688216892169021691216922169321694216952169621697216982169921700217012170221703217042170521706217072170821709217102171121712217132171421715217162171721718217192172021721217222172321724217252172621727217282172921730217312173221733217342173521736217372173821739217402174121742217432174421745217462174721748217492175021751217522175321754217552175621757217582175921760217612176221763217642176521766217672176821769217702177121772217732177421775217762177721778217792178021781217822178321784217852178621787217882178921790217912179221793217942179521796217972179821799218002180121802218032180421805218062180721808218092181021811218122181321814218152181621817218182181921820218212
18222182321824218252182621827218282182921830218312183221833218342183521836218372183821839218402184121842218432184421845218462184721848218492185021851218522185321854218552185621857218582185921860218612186221863218642186521866218672186821869218702187121872218732187421875218762187721878218792188021881218822188321884218852188621887218882188921890218912189221893218942189521896218972189821899219002190121902219032190421905219062190721908219092191021911219122191321914219152191621917219182191921920219212192221923219242192521926219272192821929219302193121932219332193421935219362193721938219392194021941219422194321944219452194621947219482194921950219512195221953219542195521956219572195821959219602196121962219632196421965219662196721968219692197021971219722197321974219752197621977219782197921980219812198221983219842198521986219872198821989219902199121992219932199421995219962199721998219992200022001220022200322004220052200622007220082200922010220112201222013220142201522016220172201822019220202202122022220232202422025220262202722028220292203022031220322203322034220352203622037220382203922040220412204222043220442204522046220472204822049220502205122052220532205422055220562205722058220592206022061220622206322064220652206622067220682206922070220712207222073220742207522076220772207822079220802208122082220832208422085220862208722088220892209022091220922209322094220952209622097220982209922100221012210222103221042210522106221072210822109221102211122112221132211422115221162211722118221192212022121221222212322124221252212622127221282212922130221312213222133221342213522136221372213822139221402214122142221432214422145221462214722148221492215022151221522215322154221552215622157221582215922160221612216222163221642216522166221672216822169221702217122172221732217422175221762217722178221792218022181221822218322184221852218622187221882218922190221912219222193221942219522196221972219822199222002220122202222032220422205222062220722208222092221022211222122221322214222152221622217222182221922220222212
22222222322224222252222622227222282222922230222312223222233222342223522236222372223822239222402224122242222432224422245222462224722248222492225022251222522225322254222552225622257222582225922260222612226222263222642226522266222672226822269222702227122272222732227422275222762227722278222792228022281222822228322284222852228622287222882228922290222912229222293222942229522296222972229822299223002230122302223032230422305223062230722308223092231022311223122231322314223152231622317223182231922320223212232222323223242232522326223272232822329223302233122332223332233422335223362233722338223392234022341223422234322344223452234622347223482234922350223512235222353223542235522356223572235822359223602236122362223632236422365223662236722368223692237022371223722237322374223752237622377223782237922380223812238222383223842238522386223872238822389223902239122392223932239422395223962239722398223992240022401224022240322404224052240622407224082240922410224112241222413224142241522416224172241822419224202242122422224232242422425224262242722428224292243022431224322243322434224352243622437224382243922440224412244222443224442244522446224472244822449224502245122452224532245422455224562245722458224592246022461224622246322464224652246622467224682246922470224712247222473224742247522476224772247822479224802248122482224832248422485224862248722488224892249022491224922249322494224952249622497224982249922500225012250222503225042250522506225072250822509225102251122512225132251422515225162251722518225192252022521225222252322524225252252622527225282252922530225312253222533225342253522536225372253822539225402254122542225432254422545225462254722548225492255022551225522255322554225552255622557225582255922560225612256222563225642256522566225672256822569225702257122572225732257422575225762257722578225792258022581225822258322584225852258622587225882258922590225912259222593225942259522596225972259822599226002260122602226032260422605226062260722608226092261022611226122261322614226152261622617226182261922620226212
26222262322624226252262622627226282262922630226312263222633226342263522636226372263822639226402264122642226432264422645226462264722648226492265022651226522265322654226552265622657226582265922660226612266222663226642266522666226672266822669226702267122672226732267422675226762267722678226792268022681226822268322684226852268622687226882268922690226912269222693226942269522696226972269822699227002270122702227032270422705227062270722708227092271022711227122271322714227152271622717227182271922720227212272222723227242272522726227272272822729227302273122732227332273422735227362273722738227392274022741227422274322744227452274622747227482274922750227512275222753227542275522756227572275822759227602276122762227632276422765227662276722768227692277022771227722277322774227752277622777227782277922780227812278222783227842278522786227872278822789227902279122792227932279422795227962279722798227992280022801228022280322804228052280622807228082280922810228112281222813228142281522816228172281822819228202282122822228232282422825228262282722828228292283022831228322283322834228352283622837228382283922840228412284222843228442284522846228472284822849228502285122852228532285422855228562285722858228592286022861228622286322864228652286622867228682286922870228712287222873228742287522876228772287822879228802288122882228832288422885228862288722888228892289022891228922289322894228952289622897228982289922900229012290222903229042290522906229072290822909229102291122912229132291422915229162291722918229192292022921229222292322924229252292622927229282292922930229312293222933229342293522936229372293822939229402294122942229432294422945229462294722948229492295022951229522295322954229552295622957229582295922960229612296222963229642296522966229672296822969229702297122972229732297422975229762297722978229792298022981229822298322984229852298622987229882298922990229912299222993229942299522996229972299822999230002300123002230032300423005230062300723008230092301023011230122301323014230152301623017230182301923020230212
30222302323024230252302623027230282302923030230312303223033230342303523036230372303823039230402304123042230432304423045230462304723048230492305023051230522305323054230552305623057230582305923060230612306223063230642306523066230672306823069230702307123072230732307423075230762307723078230792308023081230822308323084230852308623087230882308923090230912309223093230942309523096230972309823099231002310123102231032310423105231062310723108231092311023111231122311323114231152311623117231182311923120231212312223123231242312523126231272312823129231302313123132231332313423135231362313723138231392314023141231422314323144231452314623147231482314923150231512315223153231542315523156231572315823159231602316123162231632316423165231662316723168231692317023171231722317323174231752317623177231782317923180231812318223183231842318523186231872318823189231902319123192231932319423195231962319723198231992320023201232022320323204232052320623207232082320923210232112321223213232142321523216232172321823219232202322123222232232322423225232262322723228232292323023231232322323323234232352323623237232382323923240232412324223243232442324523246232472324823249232502325123252232532325423255232562325723258232592326023261232622326323264232652326623267232682326923270232712327223273232742327523276232772327823279232802328123282232832328423285232862328723288232892329023291232922329323294232952329623297232982329923300233012330223303233042330523306233072330823309233102331123312233132331423315233162331723318233192332023321233222332323324233252332623327233282332923330233312333223333233342333523336233372333823339233402334123342233432334423345233462334723348233492335023351233522335323354233552335623357233582335923360233612336223363233642336523366233672336823369233702337123372233732337423375233762337723378233792338023381233822338323384233852338623387233882338923390233912339223393233942339523396233972339823399234002340123402234032340423405234062340723408234092341023411234122341323414234152341623417234182341923420234212
34222342323424234252342623427234282342923430234312343223433234342343523436234372343823439234402344123442234432344423445234462344723448234492345023451234522345323454234552345623457234582345923460234612346223463234642346523466234672346823469234702347123472234732347423475234762347723478234792348023481234822348323484234852348623487234882348923490234912349223493234942349523496234972349823499235002350123502235032350423505235062350723508235092351023511235122351323514235152351623517235182351923520235212352223523235242352523526235272352823529235302353123532235332353423535235362353723538235392354023541235422354323544235452354623547235482354923550235512355223553235542355523556235572355823559235602356123562235632356423565235662356723568235692357023571235722357323574235752357623577235782357923580235812358223583235842358523586235872358823589235902359123592235932359423595235962359723598235992360023601236022360323604236052360623607236082360923610236112361223613236142361523616236172361823619236202362123622236232362423625236262362723628236292363023631236322363323634236352363623637236382363923640236412364223643236442364523646236472364823649236502365123652236532365423655236562365723658236592366023661236622366323664236652366623667236682366923670236712367223673236742367523676236772367823679236802368123682236832368423685236862368723688236892369023691236922369323694236952369623697236982369923700237012370223703237042370523706237072370823709237102371123712237132371423715237162371723718237192372023721237222372323724237252372623727237282372923730237312373223733237342373523736237372373823739237402374123742237432374423745237462374723748237492375023751237522375323754237552375623757237582375923760237612376223763237642376523766237672376823769237702377123772237732377423775237762377723778237792378023781237822378323784237852378623787237882378923790237912379223793237942379523796237972379823799238002380123802238032380423805238062380723808238092381023811238122381323814238152381623817238182381923820238212
38222382323824238252382623827238282382923830238312383223833238342383523836238372383823839238402384123842238432384423845238462384723848238492385023851238522385323854238552385623857238582385923860238612386223863238642386523866238672386823869238702387123872238732387423875238762387723878238792388023881238822388323884238852388623887238882388923890238912389223893238942389523896238972389823899239002390123902239032390423905239062390723908239092391023911239122391323914239152391623917239182391923920239212392223923239242392523926239272392823929239302393123932239332393423935239362393723938239392394023941239422394323944239452394623947239482394923950239512395223953239542395523956239572395823959239602396123962239632396423965239662396723968239692397023971239722397323974239752397623977239782397923980239812398223983239842398523986239872398823989239902399123992239932399423995239962399723998239992400024001240022400324004240052400624007240082400924010240112401224013240142401524016240172401824019240202402124022240232402424025240262402724028240292403024031240322403324034240352403624037240382403924040240412404224043240442404524046240472404824049240502405124052240532405424055240562405724058240592406024061240622406324064240652406624067240682406924070240712407224073240742407524076240772407824079240802408124082240832408424085240862408724088240892409024091240922409324094240952409624097240982409924100241012410224103241042410524106241072410824109241102411124112241132411424115241162411724118241192412024121241222412324124241252412624127241282412924130241312413224133241342413524136241372413824139241402414124142241432414424145241462414724148241492415024151241522415324154241552415624157241582415924160241612416224163241642416524166241672416824169241702417124172241732417424175241762417724178241792418024181241822418324184241852418624187241882418924190241912419224193241942419524196241972419824199242002420124202242032420424205242062420724208242092421024211242122421324214242152421624217242182421924220242212
42222422324224242252422624227242282422924230242312423224233242342423524236242372423824239242402424124242242432424424245242462424724248242492425024251242522425324254242552425624257242582425924260242612426224263242642426524266242672426824269242702427124272242732427424275242762427724278242792428024281242822428324284242852428624287242882428924290242912429224293242942429524296242972429824299243002430124302243032430424305243062430724308243092431024311243122431324314243152431624317243182431924320243212432224323243242432524326243272432824329243302433124332243332433424335243362433724338243392434024341243422434324344243452434624347243482434924350243512435224353243542435524356243572435824359243602436124362243632436424365243662436724368243692437024371243722437324374243752437624377243782437924380243812438224383243842438524386243872438824389243902439124392243932439424395243962439724398243992440024401244022440324404244052440624407244082440924410244112441224413244142441524416244172441824419244202442124422244232442424425244262442724428244292443024431244322443324434244352443624437244382443924440244412444224443244442444524446244472444824449244502445124452244532445424455244562445724458244592446024461244622446324464244652446624467244682446924470244712447224473244742447524476244772447824479244802448124482244832448424485244862448724488244892449024491244922449324494244952449624497244982449924500245012450224503245042450524506245072450824509245102451124512245132451424515245162451724518245192452024521245222452324524245252452624527245282452924530245312453224533245342453524536245372453824539245402454124542245432454424545245462454724548245492455024551245522455324554245552455624557245582455924560245612456224563245642456524566245672456824569245702457124572245732457424575245762457724578245792458024581245822458324584245852458624587245882458924590245912459224593245942459524596245972459824599246002460124602246032460424605246062460724608246092461024611246122461324614246152461624617246182461924620246212
46222462324624246252462624627246282462924630246312463224633246342463524636246372463824639246402464124642246432464424645246462464724648246492465024651246522465324654246552465624657246582465924660246612466224663246642466524666246672466824669246702467124672246732467424675246762467724678246792468024681246822468324684246852468624687246882468924690246912469224693246942469524696246972469824699247002470124702247032470424705247062470724708247092471024711247122471324714247152471624717247182471924720247212472224723247242472524726247272472824729247302473124732247332473424735247362473724738247392474024741247422474324744247452474624747247482474924750247512475224753247542475524756247572475824759247602476124762247632476424765247662476724768247692477024771247722477324774247752477624777247782477924780247812478224783247842478524786247872478824789247902479124792247932479424795247962479724798247992480024801248022480324804248052480624807248082480924810248112481224813248142481524816248172481824819248202482124822248232482424825248262482724828248292483024831248322483324834248352483624837248382483924840248412484224843248442484524846248472484824849248502485124852248532485424855248562485724858248592486024861248622486324864248652486624867248682486924870248712487224873248742487524876248772487824879248802488124882248832488424885248862488724888248892489024891248922489324894248952489624897248982489924900249012490224903249042490524906249072490824909249102491124912249132491424915249162491724918249192492024921249222492324924249252492624927249282492924930249312493224933249342493524936249372493824939249402494124942249432494424945249462494724948249492495024951249522495324954249552495624957249582495924960249612496224963249642496524966249672496824969249702497124972249732497424975249762497724978249792498024981249822498324984249852498624987249882498924990249912499224993249942499524996249972499824999250002500125002250032500425005250062500725008250092501025011250122501325014250152501625017250182501925020250212
50222502325024250252502625027250282502925030250312503225033250342503525036250372503825039250402504125042250432504425045250462504725048250492505025051250522505325054250552505625057250582505925060250612506225063250642506525066250672506825069250702507125072250732507425075250762507725078250792508025081250822508325084250852508625087250882508925090250912509225093250942509525096250972509825099251002510125102251032510425105251062510725108251092511025111251122511325114251152511625117251182511925120251212512225123251242512525126251272512825129251302513125132251332513425135251362513725138251392514025141251422514325144251452514625147251482514925150251512515225153251542515525156251572515825159251602516125162251632516425165251662516725168251692517025171251722517325174251752517625177251782517925180251812518225183251842518525186251872518825189251902519125192251932519425195251962519725198251992520025201252022520325204252052520625207252082520925210252112521225213252142521525216252172521825219252202522125222252232522425225252262522725228252292523025231252322523325234252352523625237252382523925240252412524225243252442524525246252472524825249252502525125252252532525425255252562525725258252592526025261252622526325264252652526625267252682526925270252712527225273252742527525276252772527825279252802528125282252832528425285252862528725288252892529025291252922529325294252952529625297252982529925300253012530225303253042530525306253072530825309253102531125312253132531425315253162531725318253192532025321253222532325324253252532625327253282532925330253312533225333253342533525336253372533825339253402534125342253432534425345253462534725348253492535025351253522535325354253552535625357253582535925360253612536225363253642536525366253672536825369253702537125372253732537425375253762537725378253792538025381253822538325384253852538625387253882538925390253912539225393253942539525396253972539825399254002540125402254032540425405254062540725408254092541025411254122541325414254152541625417254182541925420254212
54222542325424254252542625427254282542925430254312543225433254342543525436254372543825439254402544125442254432544425445254462544725448254492545025451254522545325454254552545625457254582545925460254612546225463254642546525466254672546825469254702547125472254732547425475254762547725478254792548025481254822548325484254852548625487254882548925490254912549225493254942549525496254972549825499255002550125502255032550425505255062550725508255092551025511255122551325514255152551625517255182551925520255212552225523255242552525526255272552825529255302553125532255332553425535255362553725538255392554025541255422554325544255452554625547255482554925550255512555225553255542555525556255572555825559255602556125562255632556425565255662556725568255692557025571255722557325574255752557625577255782557925580255812558225583255842558525586255872558825589255902559125592255932559425595255962559725598255992560025601256022560325604256052560625607256082560925610256112561225613256142561525616256172561825619256202562125622256232562425625256262562725628256292563025631256322563325634256352563625637256382563925640256412564225643256442564525646256472564825649256502565125652256532565425655256562565725658256592566025661256622566325664256652566625667256682566925670256712567225673256742567525676256772567825679256802568125682256832568425685256862568725688256892569025691256922569325694256952569625697256982569925700257012570225703257042570525706257072570825709257102571125712257132571425715257162571725718257192572025721257222572325724257252572625727257282572925730257312573225733257342573525736257372573825739257402574125742257432574425745257462574725748257492575025751257522575325754257552575625757257582575925760257612576225763257642576525766257672576825769257702577125772257732577425775257762577725778257792578025781257822578325784257852578625787257882578925790257912579225793257942579525796257972579825799258002580125802258032580425805258062580725808258092581025811258122581325814258152581625817258182581925820258212
58222582325824258252582625827258282582925830258312583225833258342583525836258372583825839258402584125842258432584425845258462584725848258492585025851258522585325854258552585625857258582585925860258612586225863258642586525866258672586825869258702587125872258732587425875258762587725878258792588025881258822588325884258852588625887258882588925890258912589225893258942589525896258972589825899259002590125902259032590425905259062590725908259092591025911259122591325914259152591625917259182591925920259212592225923259242592525926259272592825929259302593125932259332593425935259362593725938259392594025941259422594325944259452594625947259482594925950259512595225953259542595525956259572595825959259602596125962259632596425965259662596725968259692597025971259722597325974259752597625977259782597925980259812598225983259842598525986259872598825989259902599125992259932599425995259962599725998259992600026001260022600326004260052600626007260082600926010260112601226013260142601526016260172601826019260202602126022260232602426025260262602726028260292603026031260322603326034260352603626037260382603926040260412604226043260442604526046260472604826049260502605126052260532605426055260562605726058260592606026061260622606326064260652606626067260682606926070260712607226073260742607526076260772607826079260802608126082260832608426085260862608726088260892609026091260922609326094260952609626097260982609926100261012610226103261042610526106261072610826109261102611126112261132611426115261162611726118261192612026121261222612326124261252612626127261282612926130261312613226133261342613526136261372613826139261402614126142261432614426145261462614726148261492615026151261522615326154261552615626157261582615926160261612616226163261642616526166261672616826169261702617126172261732617426175261762617726178261792618026181261822618326184261852618626187261882618926190261912619226193261942619526196261972619826199262002620126202262032620426205262062620726208262092621026211262122621326214262152621626217262182621926220262212
62222622326224262252622626227262282622926230262312623226233262342623526236262372623826239262402624126242262432624426245262462624726248262492625026251262522625326254262552625626257262582625926260262612626226263262642626526266262672626826269262702627126272262732627426275262762627726278262792628026281262822628326284262852628626287262882628926290262912629226293262942629526296262972629826299263002630126302263032630426305263062630726308263092631026311263122631326314263152631626317263182631926320263212632226323263242632526326263272632826329263302633126332263332633426335263362633726338263392634026341263422634326344263452634626347263482634926350263512635226353263542635526356263572635826359263602636126362263632636426365263662636726368263692637026371263722637326374263752637626377263782637926380263812638226383263842638526386263872638826389263902639126392263932639426395263962639726398263992640026401264022640326404264052640626407264082640926410264112641226413264142641526416264172641826419264202642126422264232642426425264262642726428264292643026431264322643326434264352643626437264382643926440264412644226443264442644526446264472644826449264502645126452264532645426455264562645726458264592646026461264622646326464264652646626467264682646926470264712647226473264742647526476264772647826479264802648126482264832648426485264862648726488264892649026491264922649326494264952649626497264982649926500265012650226503265042650526506265072650826509265102651126512265132651426515265162651726518265192652026521265222652326524265252652626527265282652926530265312653226533265342653526536265372653826539265402654126542265432654426545265462654726548265492655026551265522655326554265552655626557265582655926560265612656226563265642656526566265672656826569265702657126572265732657426575265762657726578265792658026581265822658326584265852658626587265882658926590265912659226593265942659526596265972659826599266002660126602266032660426605266062660726608266092661026611266122661326614266152661626617266182661926620266212
66222662326624266252662626627266282662926630266312663226633266342663526636266372663826639266402664126642266432664426645266462664726648266492665026651266522665326654266552665626657266582665926660266612666226663266642666526666266672666826669266702667126672266732667426675266762667726678266792668026681266822668326684266852668626687266882668926690266912669226693266942669526696266972669826699267002670126702267032670426705267062670726708267092671026711267122671326714267152671626717267182671926720267212672226723267242672526726267272672826729267302673126732267332673426735267362673726738267392674026741267422674326744267452674626747267482674926750267512675226753267542675526756267572675826759267602676126762267632676426765267662676726768267692677026771267722677326774267752677626777267782677926780267812678226783267842678526786267872678826789267902679126792267932679426795267962679726798267992680026801268022680326804268052680626807268082680926810268112681226813268142681526816268172681826819268202682126822268232682426825268262682726828268292683026831268322683326834268352683626837268382683926840268412684226843268442684526846268472684826849268502685126852268532685426855268562685726858268592686026861268622686326864268652686626867268682686926870268712687226873268742687526876268772687826879268802688126882268832688426885268862688726888268892689026891268922689326894268952689626897268982689926900269012690226903269042690526906269072690826909269102691126912269132691426915269162691726918269192692026921269222692326924269252692626927269282692926930269312693226933269342693526936269372693826939269402694126942269432694426945269462694726948269492695026951269522695326954269552695626957269582695926960269612696226963269642696526966269672696826969269702697126972269732697426975269762697726978269792698026981269822698326984269852698626987269882698926990269912699226993269942699526996269972699826999270002700127002270032700427005270062700727008270092701027011270122701327014270152701627017270182701927020270212
70222702327024270252702627027270282702927030270312703227033270342703527036270372703827039270402704127042270432704427045270462704727048270492705027051270522705327054270552705627057270582705927060270612706227063270642706527066270672706827069270702707127072270732707427075270762707727078270792708027081270822708327084270852708627087270882708927090270912709227093270942709527096270972709827099271002710127102271032710427105271062710727108271092711027111271122711327114271152711627117271182711927120271212712227123271242712527126271272712827129271302713127132271332713427135271362713727138271392714027141271422714327144271452714627147271482714927150271512715227153271542715527156271572715827159271602716127162271632716427165271662716727168271692717027171271722717327174271752717627177271782717927180271812718227183271842718527186271872718827189271902719127192271932719427195271962719727198271992720027201272022720327204272052720627207272082720927210272112721227213272142721527216272172721827219272202722127222272232722427225272262722727228272292723027231272322723327234272352723627237272382723927240272412724227243272442724527246272472724827249272502725127252272532725427255272562725727258272592726027261272622726327264272652726627267272682726927270272712727227273272742727527276272772727827279272802728127282272832728427285272862728727288272892729027291272922729327294272952729627297272982729927300273012730227303273042730527306273072730827309273102731127312273132731427315273162731727318273192732027321273222732327324273252732627327273282732927330273312733227333273342733527336273372733827339273402734127342273432734427345273462734727348273492735027351273522735327354273552735627357273582735927360273612736227363273642736527366273672736827369273702737127372273732737427375273762737727378273792738027381273822738327384273852738627387273882738927390273912739227393273942739527396273972739827399274002740127402274032740427405274062740727408274092741027411274122741327414274152741627417274182741927420274212
74222742327424274252742627427274282742927430274312743227433274342743527436274372743827439274402744127442274432744427445274462744727448274492745027451274522745327454274552745627457274582745927460274612746227463274642746527466274672746827469274702747127472274732747427475274762747727478274792748027481274822748327484274852748627487274882748927490274912749227493274942749527496274972749827499275002750127502275032750427505275062750727508275092751027511275122751327514275152751627517275182751927520275212752227523275242752527526275272752827529275302753127532275332753427535275362753727538275392754027541275422754327544275452754627547275482754927550275512755227553275542755527556275572755827559275602756127562275632756427565275662756727568275692757027571275722757327574275752757627577275782757927580275812758227583275842758527586275872758827589275902759127592275932759427595275962759727598275992760027601276022760327604276052760627607276082760927610276112761227613276142761527616276172761827619276202762127622276232762427625276262762727628276292763027631276322763327634276352763627637276382763927640276412764227643276442764527646276472764827649276502765127652276532765427655276562765727658276592766027661276622766327664276652766627667276682766927670276712767227673276742767527676276772767827679276802768127682276832768427685276862768727688276892769027691276922769327694276952769627697276982769927700277012770227703277042770527706277072770827709277102771127712277132771427715277162771727718277192772027721277222772327724277252772627727277282772927730277312773227733277342773527736277372773827739277402774127742277432774427745277462774727748277492775027751277522775327754277552775627757277582775927760277612776227763277642776527766277672776827769277702777127772277732777427775277762777727778277792778027781277822778327784277852778627787277882778927790277912779227793277942779527796277972779827799278002780127802278032780427805278062780727808278092781027811278122781327814278152781627817278182781927820278212
78222782327824278252782627827278282782927830278312783227833278342783527836278372783827839278402784127842278432784427845278462784727848278492785027851278522785327854278552785627857278582785927860278612786227863278642786527866278672786827869278702787127872278732787427875278762787727878278792788027881278822788327884278852788627887278882788927890278912789227893278942789527896278972789827899279002790127902279032790427905279062790727908279092791027911279122791327914279152791627917279182791927920279212792227923279242792527926279272792827929279302793127932279332793427935279362793727938279392794027941279422794327944279452794627947279482794927950279512795227953279542795527956279572795827959279602796127962279632796427965279662796727968279692797027971279722797327974279752797627977279782797927980279812798227983279842798527986279872798827989279902799127992279932799427995279962799727998279992800028001280022800328004280052800628007280082800928010280112801228013280142801528016280172801828019280202802128022280232802428025280262802728028280292803028031280322803328034280352803628037280382803928040280412804228043280442804528046280472804828049280502805128052280532805428055280562805728058280592806028061280622806328064280652806628067280682806928070280712807228073280742807528076280772807828079280802808128082280832808428085280862808728088280892809028091280922809328094280952809628097280982809928100281012810228103281042810528106281072810828109281102811128112281132811428115281162811728118281192812028121281222812328124281252812628127281282812928130281312813228133281342813528136281372813828139281402814128142281432814428145281462814728148281492815028151281522815328154281552815628157281582815928160281612816228163281642816528166281672816828169281702817128172281732817428175281762817728178281792818028181281822818328184281852818628187281882818928190281912819228193281942819528196281972819828199282002820128202282032820428205282062820728208282092821028211282122821328214282152821628217282182821928220282212
82222822328224282252822628227282282822928230282312823228233282342823528236282372823828239282402824128242282432824428245282462824728248282492825028251282522825328254282552825628257282582825928260282612826228263282642826528266282672826828269282702827128272282732827428275282762827728278282792828028281282822828328284282852828628287282882828928290282912829228293282942829528296282972829828299283002830128302283032830428305283062830728308283092831028311283122831328314283152831628317283182831928320283212832228323283242832528326283272832828329283302833128332283332833428335283362833728338283392834028341283422834328344283452834628347283482834928350283512835228353283542835528356283572835828359283602836128362283632836428365283662836728368283692837028371283722837328374283752837628377283782837928380283812838228383283842838528386283872838828389283902839128392283932839428395283962839728398283992840028401284022840328404284052840628407284082840928410284112841228413284142841528416284172841828419284202842128422284232842428425284262842728428284292843028431284322843328434284352843628437284382843928440284412844228443284442844528446284472844828449284502845128452284532845428455284562845728458284592846028461284622846328464284652846628467284682846928470284712847228473284742847528476284772847828479284802848128482284832848428485284862848728488284892849028491284922849328494284952849628497284982849928500285012850228503285042850528506285072850828509285102851128512285132851428515285162851728518285192852028521285222852328524285252852628527285282852928530285312853228533285342853528536285372853828539285402854128542285432854428545285462854728548285492855028551285522855328554285552855628557285582855928560285612856228563285642856528566285672856828569285702857128572285732857428575285762857728578285792858028581285822858328584285852858628587285882858928590285912859228593285942859528596285972859828599286002860128602286032860428605286062860728608286092861028611286122861328614286152861628617286182861928620286212
86222862328624286252862628627286282862928630286312863228633286342863528636286372863828639286402864128642286432864428645286462864728648286492865028651286522865328654286552865628657286582865928660286612866228663286642866528666286672866828669286702867128672286732867428675286762867728678286792868028681286822868328684286852868628687286882868928690286912869228693286942869528696286972869828699287002870128702287032870428705287062870728708287092871028711287122871328714287152871628717287182871928720287212872228723287242872528726287272872828729287302873128732287332873428735287362873728738287392874028741287422874328744287452874628747287482874928750287512875228753287542875528756287572875828759287602876128762287632876428765287662876728768287692877028771287722877328774287752877628777287782877928780287812878228783287842878528786287872878828789287902879128792287932879428795287962879728798287992880028801288022880328804288052880628807288082880928810288112881228813288142881528816288172881828819288202882128822288232882428825288262882728828288292883028831288322883328834288352883628837288382883928840288412884228843288442884528846288472884828849288502885128852288532885428855288562885728858288592886028861288622886328864288652886628867288682886928870288712887228873288742887528876288772887828879288802888128882288832888428885288862888728888288892889028891288922889328894288952889628897288982889928900289012890228903289042890528906289072890828909289102891128912289132891428915289162891728918289192892028921289222892328924289252892628927289282892928930289312893228933289342893528936289372893828939289402894128942289432894428945289462894728948289492895028951289522895328954289552895628957289582895928960289612896228963289642896528966289672896828969289702897128972289732897428975289762897728978289792898028981289822898328984289852898628987289882898928990289912899228993289942899528996289972899828999290002900129002290032900429005290062900729008290092901029011290122901329014290152901629017290182901929020290212
90222902329024290252902629027290282902929030290312903229033290342903529036290372903829039290402904129042290432904429045290462904729048290492905029051290522905329054290552905629057290582905929060290612906229063290642906529066290672906829069290702907129072290732907429075290762907729078290792908029081290822908329084290852908629087290882908929090290912909229093290942909529096290972909829099291002910129102291032910429105291062910729108291092911029111291122911329114291152911629117291182911929120291212912229123291242912529126291272912829129291302913129132291332913429135291362913729138291392914029141291422914329144291452914629147291482914929150291512915229153291542915529156291572915829159291602916129162291632916429165291662916729168291692917029171291722917329174291752917629177291782917929180291812918229183291842918529186291872918829189291902919129192291932919429195291962919729198291992920029201292022920329204292052920629207292082920929210292112921229213292142921529216292172921829219292202922129222292232922429225292262922729228292292923029231292322923329234292352923629237292382923929240292412924229243292442924529246292472924829249292502925129252292532925429255292562925729258292592926029261292622926329264292652926629267292682926929270292712927229273292742927529276292772927829279292802928129282292832928429285292862928729288292892929029291292922929329294292952929629297292982929929300293012930229303293042930529306293072930829309293102931129312293132931429315293162931729318293192932029321293222932329324293252932629327293282932929330293312933229333293342933529336293372933829339293402934129342293432934429345293462934729348293492935029351293522935329354293552935629357293582935929360293612936229363293642936529366293672936829369293702937129372293732937429375293762937729378293792938029381293822938329384293852938629387293882938929390293912939229393293942939529396293972939829399294002940129402294032940429405294062940729408294092941029411294122941329414294152941629417294182941929420294212
94222942329424294252942629427294282942929430294312943229433294342943529436294372943829439294402944129442294432944429445294462944729448294492945029451294522945329454294552945629457294582945929460294612946229463294642946529466294672946829469294702947129472294732947429475294762947729478294792948029481294822948329484294852948629487294882948929490294912949229493294942949529496294972949829499295002950129502295032950429505295062950729508295092951029511295122951329514295152951629517295182951929520295212952229523295242952529526295272952829529295302953129532295332953429535295362953729538295392954029541295422954329544295452954629547295482954929550295512955229553295542955529556295572955829559295602956129562295632956429565295662956729568295692957029571295722957329574295752957629577295782957929580295812958229583295842958529586295872958829589295902959129592295932959429595295962959729598295992960029601296022960329604296052960629607296082960929610296112961229613296142961529616296172961829619296202962129622296232962429625296262962729628296292963029631296322963329634296352963629637296382963929640296412964229643296442964529646296472964829649296502965129652296532965429655296562965729658296592966029661296622966329664296652966629667296682966929670296712967229673296742967529676296772967829679296802968129682296832968429685296862968729688296892969029691296922969329694296952969629697296982969929700297012970229703297042970529706297072970829709297102971129712297132971429715297162971729718297192972029721297222972329724297252972629727297282972929730297312973229733297342973529736297372973829739297402974129742297432974429745297462974729748297492975029751297522975329754297552975629757297582975929760297612976229763297642976529766297672976829769297702977129772297732977429775297762977729778297792978029781297822978329784297852978629787297882978929790297912979229793297942979529796297972979829799298002980129802298032980429805298062980729808298092981029811298122981329814298152981629817298182981929820298212
98222982329824298252982629827298282982929830298312983229833298342983529836298372983829839298402984129842298432984429845298462984729848298492985029851298522985329854298552985629857298582985929860298612986229863298642986529866298672986829869298702987129872298732987429875298762987729878298792988029881298822988329884298852988629887298882988929890298912989229893298942989529896298972989829899299002990129902299032990429905299062990729908299092991029911299122991329914299152991629917299182991929920299212992229923299242992529926299272992829929299302993129932299332993429935299362993729938299392994029941299422994329944299452994629947299482994929950299512995229953299542995529956299572995829959299602996129962299632996429965299662996729968299692997029971299722997329974299752997629977299782997929980299812998229983299842998529986299872998829989299902999129992299932999429995299962999729998299993000030001300023000330004300053000630007300083000930010300113001230013300143001530016300173001830019300203002130022300233002430025300263002730028300293003030031300323003330034300353003630037300383003930040300413004230043300443004530046300473004830049300503005130052300533005430055300563005730058300593006030061300623006330064300653006630067300683006930070300713007230073300743007530076300773007830079300803008130082300833008430085300863008730088300893009030091300923009330094300953009630097300983009930100301013010230103301043010530106301073010830109301103011130112301133011430115301163011730118301193012030121301223012330124301253012630127301283012930130301313013230133301343013530136301373013830139301403014130142301433014430145301463014730148301493015030151301523015330154301553015630157301583015930160301613016230163301643016530166301673016830169301703017130172301733017430175301763017730178301793018030181301823018330184301853018630187301883018930190301913019230193301943019530196301973019830199302003020130202302033020430205302063020730208302093021030211302123021330214302153021630217302183021930220302213
02223022330224302253022630227302283022930230302313023230233302343023530236302373023830239302403024130242302433024430245302463024730248302493025030251302523025330254302553025630257302583025930260302613026230263302643026530266302673026830269302703027130272302733027430275302763027730278302793028030281302823028330284302853028630287302883028930290302913029230293302943029530296302973029830299303003030130302303033030430305303063030730308303093031030311303123031330314303153031630317303183031930320303213032230323303243032530326303273032830329303303033130332303333033430335303363033730338303393034030341303423034330344303453034630347303483034930350303513035230353303543035530356303573035830359303603036130362303633036430365303663036730368303693037030371303723037330374303753037630377303783037930380303813038230383303843038530386303873038830389303903039130392303933039430395303963039730398303993040030401304023040330404304053040630407304083040930410304113041230413304143041530416304173041830419304203042130422304233042430425304263042730428304293043030431304323043330434304353043630437304383043930440304413044230443304443044530446304473044830449304503045130452304533045430455304563045730458304593046030461304623046330464304653046630467304683046930470304713047230473304743047530476304773047830479304803048130482304833048430485304863048730488304893049030491304923049330494304953049630497304983049930500305013050230503305043050530506305073050830509305103051130512305133051430515305163051730518305193052030521305223052330524305253052630527305283052930530305313053230533305343053530536305373053830539305403054130542305433054430545305463054730548305493055030551305523055330554305553055630557305583055930560305613056230563305643056530566305673056830569305703057130572305733057430575305763057730578305793058030581305823058330584305853058630587305883058930590305913059230593305943059530596305973059830599306003060130602306033060430605306063060730608306093061030611306123061330614306153061630617306183061930620306213
06223062330624306253062630627306283062930630306313063230633306343063530636306373063830639306403064130642306433064430645306463064730648306493065030651306523065330654306553065630657306583065930660306613066230663306643066530666306673066830669306703067130672306733067430675306763067730678306793068030681306823068330684306853068630687306883068930690306913069230693306943069530696306973069830699307003070130702307033070430705307063070730708307093071030711307123071330714307153071630717307183071930720307213072230723307243072530726307273072830729307303073130732307333073430735307363073730738307393074030741307423074330744307453074630747307483074930750307513075230753307543075530756307573075830759307603076130762307633076430765307663076730768307693077030771307723077330774307753077630777307783077930780307813078230783307843078530786307873078830789307903079130792307933079430795307963079730798307993080030801308023080330804308053080630807308083080930810308113081230813308143081530816308173081830819308203082130822308233082430825308263082730828308293083030831308323083330834308353083630837308383083930840308413084230843308443084530846308473084830849308503085130852308533085430855308563085730858308593086030861308623086330864308653086630867308683086930870308713087230873308743087530876308773087830879308803088130882308833088430885308863088730888308893089030891308923089330894308953089630897308983089930900309013090230903309043090530906309073090830909309103091130912309133091430915309163091730918309193092030921309223092330924309253092630927309283092930930309313093230933309343093530936309373093830939309403094130942309433094430945309463094730948309493095030951309523095330954309553095630957309583095930960309613096230963309643096530966309673096830969309703097130972309733097430975309763097730978309793098030981309823098330984309853098630987309883098930990309913099230993309943099530996309973099830999310003100131002310033100431005310063100731008310093101031011310123101331014310153101631017310183101931020310213
10223102331024310253102631027310283102931030310313103231033310343103531036310373103831039310403104131042310433104431045310463104731048310493105031051310523105331054310553105631057310583105931060310613106231063310643106531066310673106831069310703107131072310733107431075310763107731078310793108031081310823108331084310853108631087310883108931090310913109231093310943109531096310973109831099311003110131102311033110431105311063110731108311093111031111311123111331114311153111631117311183111931120311213112231123311243112531126311273112831129311303113131132311333113431135311363113731138311393114031141311423114331144311453114631147311483114931150311513115231153311543115531156311573115831159311603116131162311633116431165311663116731168311693117031171311723117331174311753117631177311783117931180311813118231183311843118531186311873118831189311903119131192311933119431195311963119731198311993120031201312023120331204312053120631207312083120931210312113121231213312143121531216312173121831219312203122131222312233122431225312263122731228312293123031231312323123331234312353123631237312383123931240312413124231243312443124531246312473124831249312503125131252312533125431255312563125731258312593126031261312623126331264312653126631267312683126931270312713127231273312743127531276312773127831279312803128131282312833128431285312863128731288312893129031291312923129331294312953129631297312983129931300313013130231303313043130531306313073130831309313103131131312313133131431315313163131731318313193132031321313223132331324313253132631327313283132931330313313133231333313343133531336313373133831339313403134131342313433134431345313463134731348313493135031351313523135331354313553135631357313583135931360313613136231363313643136531366313673136831369313703137131372313733137431375313763137731378313793138031381313823138331384313853138631387313883138931390313913139231393313943139531396313973139831399314003140131402314033140431405314063140731408314093141031411314123141331414314153141631417314183141931420314213
14223142331424314253142631427314283142931430314313143231433314343143531436314373143831439314403144131442314433144431445314463144731448314493145031451314523145331454314553145631457314583145931460314613146231463314643146531466314673146831469314703147131472314733147431475314763147731478314793148031481314823148331484314853148631487314883148931490314913149231493314943149531496314973149831499315003150131502315033150431505315063150731508315093151031511315123151331514315153151631517315183151931520315213152231523315243152531526315273152831529315303153131532315333153431535315363153731538315393154031541315423154331544315453154631547315483154931550315513155231553315543155531556315573155831559315603156131562315633156431565315663156731568315693157031571315723157331574315753157631577315783157931580315813158231583315843158531586315873158831589315903159131592315933159431595315963159731598315993160031601316023160331604316053160631607316083160931610316113161231613316143161531616316173161831619316203162131622316233162431625316263162731628316293163031631316323163331634316353163631637316383163931640316413164231643316443164531646316473164831649316503165131652316533165431655316563165731658316593166031661316623166331664316653166631667316683166931670316713167231673316743167531676316773167831679316803168131682316833168431685316863168731688316893169031691316923169331694316953169631697316983169931700317013170231703317043170531706317073170831709317103171131712317133171431715317163171731718317193172031721317223172331724317253172631727317283172931730317313173231733317343173531736317373173831739317403174131742317433174431745317463174731748317493175031751317523175331754317553175631757317583175931760317613176231763317643176531766317673176831769317703177131772317733177431775317763177731778317793178031781317823178331784317853178631787317883178931790317913179231793317943179531796317973179831799318003180131802318033180431805318063180731808318093181031811318123181331814318153181631817318183181931820318213
18223182331824318253182631827318283182931830318313183231833318343183531836318373183831839318403184131842318433184431845318463184731848318493185031851318523185331854318553185631857318583185931860318613186231863318643186531866318673186831869318703187131872318733187431875318763187731878318793188031881318823188331884318853188631887318883188931890318913189231893318943189531896318973189831899319003190131902319033190431905319063190731908319093191031911319123191331914319153191631917319183191931920319213192231923319243192531926319273192831929319303193131932319333193431935319363193731938319393194031941319423194331944319453194631947319483194931950319513195231953319543195531956319573195831959319603196131962319633196431965319663196731968319693197031971319723197331974319753197631977319783197931980319813198231983319843198531986319873198831989319903199131992319933199431995319963199731998319993200032001320023200332004320053200632007320083200932010320113201232013320143201532016320173201832019320203202132022320233202432025320263202732028320293203032031320323203332034320353203632037320383203932040320413204232043320443204532046320473204832049320503205132052320533205432055320563205732058320593206032061320623206332064320653206632067320683206932070320713207232073320743207532076320773207832079320803208132082320833208432085320863208732088320893209032091320923209332094320953209632097320983209932100321013210232103321043210532106321073210832109321103211132112321133211432115321163211732118321193212032121321223212332124321253212632127321283212932130321313213232133321343213532136321373213832139321403214132142321433214432145321463214732148321493215032151321523215332154321553215632157321583215932160321613216232163321643216532166321673216832169321703217132172321733217432175321763217732178321793218032181321823218332184321853218632187321883218932190321913219232193321943219532196321973219832199322003220132202322033220432205322063220732208322093221032211322123221332214322153221632217322183221932220322213
22223222332224322253222632227322283222932230322313223232233322343223532236322373223832239322403224132242322433224432245322463224732248322493225032251322523225332254322553225632257322583225932260322613226232263322643226532266322673226832269322703227132272322733227432275322763227732278322793228032281322823228332284322853228632287322883228932290322913229232293322943229532296322973229832299323003230132302323033230432305323063230732308323093231032311323123231332314323153231632317323183231932320323213232232323323243232532326323273232832329323303233132332323333233432335323363233732338323393234032341323423234332344323453234632347323483234932350323513235232353323543235532356323573235832359323603236132362323633236432365323663236732368323693237032371323723237332374323753237632377323783237932380323813238232383323843238532386323873238832389323903239132392323933239432395323963239732398323993240032401324023240332404324053240632407324083240932410324113241232413324143241532416324173241832419324203242132422324233242432425324263242732428324293243032431324323243332434324353243632437324383243932440324413244232443324443244532446324473244832449324503245132452324533245432455324563245732458324593246032461324623246332464324653246632467324683246932470324713247232473324743247532476324773247832479324803248132482324833248432485324863248732488324893249032491324923249332494324953249632497324983249932500325013250232503325043250532506325073250832509325103251132512325133251432515325163251732518325193252032521325223252332524325253252632527325283252932530325313253232533325343253532536325373253832539325403254132542325433254432545325463254732548325493255032551325523255332554325553255632557325583255932560325613256232563325643256532566325673256832569325703257132572325733257432575325763257732578325793258032581325823258332584325853258632587325883258932590325913259232593325943259532596325973259832599326003260132602326033260432605326063260732608326093261032611326123261332614326153261632617326183261932620326213
26223262332624326253262632627326283262932630326313263232633326343263532636326373263832639326403264132642326433264432645326463264732648326493265032651326523265332654326553265632657326583265932660326613266232663326643266532666326673266832669326703267132672326733267432675326763267732678326793268032681326823268332684326853268632687326883268932690326913269232693326943269532696326973269832699327003270132702327033270432705327063270732708327093271032711327123271332714327153271632717327183271932720327213272232723327243272532726327273272832729327303273132732327333273432735327363273732738327393274032741327423274332744327453274632747327483274932750327513275232753327543275532756327573275832759327603276132762327633276432765327663276732768327693277032771327723277332774327753277632777327783277932780327813278232783327843278532786327873278832789327903279132792327933279432795327963279732798327993280032801328023280332804328053280632807328083280932810328113281232813328143281532816328173281832819328203282132822328233282432825328263282732828328293283032831328323283332834328353283632837328383283932840328413284232843328443284532846328473284832849328503285132852328533285432855328563285732858328593286032861328623286332864328653286632867328683286932870328713287232873328743287532876328773287832879328803288132882328833288432885328863288732888328893289032891328923289332894328953289632897328983289932900329013290232903329043290532906329073290832909329103291132912329133291432915329163291732918329193292032921329223292332924329253292632927329283292932930329313293232933329343293532936329373293832939329403294132942329433294432945329463294732948329493295032951329523295332954329553295632957329583295932960329613296232963329643296532966329673296832969329703297132972329733297432975329763297732978329793298032981329823298332984329853298632987329883298932990329913299232993329943299532996329973299832999330003300133002330033300433005330063300733008330093301033011330123301333014330153301633017330183301933020330213
30223302333024330253302633027330283302933030330313303233033330343303533036330373303833039330403304133042330433304433045330463304733048330493305033051330523305333054330553305633057330583305933060330613306233063330643306533066330673306833069330703307133072330733307433075330763307733078330793308033081330823308333084330853308633087330883308933090330913309233093330943309533096330973309833099331003310133102331033310433105331063310733108331093311033111331123311333114331153311633117331183311933120331213312233123331243312533126331273312833129331303313133132331333313433135331363313733138331393314033141331423314333144331453314633147331483314933150331513315233153331543315533156331573315833159331603316133162331633316433165331663316733168331693317033171331723317333174331753317633177331783317933180331813318233183331843318533186331873318833189331903319133192331933319433195331963319733198331993320033201332023320333204332053320633207332083320933210332113321233213332143321533216332173321833219332203322133222332233322433225332263322733228332293323033231332323323333234332353323633237332383323933240332413324233243332443324533246332473324833249332503325133252332533325433255332563325733258332593326033261332623326333264332653326633267332683326933270332713327233273332743327533276332773327833279332803328133282332833328433285332863328733288332893329033291332923329333294332953329633297332983329933300333013330233303333043330533306333073330833309333103331133312333133331433315333163331733318333193332033321333223332333324333253332633327333283332933330333313333233333333343333533336333373333833339333403334133342333433334433345333463334733348333493335033351333523335333354333553335633357333583335933360333613336233363333643336533366333673336833369333703337133372333733337433375333763337733378333793338033381333823338333384333853338633387333883338933390333913339233393333943339533396333973339833399334003340133402334033340433405334063340733408334093341033411334123341333414334153341633417334183341933420334213
34223342333424334253342633427334283342933430334313343233433334343343533436334373343833439334403344133442334433344433445334463344733448334493345033451334523345333454334553345633457334583345933460334613346233463334643346533466334673346833469334703347133472334733347433475334763347733478334793348033481334823348333484334853348633487334883348933490334913349233493334943349533496334973349833499335003350133502335033350433505335063350733508335093351033511335123351333514335153351633517335183351933520335213352233523335243352533526335273352833529335303353133532335333353433535335363353733538335393354033541335423354333544335453354633547335483354933550335513355233553335543355533556335573355833559335603356133562335633356433565335663356733568335693357033571335723357333574335753357633577335783357933580335813358233583335843358533586335873358833589335903359133592335933359433595335963359733598335993360033601336023360333604336053360633607336083360933610336113361233613336143361533616336173361833619336203362133622336233362433625336263362733628336293363033631336323363333634336353363633637336383363933640336413364233643336443364533646336473364833649336503365133652336533365433655336563365733658336593366033661336623366333664336653366633667336683366933670336713367233673336743367533676336773367833679336803368133682336833368433685336863368733688336893369033691336923369333694336953369633697336983369933700337013370233703337043370533706337073370833709337103371133712337133371433715337163371733718337193372033721337223372333724337253372633727337283372933730337313373233733337343373533736337373373833739337403374133742337433374433745337463374733748337493375033751337523375333754337553375633757337583375933760337613376233763337643376533766337673376833769337703377133772337733377433775337763377733778337793378033781337823378333784337853378633787337883378933790337913379233793337943379533796337973379833799338003380133802338033380433805338063380733808338093381033811338123381333814338153381633817338183381933820338213
38223382333824338253382633827338283382933830338313383233833338343383533836338373383833839338403384133842338433384433845338463384733848338493385033851338523385333854338553385633857338583385933860338613386233863338643386533866338673386833869338703387133872338733387433875338763387733878338793388033881338823388333884338853388633887338883388933890338913389233893338943389533896338973389833899339003390133902339033390433905339063390733908339093391033911339123391333914339153391633917339183391933920339213392233923339243392533926339273392833929339303393133932339333393433935339363393733938339393394033941339423394333944339453394633947339483394933950339513395233953339543395533956339573395833959339603396133962339633396433965339663396733968339693397033971339723397333974339753397633977339783397933980339813398233983339843398533986339873398833989339903399133992339933399433995339963399733998339993400034001340023400334004340053400634007340083400934010340113401234013340143401534016340173401834019340203402134022340233402434025340263402734028340293403034031340323403334034340353403634037340383403934040340413404234043340443404534046340473404834049340503405134052340533405434055340563405734058340593406034061340623406334064340653406634067340683406934070340713407234073340743407534076340773407834079340803408134082340833408434085340863408734088340893409034091340923409334094340953409634097340983409934100341013410234103341043410534106341073410834109341103411134112341133411434115341163411734118341193412034121341223412334124341253412634127341283412934130341313413234133341343413534136341373413834139341403414134142341433414434145341463414734148341493415034151341523415334154341553415634157341583415934160341613416234163341643416534166341673416834169341703417134172341733417434175341763417734178341793418034181341823418334184341853418634187341883418934190341913419234193341943419534196341973419834199342003420134202342033420434205342063420734208342093421034211342123421334214342153421634217342183421934220342213
42223422334224342253422634227342283422934230342313423234233342343423534236342373423834239342403424134242342433424434245342463424734248342493425034251342523425334254342553425634257342583425934260342613426234263342643426534266342673426834269342703427134272342733427434275342763427734278342793428034281342823428334284342853428634287342883428934290342913429234293342943429534296342973429834299343003430134302343033430434305343063430734308343093431034311343123431334314343153431634317343183431934320343213432234323343243432534326343273432834329343303433134332343333433434335343363433734338343393434034341343423434334344343453434634347343483434934350343513435234353343543435534356343573435834359343603436134362343633436434365343663436734368343693437034371343723437334374343753437634377343783437934380343813438234383343843438534386343873438834389343903439134392343933439434395343963439734398343993440034401344023440334404344053440634407344083440934410344113441234413344143441534416344173441834419344203442134422344233442434425344263442734428344293443034431344323443334434344353443634437344383443934440344413444234443344443444534446344473444834449344503445134452344533445434455344563445734458344593446034461344623446334464344653446634467344683446934470344713447234473344743447534476344773447834479344803448134482344833448434485344863448734488344893449034491344923449334494344953449634497344983449934500345013450234503345043450534506345073450834509345103451134512345133451434515345163451734518345193452034521345223452334524345253452634527345283452934530345313453234533345343453534536345373453834539345403454134542345433454434545345463454734548345493455034551345523455334554345553455634557345583455934560345613456234563345643456534566345673456834569345703457134572345733457434575345763457734578345793458034581345823458334584345853458634587345883458934590345913459234593345943459534596345973459834599346003460134602346033460434605346063460734608346093461034611346123461334614346153461634617346183461934620346213
46223462334624346253462634627346283462934630346313463234633346343463534636346373463834639346403464134642346433464434645346463464734648346493465034651346523465334654346553465634657346583465934660346613466234663346643466534666346673466834669346703467134672346733467434675346763467734678346793468034681346823468334684346853468634687346883468934690346913469234693346943469534696346973469834699347003470134702347033470434705347063470734708347093471034711347123471334714347153471634717347183471934720347213472234723347243472534726347273472834729347303473134732347333473434735347363473734738347393474034741347423474334744347453474634747347483474934750347513475234753347543475534756347573475834759347603476134762347633476434765347663476734768347693477034771347723477334774347753477634777347783477934780347813478234783347843478534786347873478834789347903479134792347933479434795347963479734798347993480034801348023480334804348053480634807348083480934810348113481234813348143481534816348173481834819348203482134822348233482434825348263482734828348293483034831348323483334834348353483634837348383483934840348413484234843348443484534846348473484834849348503485134852348533485434855348563485734858348593486034861348623486334864348653486634867348683486934870348713487234873348743487534876348773487834879348803488134882348833488434885348863488734888348893489034891348923489334894348953489634897348983489934900349013490234903349043490534906349073490834909349103491134912349133491434915349163491734918349193492034921349223492334924349253492634927349283492934930349313493234933349343493534936349373493834939349403494134942349433494434945349463494734948349493495034951349523495334954349553495634957349583495934960349613496234963349643496534966349673496834969349703497134972349733497434975349763497734978349793498034981349823498334984349853498634987349883498934990349913499234993349943499534996349973499834999350003500135002350033500435005350063500735008350093501035011350123501335014350153501635017350183501935020350213
50223502335024350253502635027350283502935030350313503235033350343503535036350373503835039350403504135042350433504435045350463504735048350493505035051350523505335054350553505635057350583505935060350613506235063350643506535066350673506835069350703507135072350733507435075350763507735078350793508035081350823508335084350853508635087350883508935090350913509235093350943509535096350973509835099351003510135102351033510435105351063510735108351093511035111351123511335114351153511635117351183511935120351213512235123351243512535126351273512835129351303513135132351333513435135351363513735138351393514035141351423514335144351453514635147351483514935150351513515235153351543515535156351573515835159351603516135162351633516435165351663516735168351693517035171351723517335174351753517635177351783517935180351813518235183351843518535186351873518835189351903519135192351933519435195351963519735198351993520035201352023520335204352053520635207352083520935210352113521235213352143521535216352173521835219352203522135222352233522435225352263522735228352293523035231352323523335234352353523635237352383523935240352413524235243352443524535246352473524835249352503525135252352533525435255352563525735258352593526035261352623526335264352653526635267352683526935270352713527235273352743527535276352773527835279352803528135282352833528435285352863528735288352893529035291352923529335294352953529635297352983529935300353013530235303353043530535306353073530835309353103531135312353133531435315353163531735318353193532035321353223532335324353253532635327353283532935330353313533235333353343533535336353373533835339353403534135342353433534435345353463534735348353493535035351353523535335354353553535635357353583535935360353613536235363353643536535366353673536835369353703537135372353733537435375353763537735378353793538035381353823538335384353853538635387353883538935390353913539235393353943539535396353973539835399354003540135402354033540435405354063540735408354093541035411354123541335414354153541635417354183541935420354213
54223542335424354253542635427354283542935430354313543235433354343543535436354373543835439354403544135442354433544435445354463544735448354493545035451354523545335454354553545635457354583545935460354613546235463354643546535466354673546835469354703547135472354733547435475354763547735478354793548035481354823548335484354853548635487354883548935490354913549235493354943549535496354973549835499355003550135502355033550435505355063550735508355093551035511355123551335514355153551635517355183551935520355213552235523355243552535526355273552835529355303553135532355333553435535355363553735538355393554035541355423554335544355453554635547355483554935550355513555235553355543555535556355573555835559355603556135562355633556435565355663556735568355693557035571355723557335574355753557635577355783557935580355813558235583355843558535586355873558835589355903559135592355933559435595355963559735598355993560035601356023560335604356053560635607356083560935610356113561235613356143561535616356173561835619356203562135622356233562435625356263562735628356293563035631356323563335634356353563635637356383563935640356413564235643356443564535646356473564835649356503565135652356533565435655356563565735658356593566035661356623566335664356653566635667356683566935670356713567235673356743567535676356773567835679356803568135682356833568435685356863568735688356893569035691356923569335694356953569635697356983569935700357013570235703357043570535706357073570835709357103571135712357133571435715357163571735718357193572035721357223572335724357253572635727357283572935730357313573235733357343573535736357373573835739357403574135742357433574435745357463574735748357493575035751357523575335754357553575635757357583575935760357613576235763357643576535766357673576835769357703577135772357733577435775357763577735778357793578035781357823578335784357853578635787357883578935790357913579235793357943579535796357973579835799358003580135802358033580435805358063580735808358093581035811358123581335814358153581635817358183581935820358213
58223582335824358253582635827358283582935830358313583235833358343583535836358373583835839358403584135842358433584435845358463584735848358493585035851358523585335854358553585635857358583585935860358613586235863358643586535866358673586835869358703587135872358733587435875358763587735878358793588035881358823588335884358853588635887358883588935890358913589235893358943589535896358973589835899359003590135902359033590435905359063590735908359093591035911359123591335914359153591635917359183591935920359213592235923359243592535926359273592835929359303593135932359333593435935359363593735938359393594035941359423594335944359453594635947359483594935950359513595235953359543595535956359573595835959359603596135962359633596435965359663596735968359693597035971359723597335974359753597635977359783597935980359813598235983359843598535986359873598835989359903599135992359933599435995359963599735998359993600036001360023600336004360053600636007360083600936010360113601236013360143601536016360173601836019360203602136022360233602436025360263602736028360293603036031360323603336034360353603636037360383603936040360413604236043360443604536046360473604836049360503605136052360533605436055360563605736058360593606036061360623606336064360653606636067360683606936070360713607236073360743607536076360773607836079360803608136082360833608436085360863608736088360893609036091360923609336094360953609636097360983609936100361013610236103361043610536106361073610836109361103611136112361133611436115361163611736118361193612036121361223612336124361253612636127361283612936130361313613236133361343613536136361373613836139361403614136142361433614436145361463614736148361493615036151361523615336154361553615636157361583615936160361613616236163361643616536166361673616836169361703617136172361733617436175361763617736178361793618036181361823618336184361853618636187361883618936190361913619236193361943619536196361973619836199362003620136202362033620436205362063620736208362093621036211362123621336214362153621636217362183621936220362213
62223622336224362253622636227362283622936230362313623236233362343623536236362373623836239362403624136242362433624436245362463624736248362493625036251362523625336254362553625636257362583625936260362613626236263362643626536266362673626836269362703627136272362733627436275362763627736278362793628036281362823628336284362853628636287362883628936290362913629236293362943629536296362973629836299363003630136302363033630436305363063630736308363093631036311363123631336314363153631636317363183631936320363213632236323363243632536326363273632836329363303633136332363333633436335363363633736338363393634036341363423634336344363453634636347363483634936350363513635236353363543635536356363573635836359363603636136362363633636436365363663636736368363693637036371363723637336374363753637636377363783637936380363813638236383363843638536386363873638836389363903639136392363933639436395363963639736398363993640036401364023640336404364053640636407364083640936410364113641236413364143641536416364173641836419364203642136422364233642436425364263642736428364293643036431364323643336434364353643636437364383643936440364413644236443364443644536446364473644836449364503645136452364533645436455364563645736458364593646036461364623646336464364653646636467364683646936470364713647236473364743647536476364773647836479364803648136482364833648436485364863648736488364893649036491364923649336494364953649636497364983649936500365013650236503365043650536506365073650836509365103651136512365133651436515365163651736518365193652036521365223652336524365253652636527365283652936530365313653236533365343653536536365373653836539365403654136542365433654436545365463654736548365493655036551365523655336554365553655636557365583655936560365613656236563365643656536566365673656836569365703657136572365733657436575365763657736578365793658036581365823658336584365853658636587365883658936590365913659236593365943659536596365973659836599366003660136602366033660436605366063660736608366093661036611366123661336614366153661636617366183661936620366213
66223662336624366253662636627366283662936630366313663236633366343663536636366373663836639366403664136642366433664436645366463664736648366493665036651366523665336654366553665636657366583665936660366613666236663366643666536666366673666836669366703667136672366733667436675366763667736678366793668036681366823668336684366853668636687366883668936690366913669236693366943669536696366973669836699367003670136702367033670436705367063670736708367093671036711367123671336714367153671636717367183671936720367213672236723367243672536726367273672836729367303673136732367333673436735367363673736738367393674036741367423674336744367453674636747367483674936750367513675236753367543675536756367573675836759367603676136762367633676436765367663676736768367693677036771367723677336774367753677636777367783677936780367813678236783367843678536786367873678836789367903679136792367933679436795367963679736798367993680036801368023680336804368053680636807368083680936810368113681236813368143681536816368173681836819368203682136822368233682436825368263682736828368293683036831368323683336834368353683636837368383683936840368413684236843368443684536846368473684836849368503685136852368533685436855368563685736858368593686036861368623686336864368653686636867368683686936870368713687236873368743687536876368773687836879368803688136882368833688436885368863688736888368893689036891368923689336894368953689636897368983689936900369013690236903369043690536906369073690836909369103691136912369133691436915369163691736918369193692036921369223692336924369253692636927369283692936930369313693236933369343693536936369373693836939369403694136942369433694436945369463694736948369493695036951369523695336954369553695636957369583695936960369613696236963369643696536966369673696836969369703697136972369733697436975369763697736978369793698036981369823698336984369853698636987369883698936990369913699236993369943699536996369973699836999370003700137002370033700437005370063700737008370093701037011370123701337014370153701637017370183701937020370213
70223702337024370253702637027370283702937030370313703237033370343703537036370373703837039370403704137042370433704437045370463704737048370493705037051370523705337054370553705637057370583705937060370613706237063370643706537066370673706837069370703707137072370733707437075370763707737078370793708037081370823708337084370853708637087370883708937090370913709237093370943709537096370973709837099371003710137102371033710437105371063710737108371093711037111371123711337114371153711637117371183711937120371213712237123371243712537126371273712837129371303713137132371333713437135371363713737138371393714037141371423714337144371453714637147371483714937150371513715237153371543715537156371573715837159371603716137162371633716437165371663716737168371693717037171371723717337174371753717637177371783717937180371813718237183371843718537186371873718837189371903719137192371933719437195371963719737198371993720037201372023720337204372053720637207372083720937210372113721237213372143721537216372173721837219372203722137222372233722437225372263722737228372293723037231372323723337234372353723637237372383723937240372413724237243372443724537246372473724837249372503725137252372533725437255372563725737258372593726037261372623726337264372653726637267372683726937270372713727237273372743727537276372773727837279372803728137282372833728437285372863728737288372893729037291372923729337294372953729637297372983729937300373013730237303373043730537306373073730837309373103731137312373133731437315373163731737318373193732037321373223732337324373253732637327373283732937330373313733237333373343733537336373373733837339373403734137342373433734437345373463734737348373493735037351373523735337354373553735637357373583735937360373613736237363373643736537366373673736837369373703737137372373733737437375373763737737378373793738037381373823738337384373853738637387373883738937390373913739237393373943739537396373973739837399374003740137402374033740437405374063740737408374093741037411374123741337414374153741637417374183741937420374213
74223742337424374253742637427374283742937430374313743237433374343743537436374373743837439374403744137442374433744437445374463744737448374493745037451374523745337454374553745637457374583745937460374613746237463374643746537466374673746837469374703747137472374733747437475374763747737478374793748037481374823748337484374853748637487374883748937490374913749237493374943749537496374973749837499375003750137502375033750437505375063750737508375093751037511375123751337514375153751637517375183751937520375213752237523375243752537526375273752837529375303753137532375333753437535375363753737538375393754037541375423754337544375453754637547375483754937550375513755237553375543755537556375573755837559375603756137562375633756437565375663756737568375693757037571375723757337574375753757637577375783757937580375813758237583375843758537586375873758837589375903759137592375933759437595375963759737598375993760037601376023760337604376053760637607376083760937610376113761237613376143761537616376173761837619376203762137622376233762437625376263762737628376293763037631376323763337634376353763637637376383763937640376413764237643376443764537646376473764837649376503765137652376533765437655376563765737658376593766037661376623766337664376653766637667376683766937670376713767237673376743767537676376773767837679376803768137682376833768437685376863768737688376893769037691376923769337694376953769637697376983769937700377013770237703377043770537706377073770837709377103771137712377133771437715377163771737718377193772037721377223772337724377253772637727377283772937730377313773237733377343773537736377373773837739377403774137742377433774437745377463774737748377493775037751377523775337754377553775637757377583775937760377613776237763377643776537766377673776837769377703777137772377733777437775377763777737778377793778037781377823778337784377853778637787377883778937790377913779237793377943779537796377973779837799378003780137802378033780437805378063780737808378093781037811378123781337814378153781637817378183781937820378213
78223782337824378253782637827378283782937830378313783237833378343783537836378373783837839378403784137842378433784437845378463784737848378493785037851378523785337854378553785637857378583785937860378613786237863378643786537866378673786837869378703787137872378733787437875378763787737878378793788037881378823788337884378853788637887378883788937890378913789237893378943789537896378973789837899379003790137902379033790437905379063790737908379093791037911379123791337914379153791637917379183791937920379213792237923379243792537926379273792837929379303793137932379333793437935379363793737938379393794037941379423794337944379453794637947379483794937950379513795237953379543795537956379573795837959379603796137962379633796437965379663796737968379693797037971379723797337974379753797637977379783797937980379813798237983379843798537986379873798837989379903799137992379933799437995379963799737998379993800038001380023800338004380053800638007380083800938010380113801238013380143801538016380173801838019380203802138022380233802438025380263802738028380293803038031380323803338034380353803638037380383803938040380413804238043380443804538046380473804838049380503805138052380533805438055380563805738058380593806038061380623806338064380653806638067380683806938070380713807238073380743807538076380773807838079380803808138082380833808438085380863808738088380893809038091380923809338094380953809638097380983809938100381013810238103381043810538106381073810838109381103811138112381133811438115381163811738118381193812038121381223812338124381253812638127381283812938130381313813238133381343813538136381373813838139381403814138142381433814438145381463814738148381493815038151381523815338154381553815638157381583815938160381613816238163381643816538166381673816838169381703817138172381733817438175381763817738178381793818038181381823818338184381853818638187381883818938190381913819238193381943819538196381973819838199382003820138202382033820438205382063820738208382093821038211382123821338214382153821638217382183821938220382213
82223822338224382253822638227382283822938230382313823238233382343823538236382373823838239382403824138242382433824438245382463824738248382493825038251382523825338254382553825638257382583825938260382613826238263382643826538266382673826838269382703827138272382733827438275382763827738278382793828038281382823828338284382853828638287382883828938290382913829238293382943829538296382973829838299383003830138302383033830438305383063830738308383093831038311383123831338314383153831638317383183831938320383213832238323383243832538326383273832838329383303833138332383333833438335383363833738338383393834038341383423834338344383453834638347383483834938350383513835238353383543835538356383573835838359383603836138362383633836438365383663836738368383693837038371383723837338374383753837638377383783837938380383813838238383383843838538386383873838838389383903839138392383933839438395383963839738398383993840038401384023840338404384053840638407384083840938410384113841238413384143841538416384173841838419384203842138422384233842438425384263842738428384293843038431384323843338434384353843638437384383843938440384413844238443384443844538446384473844838449384503845138452384533845438455384563845738458384593846038461384623846338464384653846638467384683846938470384713847238473384743847538476384773847838479384803848138482384833848438485384863848738488384893849038491384923849338494384953849638497384983849938500385013850238503385043850538506385073850838509385103851138512385133851438515385163851738518385193852038521385223852338524385253852638527385283852938530385313853238533385343853538536385373853838539385403854138542385433854438545385463854738548385493855038551385523855338554385553855638557385583855938560385613856238563385643856538566385673856838569385703857138572385733857438575385763857738578385793858038581385823858338584385853858638587385883858938590385913859238593385943859538596385973859838599386003860138602386033860438605386063860738608386093861038611386123861338614386153861638617386183861938620386213
86223862338624386253862638627386283862938630386313863238633386343863538636386373863838639386403864138642386433864438645386463864738648386493865038651386523865338654386553865638657386583865938660386613866238663386643866538666386673866838669386703867138672386733867438675386763867738678386793868038681386823868338684386853868638687386883868938690386913869238693386943869538696386973869838699387003870138702387033870438705387063870738708387093871038711387123871338714387153871638717387183871938720387213872238723387243872538726387273872838729387303873138732387333873438735387363873738738387393874038741387423874338744387453874638747387483874938750387513875238753387543875538756387573875838759387603876138762387633876438765387663876738768387693877038771387723877338774387753877638777387783877938780387813878238783387843878538786387873878838789387903879138792387933879438795387963879738798387993880038801388023880338804388053880638807388083880938810388113881238813388143881538816388173881838819388203882138822388233882438825388263882738828388293883038831388323883338834388353883638837388383883938840388413884238843388443884538846388473884838849388503885138852388533885438855388563885738858388593886038861388623886338864388653886638867388683886938870388713887238873388743887538876388773887838879388803888138882388833888438885388863888738888388893889038891388923889338894388953889638897388983889938900389013890238903389043890538906389073890838909389103891138912389133891438915389163891738918389193892038921389223892338924389253892638927389283892938930389313893238933389343893538936389373893838939389403894138942389433894438945389463894738948389493895038951389523895338954389553895638957389583895938960389613896238963389643896538966389673896838969389703897138972389733897438975389763897738978389793898038981389823898338984389853898638987389883898938990389913899238993389943899538996389973899838999390003900139002390033900439005390063900739008390093901039011390123901339014390153901639017390183901939020390213
90223902339024390253902639027390283902939030390313903239033390343903539036390373903839039390403904139042390433904439045390463904739048390493905039051390523905339054390553905639057390583905939060390613906239063390643906539066390673906839069390703907139072390733907439075390763907739078390793908039081390823908339084390853908639087390883908939090390913909239093390943909539096390973909839099391003910139102391033910439105391063910739108391093911039111391123911339114391153911639117391183911939120391213912239123391243912539126391273912839129391303913139132391333913439135391363913739138391393914039141391423914339144391453914639147391483914939150391513915239153391543915539156391573915839159391603916139162391633916439165391663916739168391693917039171391723917339174391753917639177391783917939180391813918239183391843918539186391873918839189391903919139192391933919439195391963919739198391993920039201392023920339204392053920639207392083920939210392113921239213392143921539216392173921839219392203922139222392233922439225392263922739228392293923039231392323923339234392353923639237392383923939240392413924239243392443924539246392473924839249392503925139252392533925439255392563925739258392593926039261392623926339264392653926639267392683926939270392713927239273392743927539276392773927839279392803928139282392833928439285392863928739288392893929039291392923929339294392953929639297392983929939300393013930239303393043930539306393073930839309393103931139312393133931439315393163931739318393193932039321393223932339324393253932639327393283932939330393313933239333393343933539336393373933839339393403934139342393433934439345393463934739348393493935039351393523935339354393553935639357393583935939360393613936239363393643936539366393673936839369393703937139372393733937439375393763937739378393793938039381393823938339384393853938639387393883938939390393913939239393393943939539396393973939839399394003940139402394033940439405394063940739408394093941039411394123941339414394153941639417394183941939420394213
94223942339424394253942639427394283942939430394313943239433394343943539436394373943839439394403944139442394433944439445394463944739448394493945039451394523945339454394553945639457394583945939460394613946239463394643946539466394673946839469394703947139472394733947439475394763947739478394793948039481394823948339484394853948639487394883948939490394913949239493394943949539496394973949839499395003950139502395033950439505395063950739508395093951039511395123951339514395153951639517395183951939520395213952239523395243952539526395273952839529395303953139532395333953439535395363953739538395393954039541395423954339544395453954639547395483954939550395513955239553395543955539556395573955839559395603956139562395633956439565395663956739568395693957039571395723957339574395753957639577395783957939580395813958239583395843958539586395873958839589395903959139592395933959439595395963959739598395993960039601396023960339604396053960639607396083960939610396113961239613396143961539616396173961839619396203962139622396233962439625396263962739628396293963039631396323963339634396353963639637396383963939640396413964239643396443964539646396473964839649396503965139652396533965439655396563965739658396593966039661396623966339664396653966639667396683966939670396713967239673396743967539676396773967839679396803968139682396833968439685396863968739688396893969039691396923969339694396953969639697396983969939700397013970239703397043970539706397073970839709397103971139712397133971439715397163971739718397193972039721397223972339724397253972639727397283972939730397313973239733397343973539736397373973839739397403974139742397433974439745397463974739748397493975039751397523975339754397553975639757397583975939760397613976239763397643976539766397673976839769397703977139772397733977439775397763977739778397793978039781397823978339784397853978639787397883978939790397913979239793397943979539796397973979839799398003980139802398033980439805398063980739808398093981039811398123981339814398153981639817398183981939820398213
98223982339824398253982639827398283982939830398313983239833398343983539836398373983839839398403984139842398433984439845398463984739848398493985039851398523985339854398553985639857398583985939860398613986239863398643986539866398673986839869398703987139872398733987439875398763987739878398793988039881398823988339884398853988639887398883988939890398913989239893398943989539896398973989839899399003990139902399033990439905399063990739908399093991039911399123991339914399153991639917399183991939920399213992239923399243992539926399273992839929399303993139932399333993439935399363993739938399393994039941399423994339944399453994639947399483994939950399513995239953399543995539956399573995839959399603996139962399633996439965399663996739968399693997039971399723997339974399753997639977399783997939980399813998239983399843998539986399873998839989399903999139992399933999439995399963999739998399994000040001400024000340004400054000640007400084000940010400114001240013400144001540016400174001840019400204002140022400234002440025400264002740028400294003040031400324003340034400354003640037400384003940040400414004240043400444004540046400474004840049400504005140052400534005440055400564005740058400594006040061400624006340064400654006640067400684006940070400714007240073400744007540076400774007840079400804008140082400834008440085400864008740088400894009040091400924009340094400954009640097400984009940100401014010240103401044010540106401074010840109401104011140112401134011440115401164011740118401194012040121401224012340124401254012640127401284012940130401314013240133401344013540136401374013840139401404014140142401434014440145401464014740148401494015040151401524015340154401554015640157401584015940160401614016240163401644016540166401674016840169401704017140172401734017440175401764017740178401794018040181401824018340184401854018640187401884018940190401914019240193401944019540196401974019840199402004020140202402034020440205402064020740208402094021040211402124021340214402154021640217402184021940220402214
02224022340224402254022640227402284022940230402314023240233402344023540236402374023840239402404024140242402434024440245402464024740248402494025040251402524025340254402554025640257402584025940260402614026240263402644026540266402674026840269402704027140272402734027440275402764027740278402794028040281402824028340284402854028640287402884028940290402914029240293402944029540296402974029840299403004030140302403034030440305403064030740308403094031040311403124031340314403154031640317403184031940320403214032240323403244032540326403274032840329403304033140332403334033440335403364033740338403394034040341403424034340344403454034640347403484034940350403514035240353403544035540356403574035840359403604036140362403634036440365403664036740368403694037040371403724037340374403754037640377403784037940380403814038240383403844038540386403874038840389403904039140392403934039440395403964039740398403994040040401404024040340404404054040640407404084040940410404114041240413404144041540416404174041840419404204042140422404234042440425404264042740428404294043040431404324043340434404354043640437404384043940440404414044240443404444044540446404474044840449404504045140452404534045440455404564045740458404594046040461404624046340464404654046640467404684046940470404714047240473404744047540476404774047840479404804048140482404834048440485404864048740488404894049040491404924049340494404954049640497404984049940500405014050240503405044050540506405074050840509405104051140512405134051440515405164051740518405194052040521405224052340524405254052640527405284052940530405314053240533405344053540536405374053840539405404054140542405434054440545405464054740548405494055040551405524055340554405554055640557405584055940560405614056240563405644056540566405674056840569405704057140572405734057440575405764057740578405794058040581405824058340584405854058640587405884058940590405914059240593405944059540596405974059840599406004060140602406034060440605406064060740608406094061040611406124061340614406154061640617406184061940620406214
06224062340624406254062640627406284062940630406314063240633406344063540636406374063840639406404064140642406434064440645406464064740648406494065040651406524065340654406554065640657406584065940660406614066240663406644066540666406674066840669406704067140672406734067440675406764067740678406794068040681406824068340684406854068640687406884068940690406914069240693406944069540696406974069840699407004070140702407034070440705407064070740708407094071040711407124071340714407154071640717407184071940720407214072240723407244072540726407274072840729407304073140732407334073440735407364073740738407394074040741407424074340744407454074640747407484074940750407514075240753407544075540756407574075840759407604076140762407634076440765407664076740768407694077040771407724077340774407754077640777407784077940780407814078240783407844078540786407874078840789407904079140792407934079440795407964079740798407994080040801408024080340804408054080640807408084080940810408114081240813408144081540816408174081840819408204082140822408234082440825408264082740828408294083040831408324083340834408354083640837408384083940840408414084240843408444084540846408474084840849408504085140852408534085440855408564085740858408594086040861408624086340864408654086640867408684086940870408714087240873408744087540876408774087840879408804088140882408834088440885408864088740888408894089040891408924089340894408954089640897408984089940900409014090240903409044090540906409074090840909409104091140912409134091440915409164091740918409194092040921409224092340924409254092640927409284092940930409314093240933409344093540936409374093840939409404094140942409434094440945409464094740948409494095040951409524095340954409554095640957409584095940960409614096240963409644096540966409674096840969409704097140972409734097440975409764097740978409794098040981409824098340984409854098640987409884098940990409914099240993409944099540996409974099840999410004100141002410034100441005410064100741008410094101041011410124101341014410154101641017410184101941020410214
10224102341024410254102641027410284102941030410314103241033410344103541036410374103841039410404104141042410434104441045410464104741048410494105041051410524105341054410554105641057410584105941060410614106241063410644106541066410674106841069410704107141072410734107441075410764107741078410794108041081410824108341084410854108641087410884108941090410914109241093410944109541096410974109841099411004110141102411034110441105411064110741108411094111041111411124111341114411154111641117411184111941120411214112241123411244112541126411274112841129411304113141132411334113441135411364113741138411394114041141411424114341144411454114641147411484114941150411514115241153411544115541156411574115841159411604116141162411634116441165411664116741168411694117041171411724117341174411754117641177411784117941180411814118241183411844118541186411874118841189411904119141192411934119441195411964119741198411994120041201412024120341204412054120641207412084120941210412114121241213412144121541216412174121841219412204122141222412234122441225412264122741228412294123041231412324123341234412354123641237412384123941240412414124241243412444124541246412474124841249412504125141252412534125441255412564125741258412594126041261412624126341264412654126641267412684126941270412714127241273412744127541276412774127841279412804128141282412834128441285412864128741288412894129041291412924129341294412954129641297412984129941300413014130241303413044130541306413074130841309413104131141312413134131441315413164131741318413194132041321413224132341324413254132641327413284132941330413314133241333413344133541336413374133841339413404134141342413434134441345413464134741348413494135041351413524135341354413554135641357413584135941360413614136241363413644136541366413674136841369413704137141372413734137441375413764137741378413794138041381413824138341384413854138641387413884138941390413914139241393413944139541396413974139841399414004140141402414034140441405414064140741408414094141041411414124141341414414154141641417414184141941420414214
14224142341424414254142641427414284142941430414314143241433414344143541436414374143841439414404144141442414434144441445414464144741448414494145041451414524145341454414554145641457414584145941460414614146241463414644146541466414674146841469414704147141472414734147441475414764147741478414794148041481414824148341484414854148641487414884148941490414914149241493414944149541496414974149841499415004150141502415034150441505415064150741508415094151041511415124151341514415154151641517415184151941520415214152241523415244152541526415274152841529415304153141532415334153441535415364153741538415394154041541415424154341544415454154641547415484154941550415514155241553415544155541556415574155841559415604156141562415634156441565415664156741568415694157041571415724157341574415754157641577415784157941580415814158241583415844158541586415874158841589415904159141592415934159441595415964159741598415994160041601416024160341604416054160641607416084160941610416114161241613416144161541616416174161841619416204162141622416234162441625416264162741628416294163041631416324163341634416354163641637416384163941640416414164241643416444164541646416474164841649416504165141652416534165441655416564165741658416594166041661416624166341664416654166641667416684166941670416714167241673416744167541676416774167841679416804168141682416834168441685416864168741688416894169041691416924169341694416954169641697416984169941700417014170241703417044170541706417074170841709417104171141712417134171441715417164171741718417194172041721417224172341724417254172641727417284172941730417314173241733417344173541736417374173841739417404174141742417434174441745417464174741748417494175041751417524175341754417554175641757417584175941760417614176241763417644176541766417674176841769417704177141772417734177441775417764177741778417794178041781417824178341784417854178641787417884178941790417914179241793417944179541796417974179841799418004180141802418034180441805418064180741808418094181041811418124181341814418154181641817418184181941820418214
18224182341824418254182641827418284182941830418314183241833418344183541836418374183841839418404184141842418434184441845418464184741848418494185041851418524185341854418554185641857418584185941860418614186241863418644186541866418674186841869418704187141872418734187441875418764187741878418794188041881418824188341884418854188641887418884188941890418914189241893418944189541896418974189841899419004190141902419034190441905419064190741908419094191041911419124191341914419154191641917419184191941920419214192241923419244192541926419274192841929419304193141932419334193441935419364193741938419394194041941419424194341944419454194641947419484194941950419514195241953419544195541956419574195841959419604196141962419634196441965419664196741968419694197041971419724197341974419754197641977419784197941980419814198241983419844198541986419874198841989419904199141992419934199441995419964199741998419994200042001420024200342004420054200642007420084200942010420114201242013420144201542016420174201842019420204202142022420234202442025420264202742028420294203042031420324203342034420354203642037420384203942040420414204242043420444204542046420474204842049420504205142052420534205442055420564205742058420594206042061420624206342064420654206642067420684206942070420714207242073420744207542076420774207842079420804208142082420834208442085420864208742088420894209042091420924209342094420954209642097420984209942100421014210242103421044210542106421074210842109421104211142112421134211442115421164211742118421194212042121421224212342124421254212642127421284212942130421314213242133421344213542136421374213842139421404214142142421434214442145421464214742148421494215042151421524215342154421554215642157421584215942160421614216242163421644216542166421674216842169421704217142172421734217442175421764217742178421794218042181421824218342184421854218642187421884218942190421914219242193421944219542196421974219842199422004220142202422034220442205422064220742208422094221042211422124221342214422154221642217422184221942220422214
22224222342224422254222642227422284222942230422314223242233422344223542236422374223842239422404224142242422434224442245422464224742248422494225042251422524225342254422554225642257422584225942260422614226242263422644226542266422674226842269422704227142272422734227442275422764227742278422794228042281422824228342284422854228642287422884228942290422914229242293422944229542296422974229842299423004230142302423034230442305423064230742308423094231042311423124231342314423154231642317423184231942320423214232242323423244232542326423274232842329423304233142332423334233442335423364233742338423394234042341423424234342344423454234642347423484234942350423514235242353423544235542356423574235842359423604236142362423634236442365423664236742368423694237042371423724237342374423754237642377423784237942380423814238242383423844238542386423874238842389423904239142392423934239442395423964239742398423994240042401424024240342404424054240642407424084240942410424114241242413424144241542416424174241842419424204242142422424234242442425424264242742428424294243042431424324243342434424354243642437424384243942440424414244242443424444244542446424474244842449424504245142452424534245442455424564245742458424594246042461424624246342464424654246642467424684246942470424714247242473424744247542476424774247842479424804248142482424834248442485424864248742488424894249042491424924249342494424954249642497424984249942500425014250242503425044250542506425074250842509425104251142512425134251442515425164251742518425194252042521425224252342524425254252642527425284252942530425314253242533425344253542536425374253842539425404254142542425434254442545425464254742548425494255042551425524255342554425554255642557425584255942560425614256242563425644256542566425674256842569425704257142572425734257442575425764257742578425794258042581425824258342584425854258642587425884258942590425914259242593425944259542596425974259842599426004260142602426034260442605426064260742608426094261042611426124261342614426154261642617426184261942620426214
26224262342624426254262642627426284262942630426314263242633426344263542636426374263842639426404264142642426434264442645426464264742648426494265042651426524265342654426554265642657426584265942660426614266242663426644266542666426674266842669426704267142672426734267442675426764267742678426794268042681426824268342684426854268642687426884268942690426914269242693426944269542696426974269842699427004270142702427034270442705427064270742708427094271042711427124271342714427154271642717427184271942720427214272242723427244272542726427274272842729427304273142732427334273442735427364273742738427394274042741427424274342744427454274642747427484274942750427514275242753427544275542756427574275842759427604276142762427634276442765427664276742768427694277042771427724277342774427754277642777427784277942780427814278242783427844278542786427874278842789427904279142792427934279442795427964279742798427994280042801428024280342804428054280642807428084280942810428114281242813428144281542816428174281842819428204282142822428234282442825428264282742828428294283042831428324283342834428354283642837428384283942840428414284242843428444284542846428474284842849428504285142852428534285442855428564285742858428594286042861428624286342864428654286642867428684286942870428714287242873428744287542876428774287842879428804288142882428834288442885428864288742888428894289042891428924289342894428954289642897428984289942900429014290242903429044290542906429074290842909429104291142912429134291442915429164291742918429194292042921429224292342924429254292642927429284292942930429314293242933429344293542936429374293842939429404294142942429434294442945429464294742948429494295042951429524295342954429554295642957429584295942960429614296242963429644296542966429674296842969429704297142972429734297442975429764297742978429794298042981429824298342984429854298642987429884298942990429914299242993429944299542996429974299842999430004300143002430034300443005430064300743008430094301043011430124301343014430154301643017430184301943020430214
30224302343024430254302643027430284302943030430314303243033430344303543036430374303843039430404304143042430434304443045430464304743048430494305043051430524305343054430554305643057430584305943060430614306243063430644306543066430674306843069430704307143072430734307443075430764307743078430794308043081430824308343084430854308643087430884308943090430914309243093430944309543096430974309843099431004310143102431034310443105431064310743108431094311043111431124311343114431154311643117431184311943120431214312243123431244312543126431274312843129431304313143132431334313443135431364313743138431394314043141431424314343144431454314643147431484314943150431514315243153431544315543156431574315843159431604316143162431634316443165431664316743168431694317043171431724317343174431754317643177431784317943180431814318243183431844318543186431874318843189431904319143192431934319443195431964319743198431994320043201432024320343204432054320643207432084320943210432114321243213432144321543216432174321843219432204322143222432234322443225432264322743228432294323043231432324323343234432354323643237432384323943240432414324243243432444324543246432474324843249432504325143252432534325443255432564325743258432594326043261432624326343264432654326643267432684326943270432714327243273432744327543276432774327843279432804328143282432834328443285432864328743288432894329043291432924329343294432954329643297432984329943300433014330243303433044330543306433074330843309433104331143312433134331443315433164331743318433194332043321433224332343324433254332643327433284332943330433314333243333433344333543336433374333843339433404334143342433434334443345433464334743348433494335043351433524335343354433554335643357433584335943360433614336243363433644336543366433674336843369433704337143372433734337443375433764337743378433794338043381433824338343384433854338643387433884338943390433914339243393433944339543396433974339843399434004340143402434034340443405434064340743408434094341043411434124341343414434154341643417434184341943420434214
34224342343424434254342643427434284342943430434314343243433434344343543436434374343843439434404344143442434434344443445434464344743448434494345043451434524345343454434554345643457434584345943460434614346243463434644346543466434674346843469434704347143472434734347443475434764347743478434794348043481434824348343484434854348643487434884348943490434914349243493434944349543496434974349843499435004350143502435034350443505435064350743508435094351043511435124351343514435154351643517435184351943520435214352243523435244352543526435274352843529435304353143532435334353443535435364353743538435394354043541435424354343544435454354643547435484354943550435514355243553435544355543556435574355843559435604356143562435634356443565435664356743568435694357043571435724357343574435754357643577435784357943580435814358243583435844358543586435874358843589435904359143592435934359443595435964359743598435994360043601436024360343604436054360643607436084360943610436114361243613436144361543616436174361843619436204362143622436234362443625436264362743628436294363043631436324363343634436354363643637436384363943640436414364243643436444364543646436474364843649436504365143652436534365443655436564365743658436594366043661436624366343664436654366643667436684366943670436714367243673436744367543676436774367843679436804368143682436834368443685436864368743688436894369043691436924369343694436954369643697436984369943700437014370243703437044370543706437074370843709437104371143712437134371443715437164371743718437194372043721437224372343724437254372643727437284372943730437314373243733437344373543736437374373843739437404374143742437434374443745437464374743748437494375043751437524375343754437554375643757437584375943760437614376243763437644376543766437674376843769437704377143772437734377443775437764377743778437794378043781437824378343784437854378643787437884378943790437914379243793437944379543796437974379843799438004380143802438034380443805438064380743808438094381043811438124381343814438154381643817438184381943820438214
38224382343824438254382643827438284382943830438314383243833438344383543836438374383843839438404384143842438434384443845438464384743848438494385043851438524385343854438554385643857438584385943860438614386243863438644386543866438674386843869438704387143872438734387443875438764387743878438794388043881438824388343884438854388643887438884388943890438914389243893438944389543896438974389843899439004390143902439034390443905439064390743908439094391043911439124391343914439154391643917439184391943920439214392243923439244392543926439274392843929439304393143932439334393443935439364393743938439394394043941439424394343944439454394643947439484394943950439514395243953439544395543956439574395843959439604396143962439634396443965439664396743968439694397043971439724397343974439754397643977439784397943980439814398243983439844398543986439874398843989439904399143992439934399443995439964399743998439994400044001440024400344004440054400644007440084400944010440114401244013440144401544016440174401844019440204402144022440234402444025440264402744028440294403044031440324403344034440354403644037440384403944040440414404244043440444404544046440474404844049440504405144052440534405444055440564405744058440594406044061440624406344064440654406644067440684406944070440714407244073440744407544076440774407844079440804408144082440834408444085440864408744088440894409044091440924409344094440954409644097440984409944100441014410244103441044410544106441074410844109441104411144112441134411444115441164411744118441194412044121441224412344124441254412644127441284412944130441314413244133441344413544136441374413844139441404414144142441434414444145441464414744148441494415044151441524415344154441554415644157441584415944160441614416244163441644416544166441674416844169441704417144172441734417444175441764417744178441794418044181441824418344184441854418644187441884418944190441914419244193441944419544196441974419844199442004420144202442034420444205442064420744208442094421044211442124421344214442154421644217442184421944220442214
42224422344224442254422644227442284422944230442314423244233442344423544236442374423844239442404424144242442434424444245442464424744248442494425044251442524425344254442554425644257442584425944260442614426244263442644426544266442674426844269442704427144272442734427444275442764427744278442794428044281442824428344284442854428644287442884428944290442914429244293442944429544296442974429844299443004430144302443034430444305443064430744308443094431044311443124431344314443154431644317443184431944320443214432244323443244432544326443274432844329443304433144332443334433444335443364433744338443394434044341443424434344344443454434644347443484434944350443514435244353443544435544356443574435844359443604436144362443634436444365443664436744368443694437044371443724437344374443754437644377443784437944380443814438244383443844438544386443874438844389443904439144392443934439444395443964439744398443994440044401444024440344404444054440644407444084440944410444114441244413444144441544416444174441844419444204442144422444234442444425444264442744428444294443044431444324443344434444354443644437444384443944440444414444244443444444444544446444474444844449444504445144452444534445444455444564445744458444594446044461444624446344464444654446644467444684446944470444714447244473444744447544476444774447844479444804448144482444834448444485444864448744488444894449044491444924449344494444954449644497444984449944500445014450244503445044450544506445074450844509445104451144512445134451444515445164451744518445194452044521445224452344524445254452644527445284452944530445314453244533445344453544536445374453844539445404454144542445434454444545445464454744548445494455044551445524455344554445554455644557445584455944560445614456244563445644456544566445674456844569445704457144572445734457444575445764457744578445794458044581445824458344584445854458644587445884458944590445914459244593445944459544596445974459844599446004460144602446034460444605446064460744608446094461044611446124461344614446154461644617446184461944620446214
46224462344624446254462644627446284462944630446314463244633446344463544636446374463844639446404464144642446434464444645446464464744648446494465044651446524465344654446554465644657446584465944660446614466244663446644466544666446674466844669446704467144672446734467444675446764467744678446794468044681446824468344684446854468644687446884468944690446914469244693446944469544696446974469844699447004470144702447034470444705447064470744708447094471044711447124471344714447154471644717447184471944720447214472244723447244472544726447274472844729447304473144732447334473444735447364473744738447394474044741447424474344744447454474644747447484474944750447514475244753447544475544756447574475844759447604476144762447634476444765447664476744768447694477044771447724477344774447754477644777447784477944780447814478244783447844478544786447874478844789447904479144792447934479444795447964479744798447994480044801448024480344804448054480644807448084480944810448114481244813448144481544816448174481844819448204482144822448234482444825448264482744828448294483044831448324483344834448354483644837448384483944840448414484244843448444484544846448474484844849448504485144852448534485444855448564485744858448594486044861448624486344864448654486644867448684486944870448714487244873448744487544876448774487844879448804488144882448834488444885448864488744888448894489044891448924489344894448954489644897448984489944900449014490244903449044490544906449074490844909449104491144912449134491444915449164491744918449194492044921449224492344924449254492644927449284492944930449314493244933449344493544936449374493844939449404494144942449434494444945449464494744948449494495044951449524495344954449554495644957449584495944960449614496244963449644496544966449674496844969449704497144972449734497444975449764497744978449794498044981449824498344984449854498644987449884498944990449914499244993449944499544996449974499844999450004500145002450034500445005450064500745008450094501045011450124501345014450154501645017450184501945020450214
50224502345024450254502645027450284502945030450314503245033450344503545036450374503845039450404504145042450434504445045450464504745048450494505045051450524505345054450554505645057450584505945060450614506245063450644506545066450674506845069450704507145072450734507445075450764507745078450794508045081450824508345084450854508645087450884508945090450914509245093450944509545096450974509845099451004510145102451034510445105451064510745108451094511045111451124511345114451154511645117451184511945120451214512245123451244512545126451274512845129451304513145132451334513445135451364513745138451394514045141451424514345144451454514645147451484514945150451514515245153451544515545156451574515845159451604516145162451634516445165451664516745168451694517045171451724517345174451754517645177451784517945180451814518245183451844518545186451874518845189451904519145192451934519445195451964519745198451994520045201452024520345204452054520645207452084520945210452114521245213452144521545216452174521845219452204522145222452234522445225452264522745228452294523045231452324523345234452354523645237452384523945240452414524245243452444524545246452474524845249452504525145252452534525445255452564525745258452594526045261452624526345264452654526645267452684526945270452714527245273452744527545276452774527845279452804528145282452834528445285452864528745288452894529045291452924529345294452954529645297452984529945300453014530245303453044530545306453074530845309453104531145312453134531445315453164531745318453194532045321453224532345324453254532645327453284532945330453314533245333453344533545336453374533845339453404534145342453434534445345453464534745348453494535045351453524535345354453554535645357453584535945360453614536245363453644536545366453674536845369453704537145372453734537445375453764537745378453794538045381453824538345384453854538645387453884538945390453914539245393453944539545396453974539845399454004540145402454034540445405454064540745408454094541045411454124541345414454154541645417454184541945420454214
54224542345424454254542645427454284542945430454314543245433454344543545436454374543845439454404544145442454434544445445454464544745448454494545045451454524545345454454554545645457454584545945460454614546245463454644546545466454674546845469454704547145472454734547445475454764547745478454794548045481454824548345484454854548645487454884548945490454914549245493454944549545496454974549845499455004550145502455034550445505455064550745508455094551045511455124551345514455154551645517455184551945520455214552245523455244552545526455274552845529455304553145532455334553445535455364553745538455394554045541455424554345544455454554645547455484554945550455514555245553455544555545556455574555845559455604556145562455634556445565455664556745568455694557045571455724557345574455754557645577455784557945580455814558245583455844558545586455874558845589455904559145592455934559445595455964559745598455994560045601456024560345604456054560645607456084560945610456114561245613456144561545616456174561845619456204562145622456234562445625456264562745628456294563045631456324563345634456354563645637456384563945640456414564245643456444564545646456474564845649456504565145652456534565445655456564565745658456594566045661456624566345664456654566645667456684566945670456714567245673456744567545676456774567845679456804568145682456834568445685456864568745688456894569045691456924569345694456954569645697456984569945700457014570245703457044570545706457074570845709457104571145712457134571445715457164571745718457194572045721457224572345724457254572645727457284572945730457314573245733457344573545736457374573845739457404574145742457434574445745457464574745748457494575045751457524575345754457554575645757457584575945760457614576245763457644576545766457674576845769457704577145772457734577445775457764577745778457794578045781457824578345784457854578645787457884578945790457914579245793457944579545796457974579845799458004580145802458034580445805458064580745808458094581045811458124581345814458154581645817458184581945820458214
58224582345824458254582645827458284582945830458314583245833458344583545836458374583845839458404584145842458434584445845458464584745848458494585045851458524585345854458554585645857458584585945860458614586245863458644586545866458674586845869458704587145872458734587445875458764587745878458794588045881458824588345884458854588645887458884588945890458914589245893458944589545896458974589845899459004590145902459034590445905459064590745908459094591045911459124591345914459154591645917459184591945920459214592245923459244592545926459274592845929459304593145932459334593445935459364593745938459394594045941459424594345944459454594645947459484594945950459514595245953459544595545956459574595845959459604596145962459634596445965459664596745968459694597045971459724597345974459754597645977459784597945980459814598245983459844598545986459874598845989459904599145992459934599445995459964599745998459994600046001460024600346004460054600646007460084600946010460114601246013460144601546016460174601846019460204602146022460234602446025460264602746028460294603046031460324603346034460354603646037460384603946040460414604246043460444604546046460474604846049460504605146052460534605446055460564605746058460594606046061460624606346064460654606646067460684606946070460714607246073460744607546076460774607846079460804608146082460834608446085460864608746088460894609046091460924609346094460954609646097460984609946100461014610246103461044610546106461074610846109461104611146112461134611446115461164611746118461194612046121461224612346124461254612646127461284612946130461314613246133461344613546136461374613846139461404614146142461434614446145461464614746148461494615046151461524615346154461554615646157461584615946160461614616246163461644616546166461674616846169461704617146172461734617446175461764617746178461794618046181461824618346184461854618646187461884618946190461914619246193461944619546196461974619846199462004620146202462034620446205462064620746208462094621046211462124621346214462154621646217462184621946220462214
62224622346224462254622646227462284622946230462314623246233462344623546236462374623846239462404624146242462434624446245462464624746248462494625046251462524625346254462554625646257462584625946260462614626246263462644626546266462674626846269462704627146272462734627446275462764627746278462794628046281462824628346284462854628646287462884628946290462914629246293462944629546296462974629846299463004630146302463034630446305463064630746308463094631046311463124631346314463154631646317463184631946320463214632246323463244632546326463274632846329463304633146332463334633446335463364633746338463394634046341463424634346344463454634646347463484634946350463514635246353463544635546356463574635846359463604636146362463634636446365463664636746368463694637046371463724637346374463754637646377463784637946380463814638246383463844638546386463874638846389463904639146392463934639446395463964639746398463994640046401464024640346404464054640646407464084640946410464114641246413464144641546416464174641846419464204642146422464234642446425464264642746428464294643046431464324643346434464354643646437464384643946440464414644246443464444644546446464474644846449464504645146452464534645446455464564645746458464594646046461464624646346464464654646646467464684646946470464714647246473464744647546476464774647846479464804648146482464834648446485464864648746488464894649046491464924649346494464954649646497464984649946500465014650246503465044650546506465074650846509465104651146512465134651446515465164651746518465194652046521465224652346524465254652646527465284652946530465314653246533465344653546536465374653846539465404654146542465434654446545465464654746548465494655046551465524655346554465554655646557465584655946560465614656246563465644656546566465674656846569465704657146572465734657446575465764657746578465794658046581465824658346584465854658646587465884658946590465914659246593465944659546596465974659846599466004660146602466034660446605466064660746608466094661046611466124661346614466154661646617466184661946620466214
66224662346624466254662646627466284662946630466314663246633466344663546636466374663846639466404664146642466434664446645466464664746648466494665046651466524665346654466554665646657466584665946660466614666246663466644666546666466674666846669466704667146672466734667446675466764667746678466794668046681466824668346684466854668646687466884668946690466914669246693466944669546696466974669846699467004670146702467034670446705467064670746708467094671046711467124671346714467154671646717467184671946720467214672246723467244672546726467274672846729467304673146732467334673446735467364673746738467394674046741467424674346744467454674646747467484674946750467514675246753467544675546756467574675846759467604676146762467634676446765467664676746768467694677046771467724677346774467754677646777467784677946780467814678246783467844678546786467874678846789467904679146792467934679446795467964679746798467994680046801468024680346804468054680646807468084680946810468114681246813468144681546816468174681846819468204682146822468234682446825468264682746828468294683046831468324683346834468354683646837468384683946840468414684246843468444684546846468474684846849468504685146852468534685446855468564685746858468594686046861468624686346864468654686646867468684686946870468714687246873468744687546876468774687846879468804688146882468834688446885468864688746888468894689046891468924689346894468954689646897468984689946900469014690246903469044690546906469074690846909469104691146912469134691446915469164691746918469194692046921469224692346924469254692646927469284692946930469314693246933469344693546936469374693846939469404694146942469434694446945469464694746948469494695046951469524695346954469554695646957469584695946960469614696246963469644696546966469674696846969469704697146972469734697446975469764697746978469794698046981469824698346984469854698646987469884698946990469914699246993469944699546996469974699846999470004700147002470034700447005470064700747008470094701047011470124701347014470154701647017470184701947020470214
70224702347024470254702647027470284702947030470314703247033470344703547036470374703847039470404704147042470434704447045470464704747048470494705047051470524705347054470554705647057470584705947060470614706247063470644706547066470674706847069470704707147072470734707447075470764707747078470794708047081470824708347084470854708647087470884708947090470914709247093470944709547096470974709847099471004710147102471034710447105471064710747108471094711047111471124711347114471154711647117471184711947120471214712247123471244712547126471274712847129471304713147132471334713447135471364713747138471394714047141471424714347144471454714647147471484714947150471514715247153471544715547156471574715847159471604716147162471634716447165471664716747168471694717047171471724717347174471754717647177471784717947180471814718247183471844718547186471874718847189471904719147192471934719447195471964719747198471994720047201472024720347204472054720647207472084720947210472114721247213472144721547216472174721847219472204722147222472234722447225472264722747228472294723047231472324723347234472354723647237472384723947240472414724247243472444724547246472474724847249472504725147252472534725447255472564725747258472594726047261472624726347264472654726647267472684726947270472714727247273472744727547276472774727847279472804728147282472834728447285472864728747288472894729047291472924729347294472954729647297472984729947300473014730247303473044730547306473074730847309473104731147312473134731447315473164731747318473194732047321473224732347324473254732647327473284732947330473314733247333473344733547336473374733847339473404734147342473434734447345473464734747348473494735047351473524735347354473554735647357473584735947360473614736247363473644736547366473674736847369473704737147372473734737447375473764737747378473794738047381473824738347384473854738647387473884738947390473914739247393473944739547396473974739847399474004740147402474034740447405474064740747408474094741047411474124741347414474154741647417474184741947420474214
74224742347424474254742647427474284742947430474314743247433474344743547436474374743847439474404744147442474434744447445474464744747448474494745047451474524745347454474554745647457474584745947460474614746247463474644746547466474674746847469474704747147472474734747447475474764747747478474794748047481474824748347484474854748647487474884748947490474914749247493474944749547496474974749847499475004750147502475034750447505475064750747508475094751047511475124751347514475154751647517475184751947520475214752247523475244752547526475274752847529475304753147532475334753447535475364753747538475394754047541475424754347544475454754647547475484754947550475514755247553475544755547556475574755847559475604756147562475634756447565475664756747568475694757047571475724757347574475754757647577475784757947580475814758247583475844758547586475874758847589475904759147592475934759447595475964759747598475994760047601476024760347604476054760647607476084760947610476114761247613476144761547616476174761847619476204762147622476234762447625476264762747628476294763047631476324763347634476354763647637476384763947640476414764247643476444764547646476474764847649476504765147652476534765447655476564765747658476594766047661476624766347664476654766647667476684766947670476714767247673476744767547676476774767847679476804768147682476834768447685476864768747688476894769047691476924769347694476954769647697476984769947700477014770247703477044770547706477074770847709477104771147712477134771447715477164771747718477194772047721477224772347724477254772647727477284772947730477314773247733477344773547736477374773847739477404774147742477434774447745477464774747748477494775047751477524775347754477554775647757477584775947760477614776247763477644776547766477674776847769477704777147772477734777447775477764777747778477794778047781477824778347784477854778647787477884778947790477914779247793477944779547796477974779847799478004780147802478034780447805478064780747808478094781047811478124781347814478154781647817478184781947820478214
78224782347824478254782647827478284782947830478314783247833478344783547836478374783847839478404784147842478434784447845478464784747848478494785047851478524785347854478554785647857478584785947860478614786247863478644786547866478674786847869478704787147872478734787447875478764787747878478794788047881478824788347884478854788647887478884788947890478914789247893478944789547896478974789847899479004790147902479034790447905479064790747908479094791047911479124791347914479154791647917479184791947920479214792247923479244792547926479274792847929479304793147932479334793447935479364793747938479394794047941479424794347944479454794647947479484794947950479514795247953479544795547956479574795847959479604796147962479634796447965479664796747968479694797047971479724797347974479754797647977479784797947980479814798247983479844798547986479874798847989479904799147992479934799447995479964799747998479994800048001480024800348004480054800648007480084800948010480114801248013480144801548016480174801848019480204802148022480234802448025480264802748028480294803048031480324803348034480354803648037480384803948040480414804248043480444804548046480474804848049480504805148052480534805448055480564805748058480594806048061480624806348064480654806648067480684806948070480714807248073480744807548076480774807848079480804808148082480834808448085480864808748088480894809048091480924809348094480954809648097480984809948100481014810248103481044810548106481074810848109481104811148112481134811448115481164811748118481194812048121481224812348124481254812648127481284812948130481314813248133481344813548136481374813848139481404814148142481434814448145481464814748148481494815048151481524815348154481554815648157481584815948160481614816248163481644816548166481674816848169481704817148172481734817448175481764817748178481794818048181481824818348184481854818648187481884818948190481914819248193481944819548196481974819848199482004820148202482034820448205482064820748208482094821048211482124821348214482154821648217482184821948220482214
82224822348224482254822648227482284822948230482314823248233482344823548236482374823848239482404824148242482434824448245482464824748248482494825048251482524825348254482554825648257482584825948260482614826248263482644826548266482674826848269482704827148272482734827448275482764827748278482794828048281482824828348284482854828648287482884828948290482914829248293482944829548296482974829848299483004830148302483034830448305483064830748308483094831048311483124831348314483154831648317483184831948320483214832248323483244832548326483274832848329483304833148332483334833448335483364833748338483394834048341483424834348344483454834648347483484834948350483514835248353483544835548356483574835848359483604836148362483634836448365483664836748368483694837048371483724837348374483754837648377483784837948380483814838248383483844838548386483874838848389483904839148392483934839448395483964839748398483994840048401484024840348404484054840648407484084840948410484114841248413484144841548416484174841848419484204842148422484234842448425484264842748428484294843048431484324843348434484354843648437484384843948440484414844248443484444844548446484474844848449484504845148452484534845448455484564845748458484594846048461484624846348464484654846648467484684846948470484714847248473484744847548476484774847848479484804848148482484834848448485484864848748488484894849048491484924849348494484954849648497484984849948500485014850248503485044850548506485074850848509485104851148512485134851448515485164851748518485194852048521485224852348524485254852648527485284852948530485314853248533485344853548536485374853848539485404854148542485434854448545485464854748548485494855048551485524855348554485554855648557485584855948560485614856248563485644856548566485674856848569485704857148572485734857448575485764857748578485794858048581485824858348584485854858648587485884858948590485914859248593485944859548596485974859848599486004860148602486034860448605486064860748608486094861048611486124861348614486154861648617486184861948620486214
86224862348624486254862648627486284862948630486314863248633486344863548636486374863848639486404864148642486434864448645486464864748648486494865048651486524865348654486554865648657486584865948660486614866248663486644866548666486674866848669486704867148672486734867448675486764867748678486794868048681486824868348684486854868648687486884868948690486914869248693486944869548696486974869848699487004870148702487034870448705487064870748708487094871048711487124871348714487154871648717487184871948720487214872248723487244872548726487274872848729487304873148732487334873448735487364873748738487394874048741487424874348744487454874648747487484874948750487514875248753487544875548756487574875848759487604876148762487634876448765487664876748768487694877048771487724877348774487754877648777487784877948780487814878248783487844878548786487874878848789487904879148792487934879448795487964879748798487994880048801488024880348804488054880648807488084880948810488114881248813488144881548816488174881848819488204882148822488234882448825488264882748828488294883048831488324883348834488354883648837488384883948840488414884248843488444884548846488474884848849488504885148852488534885448855488564885748858488594886048861488624886348864488654886648867488684886948870488714887248873488744887548876488774887848879488804888148882488834888448885488864888748888488894889048891488924889348894488954889648897488984889948900489014890248903489044890548906489074890848909489104891148912489134891448915489164891748918489194892048921489224892348924489254892648927489284892948930489314893248933489344893548936489374893848939489404894148942489434894448945489464894748948489494895048951489524895348954489554895648957489584895948960489614896248963489644896548966489674896848969489704897148972489734897448975489764897748978489794898048981489824898348984489854898648987489884898948990489914899248993489944899548996489974899848999490004900149002490034900449005490064900749008490094901049011490124901349014490154901649017490184901949020490214
90224902349024490254902649027490284902949030490314903249033490344903549036490374903849039490404904149042490434904449045490464904749048490494905049051490524905349054490554905649057490584905949060490614906249063490644906549066490674906849069490704907149072490734907449075490764907749078490794908049081490824908349084490854908649087490884908949090490914909249093490944909549096490974909849099491004910149102491034910449105491064910749108491094911049111491124911349114491154911649117491184911949120491214912249123491244912549126491274912849129491304913149132491334913449135491364913749138491394914049141491424914349144491454914649147491484914949150491514915249153491544915549156491574915849159491604916149162491634916449165491664916749168491694917049171491724917349174491754917649177491784917949180491814918249183491844918549186491874918849189491904919149192491934919449195491964919749198491994920049201492024920349204492054920649207492084920949210492114921249213492144921549216492174921849219492204922149222492234922449225492264922749228492294923049231492324923349234492354923649237492384923949240492414924249243492444924549246492474924849249492504925149252492534925449255492564925749258492594926049261492624926349264492654926649267492684926949270492714927249273492744927549276492774927849279492804928149282492834928449285492864928749288492894929049291492924929349294492954929649297492984929949300493014930249303493044930549306493074930849309493104931149312493134931449315493164931749318493194932049321493224932349324493254932649327493284932949330493314933249333493344933549336493374933849339493404934149342493434934449345493464934749348493494935049351493524935349354493554935649357493584935949360493614936249363493644936549366493674936849369493704937149372493734937449375493764937749378493794938049381493824938349384493854938649387493884938949390493914939249393493944939549396493974939849399494004940149402494034940449405494064940749408494094941049411494124941349414494154941649417494184941949420494214
94224942349424494254942649427494284942949430494314943249433494344943549436494374943849439494404944149442494434944449445494464944749448494494945049451494524945349454494554945649457494584945949460494614946249463494644946549466494674946849469494704947149472494734947449475494764947749478494794948049481494824948349484494854948649487494884948949490494914949249493494944949549496494974949849499495004950149502495034950449505495064950749508495094951049511495124951349514495154951649517495184951949520495214952249523495244952549526495274952849529495304953149532495334953449535495364953749538495394954049541495424954349544495454954649547495484954949550495514955249553495544955549556495574955849559495604956149562495634956449565495664956749568495694957049571495724957349574495754957649577495784957949580495814958249583495844958549586495874958849589495904959149592495934959449595495964959749598495994960049601496024960349604496054960649607496084960949610496114961249613496144961549616496174961849619496204962149622496234962449625496264962749628496294963049631496324963349634496354963649637496384963949640496414964249643496444964549646496474964849649496504965149652496534965449655496564965749658496594966049661496624966349664496654966649667496684966949670496714967249673496744967549676496774967849679496804968149682496834968449685496864968749688496894969049691496924969349694496954969649697496984969949700497014970249703497044970549706497074970849709497104971149712497134971449715497164971749718497194972049721497224972349724497254972649727497284972949730497314973249733497344973549736497374973849739497404974149742497434974449745497464974749748497494975049751497524975349754497554975649757497584975949760497614976249763497644976549766497674976849769497704977149772497734977449775497764977749778497794978049781497824978349784497854978649787497884978949790497914979249793497944979549796497974979849799498004980149802498034980449805498064980749808498094981049811498124981349814498154981649817498184981949820498214
98224982349824498254982649827498284982949830498314983249833498344983549836498374983849839498404984149842498434984449845498464984749848498494985049851498524985349854498554985649857498584985949860498614986249863498644986549866498674986849869498704987149872498734987449875498764987749878498794988049881498824988349884498854988649887498884988949890498914989249893498944989549896498974989849899499004990149902499034990449905499064990749908499094991049911499124991349914499154991649917499184991949920499214992249923499244992549926499274992849929499304993149932499334993449935499364993749938499394994049941499424994349944499454994649947499484994949950499514995249953499544995549956499574995849959499604996149962499634996449965499664996749968499694997049971499724997349974499754997649977499784997949980499814998249983499844998549986499874998849989499904999149992499934999449995499964999749998499995000050001500025000350004500055000650007500085000950010500115001250013500145001550016500175001850019500205002150022500235002450025500265002750028500295003050031500325003350034500355003650037500385003950040500415004250043500445004550046500475004850049500505005150052500535005450055500565005750058500595006050061500625006350064500655006650067500685006950070500715007250073500745007550076500775007850079500805008150082500835008450085500865008750088500895009050091500925009350094500955009650097500985009950100501015010250103501045010550106501075010850109501105011150112501135011450115501165011750118501195012050121501225012350124501255012650127501285012950130501315013250133501345013550136501375013850139501405014150142501435014450145501465014750148501495015050151501525015350154501555015650157501585015950160501615016250163501645016550166501675016850169501705017150172501735017450175501765017750178501795018050181501825018350184501855018650187501885018950190501915019250193501945019550196501975019850199502005020150202502035020450205502065020750208502095021050211502125021350214502155021650217502185021950220502215
02225022350224502255022650227502285022950230502315023250233502345023550236502375023850239502405024150242502435024450245502465024750248502495025050251502525025350254502555025650257502585025950260502615026250263502645026550266502675026850269502705027150272502735027450275502765027750278502795028050281502825028350284502855028650287502885028950290502915029250293502945029550296502975029850299503005030150302503035030450305503065030750308503095031050311503125031350314503155031650317503185031950320503215032250323503245032550326503275032850329503305033150332503335033450335503365033750338503395034050341503425034350344503455034650347503485034950350503515035250353503545035550356503575035850359503605036150362503635036450365503665036750368503695037050371503725037350374503755037650377503785037950380503815038250383503845038550386503875038850389503905039150392503935039450395503965039750398503995040050401504025040350404504055040650407504085040950410504115041250413504145041550416504175041850419504205042150422504235042450425504265042750428504295043050431504325043350434504355043650437504385043950440504415044250443504445044550446504475044850449504505045150452504535045450455504565045750458504595046050461504625046350464504655046650467504685046950470504715047250473504745047550476504775047850479504805048150482504835048450485504865048750488504895049050491504925049350494504955049650497504985049950500505015050250503505045050550506505075050850509505105051150512505135051450515505165051750518505195052050521505225052350524505255052650527505285052950530505315053250533505345053550536505375053850539505405054150542505435054450545505465054750548505495055050551505525055350554505555055650557505585055950560505615056250563505645056550566505675056850569505705057150572505735057450575505765057750578505795058050581505825058350584505855058650587505885058950590505915059250593505945059550596505975059850599506005060150602506035060450605506065060750608506095061050611506125061350614506155061650617506185061950620506215
06225062350624506255062650627506285062950630506315063250633506345063550636506375063850639506405064150642506435064450645506465064750648506495065050651506525065350654506555065650657506585065950660506615066250663506645066550666506675066850669506705067150672506735067450675506765067750678506795068050681506825068350684506855068650687506885068950690506915069250693506945069550696506975069850699507005070150702507035070450705507065070750708507095071050711507125071350714507155071650717507185071950720507215072250723507245072550726507275072850729507305073150732507335073450735507365073750738507395074050741507425074350744507455074650747507485074950750507515075250753507545075550756507575075850759507605076150762507635076450765507665076750768507695077050771507725077350774507755077650777507785077950780507815078250783507845078550786507875078850789507905079150792507935079450795507965079750798507995080050801508025080350804508055080650807508085080950810508115081250813508145081550816508175081850819508205082150822508235082450825508265082750828508295083050831508325083350834508355083650837508385083950840508415084250843508445084550846508475084850849508505085150852508535085450855508565085750858508595086050861508625086350864508655086650867508685086950870508715087250873508745087550876508775087850879508805088150882508835088450885508865088750888508895089050891508925089350894508955089650897508985089950900509015090250903509045090550906509075090850909509105091150912509135091450915509165091750918509195092050921509225092350924509255092650927509285092950930509315093250933509345093550936509375093850939509405094150942509435094450945509465094750948509495095050951509525095350954509555095650957509585095950960509615096250963509645096550966509675096850969509705097150972509735097450975509765097750978509795098050981509825098350984509855098650987509885098950990509915099250993509945099550996509975099850999510005100151002510035100451005510065100751008510095101051011510125101351014510155101651017510185101951020510215
10225102351024510255102651027510285102951030510315103251033510345103551036510375103851039510405104151042510435104451045510465104751048510495105051051510525105351054510555105651057510585105951060510615106251063510645106551066510675106851069510705107151072510735107451075510765107751078510795108051081510825108351084510855108651087510885108951090510915109251093510945109551096510975109851099511005110151102511035110451105511065110751108511095111051111511125111351114511155111651117511185111951120511215112251123511245112551126511275112851129511305113151132511335113451135511365113751138511395114051141511425114351144511455114651147511485114951150511515115251153511545115551156511575115851159511605116151162511635116451165511665116751168511695117051171511725117351174511755117651177511785117951180511815118251183511845118551186511875118851189511905119151192511935119451195511965119751198511995120051201512025120351204512055120651207512085120951210512115121251213512145121551216512175121851219512205122151222512235122451225512265122751228512295123051231512325123351234512355123651237512385123951240512415124251243512445124551246512475124851249512505125151252512535125451255512565125751258512595126051261512625126351264512655126651267512685126951270512715127251273512745127551276512775127851279512805128151282512835128451285512865128751288512895129051291512925129351294512955129651297512985129951300513015130251303513045130551306513075130851309513105131151312513135131451315513165131751318513195132051321513225132351324513255132651327513285132951330513315133251333513345133551336513375133851339513405134151342513435134451345513465134751348513495135051351513525135351354513555135651357513585135951360513615136251363513645136551366513675136851369513705137151372513735137451375513765137751378513795138051381513825138351384513855138651387513885138951390513915139251393513945139551396513975139851399514005140151402514035140451405514065140751408514095141051411514125141351414514155141651417514185141951420514215
14225142351424514255142651427514285142951430514315143251433514345143551436514375143851439514405144151442514435144451445514465144751448514495145051451514525145351454514555145651457514585145951460514615146251463514645146551466514675146851469514705147151472514735147451475514765147751478514795148051481514825148351484514855148651487514885148951490514915149251493514945149551496514975149851499515005150151502515035150451505515065150751508515095151051511515125151351514515155151651517515185151951520515215152251523515245152551526515275152851529515305153151532515335153451535515365153751538515395154051541515425154351544515455154651547515485154951550515515155251553515545155551556515575155851559515605156151562515635156451565515665156751568515695157051571515725157351574515755157651577515785157951580515815158251583515845158551586515875158851589515905159151592515935159451595515965159751598515995160051601516025160351604516055160651607516085160951610516115161251613516145161551616516175161851619516205162151622516235162451625516265162751628516295163051631516325163351634516355163651637516385163951640516415164251643516445164551646516475164851649516505165151652516535165451655516565165751658516595166051661516625166351664516655166651667516685166951670516715167251673516745167551676516775167851679516805168151682516835168451685516865168751688516895169051691516925169351694516955169651697516985169951700517015170251703517045170551706517075170851709517105171151712517135171451715517165171751718517195172051721517225172351724517255172651727517285172951730517315173251733517345173551736517375173851739517405174151742517435174451745517465174751748517495175051751517525175351754517555175651757517585175951760517615176251763517645176551766517675176851769517705177151772517735177451775517765177751778517795178051781517825178351784517855178651787517885178951790517915179251793517945179551796517975179851799518005180151802518035180451805518065180751808518095181051811518125181351814518155181651817518185181951820518215
18225182351824518255182651827518285182951830518315183251833518345183551836518375183851839518405184151842518435184451845518465184751848518495185051851518525185351854518555185651857518585185951860518615186251863518645186551866518675186851869518705187151872518735187451875518765187751878518795188051881518825188351884518855188651887518885188951890518915189251893518945189551896518975189851899519005190151902519035190451905519065190751908519095191051911519125191351914519155191651917519185191951920519215192251923519245192551926519275192851929519305193151932519335193451935519365193751938519395194051941519425194351944519455194651947519485194951950519515195251953519545195551956519575195851959519605196151962519635196451965519665196751968519695197051971519725197351974519755197651977519785197951980519815198251983519845198551986519875198851989519905199151992519935199451995519965199751998519995200052001520025200352004520055200652007520085200952010520115201252013520145201552016520175201852019520205202152022520235202452025520265202752028520295203052031520325203352034520355203652037520385203952040520415204252043520445204552046520475204852049520505205152052520535205452055520565205752058520595206052061520625206352064520655206652067520685206952070520715207252073520745207552076520775207852079520805208152082520835208452085520865208752088520895209052091520925209352094520955209652097520985209952100521015210252103521045210552106521075210852109521105211152112521135211452115521165211752118521195212052121521225212352124521255212652127521285212952130521315213252133521345213552136521375213852139521405214152142521435214452145521465214752148521495215052151521525215352154521555215652157521585215952160521615216252163521645216552166521675216852169521705217152172521735217452175521765217752178521795218052181521825218352184521855218652187521885218952190521915219252193521945219552196521975219852199522005220152202522035220452205522065220752208522095221052211522125221352214522155221652217522185221952220522215
22225222352224522255222652227522285222952230522315223252233522345223552236522375223852239522405224152242522435224452245522465224752248522495225052251522525225352254522555225652257522585225952260522615226252263522645226552266522675226852269522705227152272522735227452275522765227752278522795228052281522825228352284522855228652287522885228952290522915229252293522945229552296522975229852299523005230152302523035230452305523065230752308523095231052311523125231352314523155231652317523185231952320523215232252323523245232552326523275232852329523305233152332523335233452335523365233752338523395234052341523425234352344523455234652347523485234952350523515235252353523545235552356523575235852359523605236152362523635236452365523665236752368523695237052371523725237352374523755237652377523785237952380523815238252383523845238552386523875238852389523905239152392523935239452395523965239752398523995240052401524025240352404524055240652407524085240952410524115241252413524145241552416524175241852419524205242152422524235242452425524265242752428524295243052431524325243352434524355243652437524385243952440524415244252443524445244552446524475244852449524505245152452524535245452455524565245752458524595246052461524625246352464524655246652467524685246952470524715247252473524745247552476524775247852479524805248152482524835248452485524865248752488524895249052491524925249352494524955249652497524985249952500525015250252503525045250552506525075250852509525105251152512525135251452515525165251752518525195252052521525225252352524525255252652527525285252952530525315253252533525345253552536525375253852539525405254152542525435254452545525465254752548525495255052551525525255352554525555255652557525585255952560525615256252563525645256552566525675256852569525705257152572525735257452575525765257752578525795258052581525825258352584525855258652587525885258952590525915259252593525945259552596525975259852599526005260152602526035260452605526065260752608526095261052611526125261352614526155261652617526185261952620526215
26225262352624526255262652627526285262952630526315263252633526345263552636526375263852639526405264152642526435264452645526465264752648526495265052651526525265352654526555265652657526585265952660526615266252663526645266552666526675266852669526705267152672526735267452675526765267752678526795268052681526825268352684526855268652687526885268952690526915269252693526945269552696526975269852699527005270152702527035270452705527065270752708527095271052711527125271352714527155271652717527185271952720527215272252723527245272552726527275272852729527305273152732527335273452735527365273752738527395274052741527425274352744527455274652747527485274952750527515275252753527545275552756527575275852759527605276152762527635276452765527665276752768527695277052771527725277352774527755277652777527785277952780527815278252783527845278552786527875278852789527905279152792527935279452795527965279752798527995280052801528025280352804528055280652807528085280952810528115281252813528145281552816528175281852819528205282152822528235282452825528265282752828528295283052831528325283352834528355283652837528385283952840528415284252843528445284552846528475284852849528505285152852528535285452855528565285752858528595286052861528625286352864528655286652867528685286952870528715287252873528745287552876528775287852879528805288152882528835288452885528865288752888528895289052891528925289352894528955289652897528985289952900529015290252903529045290552906529075290852909529105291152912529135291452915529165291752918529195292052921529225292352924529255292652927529285292952930529315293252933529345293552936529375293852939529405294152942529435294452945529465294752948529495295052951529525295352954529555295652957529585295952960529615296252963529645296552966529675296852969529705297152972529735297452975529765297752978529795298052981529825298352984529855298652987529885298952990529915299252993529945299552996529975299852999530005300153002530035300453005530065300753008530095301053011530125301353014530155301653017530185301953020530215
30225302353024530255302653027530285302953030530315303253033530345303553036530375303853039530405304153042530435304453045530465304753048530495305053051530525305353054530555305653057530585305953060530615306253063530645306553066530675306853069530705307153072530735307453075530765307753078530795308053081530825308353084530855308653087530885308953090530915309253093530945309553096530975309853099531005310153102531035310453105531065310753108531095311053111531125311353114531155311653117531185311953120531215312253123531245312553126531275312853129531305313153132531335313453135531365313753138531395314053141531425314353144531455314653147531485314953150531515315253153531545315553156531575315853159531605316153162531635316453165531665316753168531695317053171531725317353174531755317653177531785317953180531815318253183531845318553186531875318853189531905319153192531935319453195531965319753198531995320053201532025320353204532055320653207532085320953210532115321253213532145321553216532175321853219532205322153222532235322453225532265322753228532295323053231532325323353234532355323653237532385323953240532415324253243532445324553246532475324853249532505325153252532535325453255532565325753258532595326053261532625326353264532655326653267532685326953270532715327253273532745327553276532775327853279532805328153282532835328453285532865328753288532895329053291532925329353294532955329653297532985329953300533015330253303533045330553306533075330853309533105331153312533135331453315533165331753318533195332053321533225332353324533255332653327533285332953330533315333253333533345333553336533375333853339533405334153342533435334453345533465334753348533495335053351533525335353354533555335653357533585335953360533615336253363533645336553366533675336853369533705337153372533735337453375533765337753378533795338053381533825338353384533855338653387533885338953390533915339253393533945339553396533975339853399534005340153402534035340453405534065340753408534095341053411534125341353414534155341653417534185341953420534215
34225342353424534255342653427534285342953430534315343253433534345343553436534375343853439534405344153442534435344453445534465344753448534495345053451534525345353454534555345653457534585345953460534615346253463534645346553466534675346853469534705347153472534735347453475534765347753478534795348053481534825348353484534855348653487534885348953490534915349253493534945349553496534975349853499535005350153502535035350453505535065350753508535095351053511535125351353514535155351653517535185351953520535215352253523535245352553526535275352853529535305353153532535335353453535535365353753538535395354053541535425354353544535455354653547535485354953550535515355253553535545355553556535575355853559535605356153562535635356453565535665356753568535695357053571535725357353574535755357653577535785357953580535815358253583535845358553586535875358853589535905359153592535935359453595535965359753598535995360053601536025360353604536055360653607536085360953610536115361253613536145361553616536175361853619536205362153622536235362453625536265362753628536295363053631536325363353634536355363653637536385363953640536415364253643536445364553646536475364853649536505365153652536535365453655536565365753658536595366053661536625366353664536655366653667536685366953670536715367253673536745367553676536775367853679536805368153682536835368453685536865368753688536895369053691536925369353694536955369653697536985369953700537015370253703537045370553706537075370853709537105371153712537135371453715537165371753718537195372053721537225372353724537255372653727537285372953730537315373253733537345373553736537375373853739537405374153742537435374453745537465374753748537495375053751537525375353754537555375653757537585375953760537615376253763537645376553766537675376853769537705377153772537735377453775537765377753778537795378053781537825378353784537855378653787537885378953790537915379253793537945379553796537975379853799538005380153802538035380453805538065380753808538095381053811538125381353814538155381653817538185381953820538215
38225382353824538255382653827538285382953830538315383253833538345383553836538375383853839538405384153842538435384453845538465384753848538495385053851538525385353854538555385653857538585385953860538615386253863538645386553866538675386853869538705387153872538735387453875538765387753878538795388053881538825388353884538855388653887538885388953890538915389253893538945389553896538975389853899539005390153902539035390453905539065390753908539095391053911539125391353914539155391653917539185391953920539215392253923539245392553926539275392853929539305393153932539335393453935539365393753938539395394053941539425394353944539455394653947539485394953950539515395253953539545395553956539575395853959539605396153962539635396453965539665396753968539695397053971539725397353974539755397653977539785397953980539815398253983539845398553986539875398853989539905399153992539935399453995539965399753998539995400054001540025400354004540055400654007540085400954010540115401254013540145401554016540175401854019540205402154022540235402454025540265402754028540295403054031540325403354034540355403654037540385403954040540415404254043540445404554046540475404854049540505405154052540535405454055540565405754058540595406054061540625406354064540655406654067540685406954070540715407254073540745407554076540775407854079540805408154082540835408454085540865408754088540895409054091540925409354094540955409654097540985409954100541015410254103541045410554106541075410854109541105411154112541135411454115541165411754118541195412054121541225412354124541255412654127541285412954130541315413254133541345413554136541375413854139541405414154142541435414454145541465414754148541495415054151541525415354154541555415654157541585415954160541615416254163541645416554166541675416854169541705417154172541735417454175541765417754178541795418054181541825418354184541855418654187541885418954190541915419254193541945419554196541975419854199542005420154202542035420454205542065420754208542095421054211542125421354214542155421654217542185421954220542215
42225422354224542255422654227542285422954230542315423254233542345423554236542375423854239542405424154242542435424454245542465424754248542495425054251542525425354254542555425654257542585425954260542615426254263542645426554266542675426854269542705427154272542735427454275542765427754278542795428054281542825428354284542855428654287542885428954290542915429254293542945429554296542975429854299543005430154302543035430454305543065430754308543095431054311543125431354314543155431654317543185431954320543215432254323543245432554326543275432854329543305433154332543335433454335543365433754338543395434054341543425434354344543455434654347543485434954350543515435254353543545435554356543575435854359543605436154362543635436454365543665436754368543695437054371543725437354374543755437654377543785437954380543815438254383543845438554386543875438854389543905439154392543935439454395543965439754398543995440054401544025440354404544055440654407544085440954410544115441254413544145441554416544175441854419544205442154422544235442454425544265442754428544295443054431544325443354434544355443654437544385443954440544415444254443544445444554446544475444854449544505445154452544535445454455544565445754458544595446054461544625446354464544655446654467544685446954470544715447254473544745447554476544775447854479544805448154482544835448454485544865448754488544895449054491544925449354494544955449654497544985449954500545015450254503545045450554506545075450854509545105451154512545135451454515545165451754518545195452054521545225452354524545255452654527545285452954530545315453254533545345453554536545375453854539545405454154542545435454454545545465454754548545495455054551545525455354554545555455654557545585455954560545615456254563545645456554566545675456854569545705457154572545735457454575545765457754578545795458054581545825458354584545855458654587545885458954590545915459254593545945459554596545975459854599546005460154602546035460454605546065460754608546095461054611546125461354614546155461654617546185461954620546215
46225462354624546255462654627546285462954630546315463254633546345463554636546375463854639546405464154642546435464454645546465464754648546495465054651546525465354654546555465654657546585465954660546615466254663546645466554666546675466854669546705467154672546735467454675546765467754678546795468054681546825468354684546855468654687546885468954690546915469254693546945469554696546975469854699547005470154702547035470454705547065470754708547095471054711547125471354714547155471654717547185471954720547215472254723547245472554726547275472854729547305473154732547335473454735547365473754738547395474054741547425474354744547455474654747547485474954750547515475254753547545475554756547575475854759547605476154762547635476454765547665476754768547695477054771547725477354774547755477654777547785477954780547815478254783547845478554786547875478854789547905479154792547935479454795547965479754798547995480054801548025480354804548055480654807548085480954810548115481254813548145481554816548175481854819548205482154822548235482454825548265482754828548295483054831548325483354834548355483654837548385483954840548415484254843548445484554846548475484854849548505485154852548535485454855548565485754858548595486054861548625486354864548655486654867548685486954870548715487254873548745487554876548775487854879548805488154882548835488454885548865488754888548895489054891548925489354894548955489654897548985489954900549015490254903549045490554906549075490854909549105491154912549135491454915549165491754918549195492054921549225492354924549255492654927549285492954930549315493254933549345493554936549375493854939549405494154942549435494454945549465494754948549495495054951549525495354954549555495654957549585495954960549615496254963549645496554966549675496854969549705497154972549735497454975549765497754978549795498054981549825498354984549855498654987549885498954990549915499254993549945499554996549975499854999550005500155002550035500455005550065500755008550095501055011550125501355014550155501655017550185501955020550215
50225502355024550255502655027550285502955030550315503255033550345503555036550375503855039550405504155042550435504455045550465504755048550495505055051550525505355054550555505655057550585505955060550615506255063550645506555066550675506855069550705507155072550735507455075550765507755078550795508055081550825508355084550855508655087550885508955090550915509255093550945509555096550975509855099551005510155102551035510455105551065510755108551095511055111551125511355114551155511655117551185511955120551215512255123551245512555126551275512855129551305513155132551335513455135551365513755138551395514055141551425514355144551455514655147551485514955150551515515255153551545515555156551575515855159551605516155162551635516455165551665516755168551695517055171551725517355174551755517655177551785517955180551815518255183551845518555186551875518855189551905519155192551935519455195551965519755198551995520055201552025520355204552055520655207552085520955210552115521255213552145521555216552175521855219552205522155222552235522455225552265522755228552295523055231552325523355234552355523655237552385523955240552415524255243552445524555246552475524855249552505525155252552535525455255552565525755258552595526055261552625526355264552655526655267552685526955270552715527255273552745527555276552775527855279552805528155282552835528455285552865528755288552895529055291552925529355294552955529655297552985529955300553015530255303553045530555306553075530855309553105531155312553135531455315553165531755318553195532055321553225532355324553255532655327553285532955330553315533255333553345533555336553375533855339553405534155342553435534455345553465534755348553495535055351553525535355354553555535655357553585535955360553615536255363553645536555366553675536855369553705537155372553735537455375553765537755378553795538055381553825538355384553855538655387553885538955390553915539255393553945539555396553975539855399554005540155402554035540455405554065540755408554095541055411554125541355414554155541655417554185541955420554215
54225542355424554255542655427554285542955430554315543255433554345543555436554375543855439554405544155442554435544455445554465544755448554495545055451554525545355454554555545655457554585545955460554615546255463554645546555466554675546855469554705547155472554735547455475554765547755478554795548055481554825548355484554855548655487554885548955490554915549255493554945549555496554975549855499555005550155502555035550455505555065550755508555095551055511555125551355514555155551655517555185551955520555215552255523555245552555526555275552855529555305553155532555335553455535555365553755538555395554055541555425554355544555455554655547555485554955550555515555255553555545555555556555575555855559555605556155562555635556455565555665556755568555695557055571555725557355574555755557655577555785557955580555815558255583555845558555586555875558855589555905559155592555935559455595555965559755598555995560055601556025560355604556055560655607556085560955610556115561255613556145561555616556175561855619556205562155622556235562455625556265562755628556295563055631556325563355634556355563655637556385563955640556415564255643556445564555646556475564855649556505565155652556535565455655556565565755658556595566055661556625566355664556655566655667556685566955670556715567255673556745567555676556775567855679556805568155682556835568455685556865568755688556895569055691556925569355694556955569655697556985569955700557015570255703557045570555706557075570855709557105571155712557135571455715557165571755718557195572055721557225572355724557255572655727557285572955730557315573255733557345573555736557375573855739557405574155742557435574455745557465574755748557495575055751557525575355754557555575655757557585575955760557615576255763557645576555766557675576855769557705577155772557735577455775557765577755778557795578055781557825578355784557855578655787557885578955790557915579255793557945579555796557975579855799558005580155802558035580455805558065580755808558095581055811558125581355814558155581655817558185581955820558215
58225582355824558255582655827558285582955830558315583255833558345583555836558375583855839558405584155842558435584455845558465584755848558495585055851558525585355854558555585655857558585585955860558615586255863558645586555866558675586855869558705587155872558735587455875558765587755878558795588055881558825588355884558855588655887558885588955890558915589255893558945589555896558975589855899559005590155902559035590455905559065590755908559095591055911559125591355914559155591655917559185591955920559215592255923559245592555926559275592855929559305593155932559335593455935559365593755938559395594055941559425594355944559455594655947559485594955950559515595255953559545595555956559575595855959559605596155962559635596455965559665596755968559695597055971559725597355974559755597655977559785597955980559815598255983559845598555986559875598855989559905599155992559935599455995559965599755998559995600056001560025600356004560055600656007560085600956010560115601256013560145601556016560175601856019560205602156022560235602456025560265602756028560295603056031560325603356034560355603656037560385603956040560415604256043560445604556046560475604856049560505605156052560535605456055560565605756058560595606056061560625606356064560655606656067560685606956070560715607256073560745607556076560775607856079560805608156082560835608456085560865608756088560895609056091560925609356094560955609656097560985609956100561015610256103561045610556106561075610856109561105611156112561135611456115561165611756118561195612056121561225612356124561255612656127561285612956130561315613256133561345613556136561375613856139561405614156142561435614456145561465614756148561495615056151561525615356154561555615656157561585615956160561615616256163561645616556166561675616856169561705617156172561735617456175561765617756178561795618056181561825618356184561855618656187561885618956190561915619256193561945619556196561975619856199562005620156202562035620456205562065620756208562095621056211562125621356214562155621656217562185621956220562215
62225622356224562255622656227562285622956230562315623256233562345623556236562375623856239562405624156242562435624456245562465624756248562495625056251562525625356254562555625656257562585625956260562615626256263562645626556266562675626856269562705627156272562735627456275562765627756278562795628056281562825628356284562855628656287562885628956290562915629256293562945629556296562975629856299563005630156302563035630456305563065630756308563095631056311563125631356314563155631656317563185631956320563215632256323563245632556326563275632856329563305633156332563335633456335563365633756338563395634056341563425634356344563455634656347563485634956350563515635256353563545635556356563575635856359563605636156362563635636456365563665636756368563695637056371563725637356374563755637656377563785637956380563815638256383563845638556386563875638856389563905639156392563935639456395563965639756398563995640056401564025640356404564055640656407564085640956410564115641256413564145641556416564175641856419564205642156422564235642456425564265642756428564295643056431564325643356434564355643656437564385643956440564415644256443564445644556446564475644856449564505645156452564535645456455564565645756458564595646056461564625646356464564655646656467564685646956470564715647256473564745647556476564775647856479564805648156482564835648456485564865648756488564895649056491564925649356494564955649656497564985649956500565015650256503565045650556506565075650856509565105651156512565135651456515565165651756518565195652056521565225652356524565255652656527565285652956530565315653256533565345653556536565375653856539565405654156542565435654456545565465654756548565495655056551565525655356554565555655656557565585655956560565615656256563565645656556566565675656856569565705657156572565735657456575565765657756578565795658056581565825658356584565855658656587565885658956590565915659256593565945659556596565975659856599566005660156602566035660456605566065660756608566095661056611566125661356614566155661656617566185661956620566215
66225662356624566255662656627566285662956630566315663256633566345663556636566375663856639566405664156642566435664456645566465664756648566495665056651566525665356654566555665656657566585665956660566615666256663566645666556666566675666856669566705667156672566735667456675566765667756678566795668056681566825668356684566855668656687566885668956690566915669256693566945669556696566975669856699567005670156702567035670456705567065670756708567095671056711567125671356714567155671656717567185671956720567215672256723567245672556726567275672856729567305673156732567335673456735567365673756738567395674056741567425674356744567455674656747567485674956750567515675256753567545675556756567575675856759567605676156762567635676456765567665676756768567695677056771567725677356774567755677656777567785677956780567815678256783567845678556786567875678856789567905679156792567935679456795567965679756798567995680056801568025680356804568055680656807568085680956810568115681256813568145681556816568175681856819568205682156822568235682456825568265682756828568295683056831568325683356834568355683656837568385683956840568415684256843568445684556846568475684856849568505685156852568535685456855568565685756858568595686056861568625686356864568655686656867568685686956870568715687256873568745687556876568775687856879568805688156882568835688456885568865688756888568895689056891568925689356894568955689656897568985689956900569015690256903569045690556906569075690856909569105691156912569135691456915569165691756918569195692056921569225692356924569255692656927569285692956930569315693256933569345693556936569375693856939569405694156942569435694456945569465694756948569495695056951569525695356954569555695656957569585695956960569615696256963569645696556966569675696856969569705697156972569735697456975569765697756978569795698056981569825698356984569855698656987569885698956990569915699256993569945699556996569975699856999570005700157002570035700457005570065700757008570095701057011570125701357014570155701657017570185701957020570215
70225702357024570255702657027570285702957030570315703257033570345703557036570375703857039570405704157042570435704457045570465704757048570495705057051570525705357054570555705657057570585705957060570615706257063570645706557066570675706857069570705707157072570735707457075570765707757078570795708057081570825708357084570855708657087570885708957090570915709257093570945709557096570975709857099571005710157102571035710457105571065710757108571095711057111571125711357114571155711657117571185711957120571215712257123571245712557126571275712857129571305713157132571335713457135571365713757138571395714057141571425714357144571455714657147571485714957150571515715257153571545715557156571575715857159571605716157162571635716457165571665716757168571695717057171571725717357174571755717657177571785717957180571815718257183571845718557186571875718857189571905719157192571935719457195571965719757198571995720057201572025720357204572055720657207572085720957210572115721257213572145721557216572175721857219572205722157222572235722457225572265722757228572295723057231572325723357234572355723657237572385723957240572415724257243572445724557246572475724857249572505725157252572535725457255572565725757258572595726057261572625726357264572655726657267572685726957270572715727257273572745727557276572775727857279572805728157282572835728457285572865728757288572895729057291572925729357294572955729657297572985729957300573015730257303573045730557306573075730857309573105731157312573135731457315573165731757318573195732057321573225732357324573255732657327573285732957330573315733257333573345733557336573375733857339573405734157342573435734457345573465734757348573495735057351573525735357354573555735657357573585735957360573615736257363573645736557366573675736857369573705737157372573735737457375573765737757378573795738057381573825738357384573855738657387573885738957390573915739257393573945739557396573975739857399574005740157402574035740457405574065740757408574095741057411574125741357414574155741657417574185741957420574215
74225742357424574255742657427574285742957430574315743257433574345743557436574375743857439574405744157442574435744457445574465744757448574495745057451574525745357454574555745657457574585745957460574615746257463574645746557466574675746857469574705747157472574735747457475574765747757478574795748057481574825748357484574855748657487574885748957490574915749257493574945749557496574975749857499575005750157502575035750457505575065750757508575095751057511575125751357514575155751657517575185751957520575215752257523575245752557526575275752857529575305753157532575335753457535575365753757538575395754057541575425754357544575455754657547575485754957550575515755257553575545755557556575575755857559575605756157562575635756457565575665756757568575695757057571575725757357574575755757657577575785757957580575815758257583575845758557586575875758857589575905759157592575935759457595575965759757598575995760057601576025760357604576055760657607576085760957610576115761257613576145761557616576175761857619576205762157622576235762457625576265762757628576295763057631576325763357634576355763657637576385763957640576415764257643576445764557646576475764857649576505765157652576535765457655576565765757658576595766057661576625766357664576655766657667576685766957670576715767257673576745767557676576775767857679576805768157682576835768457685576865768757688576895769057691576925769357694576955769657697576985769957700577015770257703577045770557706577075770857709577105771157712577135771457715577165771757718577195772057721577225772357724577255772657727577285772957730577315773257733577345773557736577375773857739577405774157742577435774457745577465774757748577495775057751577525775357754577555775657757577585775957760577615776257763577645776557766577675776857769577705777157772577735777457775577765777757778577795778057781577825778357784577855778657787577885778957790577915779257793577945779557796577975779857799578005780157802578035780457805578065780757808578095781057811578125781357814578155781657817578185781957820578215
78225782357824578255782657827578285782957830578315783257833578345783557836578375783857839578405784157842578435784457845578465784757848578495785057851578525785357854578555785657857578585785957860578615786257863578645786557866578675786857869578705787157872578735787457875578765787757878578795788057881578825788357884578855788657887578885788957890578915789257893578945789557896578975789857899579005790157902579035790457905579065790757908579095791057911579125791357914579155791657917579185791957920579215792257923579245792557926579275792857929579305793157932579335793457935579365793757938579395794057941579425794357944579455794657947579485794957950579515795257953579545795557956579575795857959579605796157962579635796457965579665796757968579695797057971579725797357974579755797657977579785797957980579815798257983579845798557986579875798857989579905799157992579935799457995579965799757998579995800058001580025800358004580055800658007580085800958010580115801258013580145801558016580175801858019580205802158022580235802458025580265802758028580295803058031580325803358034580355803658037580385803958040580415804258043580445804558046580475804858049580505805158052580535805458055580565805758058580595806058061580625806358064580655806658067580685806958070580715807258073580745807558076580775807858079580805808158082580835808458085580865808758088580895809058091580925809358094580955809658097580985809958100581015810258103581045810558106581075810858109581105811158112581135811458115581165811758118581195812058121581225812358124581255812658127581285812958130581315813258133581345813558136581375813858139581405814158142581435814458145581465814758148581495815058151581525815358154581555815658157581585815958160581615816258163581645816558166581675816858169581705817158172581735817458175581765817758178581795818058181581825818358184581855818658187581885818958190581915819258193581945819558196581975819858199582005820158202582035820458205582065820758208582095821058211582125821358214582155821658217582185821958220582215
82225822358224582255822658227582285822958230582315823258233582345823558236582375823858239582405824158242582435824458245582465824758248582495825058251582525825358254582555825658257582585825958260582615826258263582645826558266582675826858269582705827158272582735827458275582765827758278582795828058281582825828358284582855828658287582885828958290582915829258293582945829558296582975829858299583005830158302583035830458305583065830758308583095831058311583125831358314583155831658317583185831958320583215832258323583245832558326583275832858329583305833158332583335833458335583365833758338583395834058341583425834358344583455834658347583485834958350583515835258353583545835558356583575835858359583605836158362583635836458365583665836758368583695837058371583725837358374583755837658377583785837958380583815838258383583845838558386583875838858389583905839158392583935839458395583965839758398583995840058401584025840358404584055840658407584085840958410584115841258413584145841558416584175841858419584205842158422584235842458425584265842758428584295843058431584325843358434584355843658437584385843958440584415844258443584445844558446584475844858449584505845158452584535845458455584565845758458584595846058461584625846358464584655846658467584685846958470584715847258473584745847558476584775847858479584805848158482584835848458485584865848758488584895849058491584925849358494584955849658497584985849958500585015850258503585045850558506585075850858509585105851158512585135851458515585165851758518585195852058521585225852358524585255852658527585285852958530585315853258533585345853558536585375853858539585405854158542585435854458545585465854758548585495855058551585525855358554585555855658557585585855958560585615856258563585645856558566585675856858569585705857158572585735857458575585765857758578585795858058581585825858358584585855858658587585885858958590585915859258593585945859558596585975859858599586005860158602586035860458605586065860758608586095861058611586125861358614586155861658617586185861958620586215
86225862358624586255862658627586285862958630586315863258633586345863558636586375863858639586405864158642586435864458645586465864758648586495865058651586525865358654586555865658657586585865958660586615866258663586645866558666586675866858669586705867158672586735867458675586765867758678586795868058681586825868358684586855868658687586885868958690586915869258693586945869558696586975869858699587005870158702587035870458705587065870758708587095871058711587125871358714587155871658717587185871958720587215872258723587245872558726587275872858729587305873158732587335873458735587365873758738587395874058741587425874358744587455874658747587485874958750587515875258753587545875558756587575875858759587605876158762587635876458765587665876758768587695877058771587725877358774587755877658777587785877958780587815878258783587845878558786587875878858789587905879158792587935879458795587965879758798587995880058801588025880358804588055880658807588085880958810588115881258813588145881558816588175881858819588205882158822588235882458825588265882758828588295883058831588325883358834588355883658837588385883958840588415884258843588445884558846588475884858849588505885158852588535885458855588565885758858588595886058861588625886358864588655886658867588685886958870588715887258873588745887558876588775887858879588805888158882588835888458885588865888758888588895889058891588925889358894588955889658897588985889958900589015890258903589045890558906589075890858909589105891158912589135891458915589165891758918589195892058921589225892358924589255892658927589285892958930589315893258933589345893558936589375893858939589405894158942589435894458945589465894758948589495895058951589525895358954589555895658957589585895958960589615896258963589645896558966589675896858969589705897158972589735897458975589765897758978589795898058981589825898358984589855898658987589885898958990589915899258993589945899558996589975899858999590005900159002590035900459005590065900759008590095901059011590125901359014590155901659017590185901959020590215
90225902359024590255902659027590285902959030590315903259033590345903559036590375903859039590405904159042590435904459045590465904759048590495905059051590525905359054590555905659057590585905959060590615906259063590645906559066590675906859069590705907159072590735907459075590765907759078590795908059081590825908359084590855908659087590885908959090590915909259093590945909559096590975909859099591005910159102591035910459105591065910759108591095911059111591125911359114591155911659117591185911959120591215912259123591245912559126591275912859129591305913159132591335913459135591365913759138591395914059141591425914359144591455914659147591485914959150591515915259153591545915559156591575915859159591605916159162591635916459165591665916759168591695917059171591725917359174591755917659177591785917959180591815918259183591845918559186591875918859189591905919159192591935919459195591965919759198591995920059201592025920359204592055920659207592085920959210592115921259213592145921559216592175921859219592205922159222592235922459225592265922759228592295923059231592325923359234592355923659237592385923959240592415924259243592445924559246592475924859249592505925159252592535925459255592565925759258592595926059261592625926359264592655926659267592685926959270592715927259273592745927559276592775927859279592805928159282592835928459285592865928759288592895929059291592925929359294592955929659297592985929959300593015930259303593045930559306593075930859309593105931159312593135931459315593165931759318593195932059321593225932359324593255932659327593285932959330593315933259333593345933559336593375933859339593405934159342593435934459345593465934759348593495935059351593525935359354593555935659357593585935959360593615936259363593645936559366593675936859369593705937159372593735937459375593765937759378593795938059381593825938359384593855938659387593885938959390593915939259393593945939559396593975939859399594005940159402594035940459405594065940759408594095941059411594125941359414594155941659417594185941959420594215
94225942359424594255942659427594285942959430594315943259433594345943559436594375943859439594405944159442594435944459445594465944759448594495945059451594525945359454594555945659457594585945959460594615946259463594645946559466594675946859469594705947159472594735947459475594765947759478594795948059481594825948359484594855948659487594885948959490594915949259493594945949559496594975949859499595005950159502595035950459505595065950759508595095951059511595125951359514595155951659517595185951959520595215952259523595245952559526595275952859529595305953159532595335953459535595365953759538595395954059541595425954359544595455954659547595485954959550595515955259553595545955559556595575955859559595605956159562595635956459565595665956759568595695957059571595725957359574595755957659577595785957959580595815958259583595845958559586595875958859589595905959159592595935959459595595965959759598595995960059601596025960359604596055960659607596085960959610596115961259613596145961559616596175961859619596205962159622596235962459625596265962759628596295963059631596325963359634596355963659637596385963959640596415964259643596445964559646596475964859649596505965159652596535965459655596565965759658596595966059661596625966359664596655966659667596685966959670596715967259673596745967559676596775967859679596805968159682596835968459685596865968759688596895969059691596925969359694596955969659697596985969959700597015970259703597045970559706597075970859709597105971159712597135971459715597165971759718597195972059721597225972359724597255972659727597285972959730597315973259733597345973559736597375973859739597405974159742597435974459745597465974759748597495975059751597525975359754597555975659757597585975959760597615976259763597645976559766597675976859769597705977159772597735977459775597765977759778597795978059781597825978359784597855978659787597885978959790597915979259793597945979559796597975979859799598005980159802598035980459805598065980759808598095981059811598125981359814598155981659817598185981959820598215
98225982359824598255982659827598285982959830598315983259833598345983559836598375983859839598405984159842598435984459845598465984759848598495985059851598525985359854598555985659857598585985959860598615986259863598645986559866598675986859869598705987159872598735987459875598765987759878598795988059881598825988359884598855988659887598885988959890598915989259893598945989559896598975989859899599005990159902599035990459905599065990759908599095991059911599125991359914599155991659917599185991959920599215992259923599245992559926599275992859929599305993159932599335993459935599365993759938599395994059941599425994359944599455994659947599485994959950599515995259953599545995559956599575995859959599605996159962599635996459965599665996759968599695997059971599725997359974599755997659977599785997959980599815998259983599845998559986599875998859989599905999159992599935999459995599965999759998599996000060001600026000360004600056000660007600086000960010600116001260013600146001560016600176001860019600206002160022600236002460025600266002760028600296003060031600326003360034600356003660037600386003960040600416004260043600446004560046600476004860049600506005160052600536005460055600566005760058600596006060061600626006360064600656006660067600686006960070600716007260073600746007560076600776007860079600806008160082600836008460085600866008760088600896009060091600926009360094600956009660097600986009960100601016010260103601046010560106601076010860109601106011160112601136011460115601166011760118601196012060121601226012360124601256012660127601286012960130601316013260133601346013560136601376013860139601406014160142601436014460145601466014760148601496015060151601526015360154601556015660157601586015960160601616016260163601646016560166601676016860169601706017160172601736017460175601766017760178601796018060181601826018360184601856018660187601886018960190601916019260193601946019560196601976019860199602006020160202602036020460205602066020760208602096021060211602126021360214602156021660217602186021960220602216
02226022360224602256022660227602286022960230602316023260233602346023560236602376023860239602406024160242602436024460245602466024760248602496025060251602526025360254602556025660257602586025960260602616026260263602646026560266602676026860269602706027160272602736027460275602766027760278602796028060281602826028360284602856028660287602886028960290602916029260293602946029560296602976029860299603006030160302603036030460305603066030760308603096031060311603126031360314603156031660317603186031960320603216032260323603246032560326603276032860329603306033160332603336033460335603366033760338603396034060341603426034360344603456034660347603486034960350603516035260353603546035560356603576035860359603606036160362603636036460365603666036760368603696037060371603726037360374603756037660377603786037960380603816038260383603846038560386603876038860389603906039160392603936039460395603966039760398603996040060401604026040360404604056040660407604086040960410604116041260413604146041560416604176041860419604206042160422604236042460425604266042760428604296043060431604326043360434604356043660437604386043960440604416044260443604446044560446604476044860449604506045160452604536045460455604566045760458604596046060461604626046360464604656046660467604686046960470604716047260473604746047560476604776047860479604806048160482604836048460485604866048760488604896049060491604926049360494604956049660497604986049960500605016050260503605046050560506605076050860509605106051160512605136051460515605166051760518605196052060521605226052360524605256052660527605286052960530605316053260533605346053560536605376053860539605406054160542605436054460545605466054760548605496055060551605526055360554605556055660557605586055960560605616056260563605646056560566605676056860569605706057160572605736057460575605766057760578605796058060581605826058360584605856058660587605886058960590605916059260593605946059560596605976059860599606006060160602606036060460605606066060760608606096061060611606126061360614606156061660617606186061960620606216
06226062360624606256062660627606286062960630606316063260633606346063560636606376063860639606406064160642606436064460645606466064760648606496065060651606526065360654606556065660657606586065960660606616066260663606646066560666606676066860669606706067160672606736067460675606766067760678606796068060681606826068360684606856068660687606886068960690606916069260693606946069560696606976069860699607006070160702607036070460705607066070760708607096071060711607126071360714607156071660717607186071960720607216072260723607246072560726607276072860729607306073160732607336073460735607366073760738607396074060741607426074360744607456074660747607486074960750607516075260753607546075560756607576075860759607606076160762607636076460765607666076760768607696077060771607726077360774607756077660777607786077960780607816078260783607846078560786607876078860789607906079160792607936079460795607966079760798607996080060801608026080360804608056080660807608086080960810608116081260813608146081560816608176081860819608206082160822608236082460825608266082760828608296083060831608326083360834608356083660837608386083960840608416084260843608446084560846608476084860849608506085160852608536085460855608566085760858608596086060861608626086360864608656086660867608686086960870608716087260873608746087560876608776087860879608806088160882608836088460885608866088760888608896089060891608926089360894608956089660897608986089960900609016090260903609046090560906609076090860909609106091160912609136091460915609166091760918609196092060921609226092360924609256092660927609286092960930609316093260933609346093560936609376093860939609406094160942609436094460945609466094760948609496095060951609526095360954609556095660957609586095960960609616096260963609646096560966609676096860969609706097160972609736097460975609766097760978609796098060981609826098360984609856098660987609886098960990609916099260993609946099560996609976099860999610006100161002610036100461005610066100761008610096101061011610126101361014610156101661017610186101961020610216
10226102361024610256102661027610286102961030610316103261033610346103561036610376103861039610406104161042610436104461045610466104761048610496105061051610526105361054610556105661057610586105961060610616106261063610646106561066610676106861069610706107161072610736107461075610766107761078610796108061081610826108361084610856108661087610886108961090610916109261093610946109561096610976109861099611006110161102611036110461105611066110761108611096111061111611126111361114611156111661117611186111961120611216112261123611246112561126611276112861129611306113161132611336113461135611366113761138611396114061141611426114361144611456114661147611486114961150611516115261153611546115561156611576115861159611606116161162611636116461165611666116761168611696117061171611726117361174611756117661177611786117961180611816118261183611846118561186611876118861189611906119161192611936119461195611966119761198611996120061201612026120361204612056120661207612086120961210612116121261213612146121561216612176121861219612206122161222612236122461225612266122761228612296123061231612326123361234612356123661237612386123961240612416124261243612446124561246612476124861249612506125161252612536125461255612566125761258612596126061261612626126361264612656126661267612686126961270612716127261273612746127561276612776127861279612806128161282612836128461285612866128761288612896129061291612926129361294612956129661297612986129961300613016130261303613046130561306613076130861309613106131161312613136131461315613166131761318613196132061321613226132361324613256132661327613286132961330613316133261333613346133561336613376133861339613406134161342613436134461345613466134761348613496135061351613526135361354613556135661357613586135961360613616136261363613646136561366613676136861369613706137161372613736137461375613766137761378613796138061381613826138361384613856138661387613886138961390613916139261393613946139561396613976139861399614006140161402614036140461405614066140761408614096141061411614126141361414614156141661417614186141961420614216
14226142361424614256142661427614286142961430614316143261433614346143561436614376143861439614406144161442614436144461445614466144761448614496145061451614526145361454614556145661457614586145961460614616146261463614646146561466614676146861469614706147161472614736147461475614766147761478614796148061481614826148361484614856148661487614886148961490614916149261493614946149561496614976149861499615006150161502615036150461505615066150761508615096151061511615126151361514615156151661517615186151961520615216152261523615246152561526615276152861529615306153161532615336153461535615366153761538615396154061541615426154361544615456154661547615486154961550615516155261553615546155561556615576155861559615606156161562615636156461565615666156761568615696157061571615726157361574615756157661577615786157961580615816158261583615846158561586615876158861589615906159161592615936159461595615966159761598615996160061601616026160361604616056160661607616086160961610616116161261613616146161561616616176161861619616206162161622616236162461625616266162761628616296163061631616326163361634616356163661637616386163961640616416164261643616446164561646616476164861649616506165161652616536165461655616566165761658616596166061661616626166361664616656166661667616686166961670616716167261673616746167561676616776167861679616806168161682616836168461685616866168761688616896169061691616926169361694616956169661697616986169961700617016170261703617046170561706617076170861709617106171161712617136171461715617166171761718617196172061721617226172361724617256172661727617286172961730617316173261733617346173561736617376173861739617406174161742617436174461745617466174761748617496175061751617526175361754617556175661757617586175961760617616176261763617646176561766617676176861769617706177161772617736177461775617766177761778617796178061781617826178361784617856178661787617886178961790617916179261793617946179561796617976179861799618006180161802618036180461805618066180761808618096181061811618126181361814618156181661817618186181961820618216
18226182361824618256182661827618286182961830618316183261833618346183561836618376183861839618406184161842618436184461845618466184761848618496185061851618526185361854618556185661857618586185961860618616186261863618646186561866618676186861869618706187161872618736187461875618766187761878618796188061881618826188361884618856188661887618886188961890618916189261893618946189561896618976189861899619006190161902619036190461905619066190761908619096191061911619126191361914619156191661917619186191961920619216192261923619246192561926619276192861929619306193161932619336193461935619366193761938619396194061941619426194361944619456194661947619486194961950619516195261953619546195561956619576195861959619606196161962619636196461965619666196761968619696197061971619726197361974619756197661977619786197961980619816198261983619846198561986619876198861989619906199161992619936199461995619966199761998619996200062001620026200362004620056200662007620086200962010620116201262013620146201562016620176201862019620206202162022620236202462025620266202762028620296203062031620326203362034620356203662037620386203962040620416204262043620446204562046620476204862049620506205162052620536205462055620566205762058620596206062061620626206362064620656206662067620686206962070620716207262073620746207562076620776207862079620806208162082620836208462085620866208762088620896209062091620926209362094620956209662097620986209962100621016210262103621046210562106621076210862109621106211162112621136211462115621166211762118621196212062121621226212362124621256212662127621286212962130621316213262133621346213562136621376213862139621406214162142621436214462145621466214762148621496215062151621526215362154621556215662157621586215962160621616216262163621646216562166621676216862169621706217162172621736217462175621766217762178621796218062181621826218362184621856218662187621886218962190621916219262193621946219562196621976219862199622006220162202622036220462205622066220762208622096221062211622126221362214622156221662217622186221962220622216
22226222362224622256222662227622286222962230622316223262233622346223562236622376223862239622406224162242622436224462245622466224762248622496225062251622526225362254622556225662257622586225962260622616226262263622646226562266622676226862269622706227162272622736227462275622766227762278622796228062281622826228362284622856228662287622886228962290622916229262293622946229562296622976229862299623006230162302623036230462305623066230762308623096231062311623126231362314623156231662317623186231962320623216232262323623246232562326623276232862329623306233162332623336233462335623366233762338623396234062341623426234362344623456234662347623486234962350623516235262353623546235562356623576235862359623606236162362623636236462365623666236762368623696237062371623726237362374623756237662377623786237962380623816238262383623846238562386623876238862389623906239162392623936239462395623966239762398623996240062401624026240362404624056240662407624086240962410624116241262413624146241562416624176241862419624206242162422624236242462425624266242762428624296243062431624326243362434624356243662437624386243962440624416244262443624446244562446624476244862449624506245162452624536245462455624566245762458624596246062461624626246362464624656246662467624686246962470624716247262473624746247562476624776247862479624806248162482624836248462485624866248762488624896249062491624926249362494624956249662497624986249962500625016250262503625046250562506625076250862509625106251162512625136251462515625166251762518625196252062521625226252362524625256252662527625286252962530625316253262533625346253562536625376253862539625406254162542625436254462545625466254762548625496255062551625526255362554625556255662557625586255962560625616256262563625646256562566625676256862569625706257162572625736257462575625766257762578625796258062581625826258362584625856258662587625886258962590625916259262593625946259562596625976259862599626006260162602626036260462605626066260762608626096261062611626126261362614626156261662617626186261962620626216
26226262362624626256262662627626286262962630626316263262633626346263562636626376263862639626406264162642626436264462645626466264762648626496265062651626526265362654626556265662657626586265962660626616266262663626646266562666626676266862669626706267162672626736267462675626766267762678626796268062681626826268362684626856268662687626886268962690626916269262693626946269562696626976269862699627006270162702627036270462705627066270762708627096271062711627126271362714627156271662717627186271962720627216272262723627246272562726627276272862729627306273162732627336273462735627366273762738627396274062741627426274362744627456274662747627486274962750627516275262753627546275562756627576275862759627606276162762627636276462765627666276762768627696277062771627726277362774627756277662777627786277962780627816278262783627846278562786627876278862789627906279162792627936279462795627966279762798627996280062801628026280362804628056280662807628086280962810628116281262813628146281562816628176281862819628206282162822628236282462825628266282762828628296283062831628326283362834628356283662837628386283962840628416284262843628446284562846628476284862849628506285162852628536285462855628566285762858628596286062861628626286362864628656286662867628686286962870628716287262873628746287562876628776287862879628806288162882628836288462885628866288762888628896289062891628926289362894628956289662897628986289962900629016290262903629046290562906629076290862909629106291162912629136291462915629166291762918629196292062921629226292362924629256292662927629286292962930629316293262933629346293562936629376293862939629406294162942629436294462945629466294762948629496295062951629526295362954629556295662957629586295962960629616296262963629646296562966629676296862969629706297162972629736297462975629766297762978629796298062981629826298362984629856298662987629886298962990629916299262993629946299562996629976299862999630006300163002630036300463005630066300763008630096301063011630126301363014630156301663017630186301963020630216
30226302363024630256302663027630286302963030630316303263033630346303563036630376303863039630406304163042630436304463045630466304763048630496305063051630526305363054630556305663057630586305963060630616306263063630646306563066630676306863069630706307163072630736307463075630766307763078630796308063081630826308363084630856308663087630886308963090630916309263093630946309563096630976309863099631006310163102631036310463105631066310763108631096311063111631126311363114631156311663117631186311963120631216312263123631246312563126631276312863129631306313163132631336313463135631366313763138631396314063141631426314363144631456314663147631486314963150631516315263153631546315563156631576315863159631606316163162631636316463165631666316763168631696317063171631726317363174631756317663177631786317963180631816318263183631846318563186631876318863189631906319163192631936319463195631966319763198631996320063201632026320363204632056320663207632086320963210632116321263213632146321563216632176321863219632206322163222632236322463225632266322763228632296323063231632326323363234632356323663237632386323963240632416324263243632446324563246632476324863249632506325163252632536325463255632566325763258632596326063261632626326363264632656326663267632686326963270632716327263273632746327563276632776327863279632806328163282632836328463285632866328763288632896329063291632926329363294632956329663297632986329963300633016330263303633046330563306633076330863309633106331163312633136331463315633166331763318633196332063321633226332363324633256332663327633286332963330633316333263333633346333563336633376333863339633406334163342633436334463345633466334763348633496335063351633526335363354633556335663357633586335963360633616336263363633646336563366633676336863369633706337163372633736337463375633766337763378633796338063381633826338363384633856338663387633886338963390633916339263393633946339563396633976339863399634006340163402634036340463405634066340763408634096341063411634126341363414634156341663417634186341963420634216
34226342363424634256342663427634286342963430634316343263433634346343563436634376343863439634406344163442634436344463445634466344763448634496345063451634526345363454634556345663457634586345963460634616346263463634646346563466634676346863469634706347163472634736347463475634766347763478634796348063481634826348363484634856348663487634886348963490634916349263493634946349563496634976349863499635006350163502635036350463505635066350763508635096351063511635126351363514635156351663517635186351963520635216352263523635246352563526635276352863529635306353163532635336353463535635366353763538635396354063541635426354363544635456354663547635486354963550635516355263553635546355563556635576355863559635606356163562635636356463565635666356763568635696357063571635726357363574635756357663577635786357963580635816358263583635846358563586635876358863589635906359163592635936359463595635966359763598635996360063601636026360363604636056360663607636086360963610636116361263613636146361563616636176361863619636206362163622636236362463625636266362763628636296363063631636326363363634636356363663637636386363963640636416364263643636446364563646636476364863649636506365163652636536365463655636566365763658636596366063661636626366363664636656366663667636686366963670636716367263673636746367563676636776367863679636806368163682636836368463685636866368763688636896369063691636926369363694636956369663697636986369963700637016370263703637046370563706637076370863709637106371163712637136371463715637166371763718637196372063721637226372363724637256372663727637286372963730637316373263733637346373563736637376373863739637406374163742637436374463745637466374763748637496375063751637526375363754637556375663757637586375963760637616376263763637646376563766637676376863769637706377163772637736377463775637766377763778637796378063781637826378363784637856378663787637886378963790637916379263793637946379563796637976379863799638006380163802638036380463805638066380763808638096381063811638126381363814638156381663817638186381963820638216
38226382363824638256382663827638286382963830638316383263833638346383563836638376383863839638406384163842638436384463845638466384763848638496385063851638526385363854638556385663857638586385963860638616386263863638646386563866638676386863869638706387163872638736387463875638766387763878638796388063881638826388363884638856388663887638886388963890638916389263893638946389563896638976389863899639006390163902639036390463905639066390763908639096391063911639126391363914639156391663917639186391963920639216392263923639246392563926639276392863929639306393163932639336393463935639366393763938639396394063941639426394363944639456394663947639486394963950639516395263953639546395563956639576395863959639606396163962639636396463965639666396763968639696397063971639726397363974639756397663977639786397963980639816398263983639846398563986639876398863989639906399163992639936399463995639966399763998639996400064001640026400364004640056400664007640086400964010640116401264013640146401564016640176401864019640206402164022640236402464025640266402764028640296403064031640326403364034640356403664037640386403964040640416404264043640446404564046640476404864049640506405164052640536405464055640566405764058640596406064061640626406364064640656406664067640686406964070640716407264073640746407564076640776407864079640806408164082640836408464085640866408764088640896409064091640926409364094640956409664097640986409964100641016410264103641046410564106641076410864109641106411164112641136411464115641166411764118641196412064121641226412364124641256412664127641286412964130641316413264133641346413564136641376413864139641406414164142641436414464145641466414764148641496415064151641526415364154641556415664157641586415964160641616416264163641646416564166641676416864169641706417164172641736417464175641766417764178641796418064181641826418364184641856418664187641886418964190641916419264193641946419564196641976419864199642006420164202642036420464205642066420764208642096421064211642126421364214642156421664217642186421964220642216
42226422364224642256422664227642286422964230642316423264233642346423564236642376423864239642406424164242642436424464245642466424764248642496425064251642526425364254642556425664257642586425964260642616426264263642646426564266642676426864269642706427164272642736427464275642766427764278642796428064281642826428364284642856428664287642886428964290642916429264293642946429564296642976429864299643006430164302643036430464305643066430764308643096431064311643126431364314643156431664317643186431964320643216432264323643246432564326643276432864329643306433164332643336433464335643366433764338643396434064341643426434364344643456434664347643486434964350643516435264353643546435564356643576435864359643606436164362643636436464365643666436764368643696437064371643726437364374643756437664377643786437964380643816438264383643846438564386643876438864389643906439164392643936439464395643966439764398643996440064401644026440364404644056440664407644086440964410644116441264413644146441564416644176441864419644206442164422644236442464425644266442764428644296443064431644326443364434644356443664437644386443964440644416444264443644446444564446644476444864449644506445164452644536445464455644566445764458644596446064461644626446364464644656446664467644686446964470644716447264473644746447564476644776447864479644806448164482644836448464485644866448764488644896449064491644926449364494644956449664497644986449964500645016450264503645046450564506645076450864509645106451164512645136451464515645166451764518645196452064521645226452364524645256452664527645286452964530645316453264533645346453564536645376453864539645406454164542645436454464545645466454764548645496455064551645526455364554645556455664557645586455964560645616456264563645646456564566645676456864569645706457164572645736457464575645766457764578645796458064581645826458364584645856458664587645886458964590645916459264593645946459564596645976459864599646006460164602646036460464605646066460764608646096461064611646126461364614646156461664617646186461964620646216
46226462364624646256462664627646286462964630646316463264633646346463564636646376463864639646406464164642646436464464645646466464764648646496465064651646526465364654646556465664657646586465964660646616466264663646646466564666646676466864669646706467164672646736467464675646766467764678646796468064681646826468364684646856468664687646886468964690646916469264693646946469564696646976469864699647006470164702647036470464705647066470764708647096471064711647126471364714647156471664717647186471964720647216472264723647246472564726647276472864729647306473164732647336473464735647366473764738647396474064741647426474364744647456474664747647486474964750647516475264753647546475564756647576475864759647606476164762647636476464765647666476764768647696477064771647726477364774647756477664777647786477964780647816478264783647846478564786647876478864789647906479164792647936479464795647966479764798647996480064801648026480364804648056480664807648086480964810648116481264813648146481564816648176481864819648206482164822648236482464825648266482764828648296483064831648326483364834648356483664837648386483964840648416484264843648446484564846648476484864849648506485164852648536485464855648566485764858648596486064861648626486364864648656486664867648686486964870648716487264873648746487564876648776487864879648806488164882648836488464885648866488764888648896489064891648926489364894648956489664897648986489964900649016490264903649046490564906649076490864909649106491164912649136491464915649166491764918649196492064921649226492364924649256492664927649286492964930649316493264933649346493564936649376493864939649406494164942649436494464945649466494764948649496495064951649526495364954649556495664957649586495964960649616496264963649646496564966649676496864969649706497164972649736497464975649766497764978649796498064981649826498364984649856498664987649886498964990649916499264993649946499564996649976499864999650006500165002650036500465005650066500765008650096501065011650126501365014650156501665017650186501965020650216
50226502365024650256502665027650286502965030650316503265033650346503565036650376503865039650406504165042650436504465045650466504765048650496505065051650526505365054650556505665057650586505965060650616506265063650646506565066650676506865069650706507165072650736507465075650766507765078650796508065081650826508365084650856508665087650886508965090650916509265093650946509565096650976509865099651006510165102651036510465105651066510765108651096511065111651126511365114651156511665117651186511965120651216512265123651246512565126651276512865129651306513165132651336513465135651366513765138651396514065141651426514365144651456514665147651486514965150651516515265153651546515565156651576515865159651606516165162651636516465165651666516765168651696517065171651726517365174651756517665177651786517965180651816518265183651846518565186651876518865189651906519165192651936519465195651966519765198651996520065201652026520365204652056520665207652086520965210652116521265213652146521565216652176521865219652206522165222652236522465225652266522765228652296523065231652326523365234652356523665237652386523965240652416524265243652446524565246652476524865249652506525165252652536525465255652566525765258652596526065261652626526365264652656526665267652686526965270652716527265273652746527565276652776527865279652806528165282652836528465285652866528765288652896529065291652926529365294652956529665297652986529965300653016530265303653046530565306653076530865309653106531165312653136531465315653166531765318653196532065321653226532365324653256532665327653286532965330653316533265333653346533565336653376533865339653406534165342653436534465345653466534765348653496535065351653526535365354653556535665357653586535965360653616536265363653646536565366653676536865369653706537165372653736537465375653766537765378653796538065381653826538365384653856538665387653886538965390653916539265393653946539565396653976539865399654006540165402654036540465405654066540765408654096541065411654126541365414654156541665417654186541965420654216
54226542365424654256542665427654286542965430654316543265433654346543565436654376543865439654406544165442654436544465445654466544765448654496545065451654526545365454654556545665457654586545965460654616546265463654646546565466654676546865469654706547165472654736547465475654766547765478654796548065481654826548365484654856548665487654886548965490654916549265493654946549565496654976549865499655006550165502655036550465505655066550765508655096551065511655126551365514655156551665517655186551965520655216552265523655246552565526655276552865529655306553165532655336553465535655366553765538655396554065541655426554365544655456554665547655486554965550655516555265553655546555565556655576555865559655606556165562655636556465565655666556765568655696557065571655726557365574655756557665577655786557965580655816558265583655846558565586655876558865589655906559165592655936559465595655966559765598655996560065601656026560365604656056560665607656086560965610656116561265613656146561565616656176561865619656206562165622656236562465625656266562765628656296563065631656326563365634656356563665637656386563965640656416564265643656446564565646656476564865649656506565165652656536565465655656566565765658656596566065661656626566365664656656566665667656686566965670656716567265673656746567565676656776567865679656806568165682656836568465685656866568765688656896569065691656926569365694656956569665697656986569965700657016570265703657046570565706657076570865709657106571165712657136571465715657166571765718657196572065721657226572365724657256572665727657286572965730657316573265733657346573565736657376573865739657406574165742657436574465745657466574765748657496575065751657526575365754657556575665757657586575965760657616576265763657646576565766657676576865769657706577165772657736577465775657766577765778657796578065781657826578365784657856578665787657886578965790657916579265793657946579565796657976579865799658006580165802658036580465805658066580765808658096581065811658126581365814658156581665817658186581965820658216
58226582365824658256582665827658286582965830658316583265833658346583565836658376583865839658406584165842658436584465845658466584765848658496585065851658526585365854658556585665857658586585965860658616586265863658646586565866658676586865869658706587165872658736587465875658766587765878658796588065881658826588365884658856588665887658886588965890658916589265893658946589565896658976589865899659006590165902659036590465905659066590765908659096591065911659126591365914659156591665917659186591965920659216592265923659246592565926659276592865929659306593165932659336593465935659366593765938659396594065941659426594365944659456594665947659486594965950659516595265953659546595565956659576595865959659606596165962659636596465965659666596765968659696597065971659726597365974659756597665977659786597965980659816598265983659846598565986659876598865989659906599165992659936599465995659966599765998659996600066001660026600366004660056600666007660086600966010660116601266013660146601566016660176601866019660206602166022660236602466025660266602766028660296603066031660326603366034660356603666037660386603966040660416604266043660446604566046660476604866049660506605166052660536605466055660566605766058660596606066061660626606366064660656606666067660686606966070660716607266073660746607566076660776607866079660806608166082660836608466085660866608766088660896609066091660926609366094660956609666097660986609966100661016610266103661046610566106661076610866109661106611166112661136611466115661166611766118661196612066121661226612366124661256612666127661286612966130661316613266133661346613566136661376613866139661406614166142661436614466145661466614766148661496615066151661526615366154661556615666157661586615966160661616616266163661646616566166661676616866169661706617166172661736617466175661766617766178661796618066181661826618366184661856618666187661886618966190661916619266193661946619566196661976619866199662006620166202662036620466205662066620766208662096621066211662126621366214662156621666217662186621966220662216
62226622366224662256622666227662286622966230662316623266233662346623566236662376623866239662406624166242662436624466245662466624766248662496625066251662526625366254662556625666257662586625966260662616626266263662646626566266662676626866269662706627166272662736627466275662766627766278662796628066281662826628366284662856628666287662886628966290662916629266293662946629566296662976629866299663006630166302663036630466305663066630766308663096631066311663126631366314663156631666317663186631966320663216632266323663246632566326663276632866329663306633166332663336633466335663366633766338663396634066341663426634366344663456634666347663486634966350663516635266353663546635566356663576635866359663606636166362663636636466365663666636766368663696637066371663726637366374663756637666377663786637966380663816638266383663846638566386663876638866389663906639166392663936639466395663966639766398663996640066401664026640366404664056640666407664086640966410664116641266413664146641566416664176641866419664206642166422664236642466425664266642766428664296643066431664326643366434664356643666437664386643966440664416644266443664446644566446664476644866449664506645166452664536645466455664566645766458664596646066461664626646366464664656646666467664686646966470664716647266473664746647566476664776647866479664806648166482664836648466485664866648766488664896649066491664926649366494664956649666497664986649966500665016650266503665046650566506665076650866509665106651166512665136651466515665166651766518665196652066521665226652366524665256652666527665286652966530665316653266533665346653566536665376653866539665406654166542665436654466545665466654766548665496655066551665526655366554665556655666557665586655966560665616656266563665646656566566665676656866569665706657166572665736657466575665766657766578665796658066581665826658366584665856658666587665886658966590665916659266593665946659566596665976659866599666006660166602666036660466605666066660766608666096661066611666126661366614666156661666617666186661966620666216
66226662366624666256662666627666286662966630666316663266633666346663566636666376663866639666406664166642666436664466645666466664766648666496665066651666526665366654666556665666657666586665966660666616666266663666646666566666666676666866669666706667166672666736667466675666766667766678666796668066681666826668366684666856668666687666886668966690666916669266693666946669566696666976669866699667006670166702667036670466705667066670766708667096671066711667126671366714667156671666717667186671966720667216672266723667246672566726667276672866729667306673166732667336673466735667366673766738667396674066741667426674366744667456674666747667486674966750667516675266753667546675566756667576675866759667606676166762667636676466765667666676766768667696677066771667726677366774667756677666777667786677966780667816678266783667846678566786667876678866789667906679166792667936679466795667966679766798667996680066801668026680366804668056680666807668086680966810668116681266813668146681566816668176681866819668206682166822668236682466825668266682766828668296683066831668326683366834668356683666837668386683966840668416684266843668446684566846668476684866849668506685166852668536685466855668566685766858668596686066861668626686366864668656686666867668686686966870668716687266873668746687566876668776687866879668806688166882668836688466885668866688766888668896689066891668926689366894668956689666897668986689966900669016690266903669046690566906669076690866909669106691166912669136691466915669166691766918669196692066921669226692366924669256692666927669286692966930669316693266933669346693566936669376693866939669406694166942669436694466945669466694766948669496695066951669526695366954669556695666957669586695966960669616696266963669646696566966669676696866969669706697166972669736697466975669766697766978669796698066981669826698366984669856698666987669886698966990669916699266993669946699566996669976699866999670006700167002670036700467005670066700767008670096701067011670126701367014670156701667017670186701967020670216
70226702367024670256702667027670286702967030670316703267033670346703567036670376703867039670406704167042670436704467045670466704767048670496705067051670526705367054670556705667057670586705967060670616706267063670646706567066670676706867069670706707167072670736707467075670766707767078670796708067081670826708367084670856708667087670886708967090670916709267093670946709567096670976709867099671006710167102671036710467105671066710767108671096711067111671126711367114671156711667117671186711967120671216712267123671246712567126671276712867129671306713167132671336713467135671366713767138671396714067141671426714367144671456714667147671486714967150671516715267153671546715567156671576715867159671606716167162671636716467165671666716767168671696717067171671726717367174671756717667177671786717967180671816718267183671846718567186671876718867189671906719167192671936719467195671966719767198671996720067201672026720367204672056720667207672086720967210672116721267213672146721567216672176721867219672206722167222672236722467225672266722767228672296723067231672326723367234672356723667237672386723967240672416724267243672446724567246672476724867249672506725167252672536725467255672566725767258672596726067261672626726367264672656726667267672686726967270672716727267273672746727567276672776727867279672806728167282672836728467285672866728767288672896729067291672926729367294672956729667297672986729967300673016730267303673046730567306673076730867309673106731167312673136731467315673166731767318673196732067321673226732367324673256732667327673286732967330673316733267333673346733567336673376733867339673406734167342673436734467345673466734767348673496735067351673526735367354673556735667357673586735967360673616736267363673646736567366673676736867369673706737167372673736737467375673766737767378673796738067381673826738367384673856738667387673886738967390673916739267393673946739567396673976739867399674006740167402674036740467405674066740767408674096741067411674126741367414674156741667417674186741967420674216
74226742367424674256742667427674286742967430674316743267433674346743567436674376743867439674406744167442674436744467445674466744767448674496745067451674526745367454674556745667457674586745967460674616746267463674646746567466674676746867469674706747167472674736747467475674766747767478674796748067481674826748367484674856748667487674886748967490674916749267493674946749567496674976749867499675006750167502675036750467505675066750767508675096751067511675126751367514675156751667517675186751967520675216752267523675246752567526675276752867529675306753167532675336753467535675366753767538675396754067541675426754367544675456754667547675486754967550675516755267553675546755567556675576755867559675606756167562675636756467565675666756767568675696757067571675726757367574675756757667577675786757967580675816758267583675846758567586675876758867589675906759167592675936759467595675966759767598675996760067601676026760367604676056760667607676086760967610676116761267613676146761567616676176761867619676206762167622676236762467625676266762767628676296763067631676326763367634676356763667637676386763967640676416764267643676446764567646676476764867649676506765167652676536765467655676566765767658676596766067661676626766367664676656766667667676686766967670676716767267673676746767567676676776767867679676806768167682676836768467685676866768767688676896769067691676926769367694676956769667697676986769967700677016770267703677046770567706677076770867709677106771167712677136771467715677166771767718677196772067721677226772367724677256772667727677286772967730677316773267733677346773567736677376773867739677406774167742677436774467745677466774767748677496775067751677526775367754677556775667757677586775967760677616776267763677646776567766677676776867769677706777167772677736777467775677766777767778677796778067781677826778367784677856778667787677886778967790677916779267793677946779567796677976779867799678006780167802678036780467805678066780767808678096781067811678126781367814678156781667817678186781967820678216
78226782367824678256782667827678286782967830678316783267833678346783567836678376783867839678406784167842678436784467845678466784767848678496785067851678526785367854678556785667857678586785967860678616786267863678646786567866678676786867869678706787167872678736787467875678766787767878678796788067881678826788367884678856788667887678886788967890678916789267893678946789567896678976789867899679006790167902679036790467905679066790767908679096791067911679126791367914679156791667917679186791967920679216792267923679246792567926679276792867929679306793167932679336793467935679366793767938679396794067941679426794367944679456794667947679486794967950679516795267953679546795567956679576795867959679606796167962679636796467965679666796767968679696797067971679726797367974679756797667977679786797967980679816798267983679846798567986679876798867989679906799167992679936799467995679966799767998679996800068001680026800368004680056800668007680086800968010680116801268013680146801568016680176801868019680206802168022680236802468025680266802768028680296803068031680326803368034680356803668037680386803968040680416804268043680446804568046680476804868049680506805168052680536805468055680566805768058680596806068061680626806368064680656806668067680686806968070680716807268073680746807568076680776807868079680806808168082680836808468085680866808768088680896809068091680926809368094680956809668097680986809968100681016810268103681046810568106681076810868109681106811168112681136811468115681166811768118681196812068121681226812368124681256812668127681286812968130681316813268133681346813568136681376813868139681406814168142681436814468145681466814768148681496815068151681526815368154681556815668157681586815968160681616816268163681646816568166681676816868169681706817168172681736817468175681766817768178681796818068181681826818368184681856818668187681886818968190681916819268193681946819568196681976819868199682006820168202682036820468205682066820768208682096821068211682126821368214682156821668217682186821968220682216
82226822368224682256822668227682286822968230682316823268233682346823568236682376823868239682406824168242682436824468245682466824768248682496825068251682526825368254682556825668257682586825968260682616826268263682646826568266682676826868269682706827168272682736827468275682766827768278682796828068281682826828368284682856828668287682886828968290682916829268293682946829568296682976829868299683006830168302683036830468305683066830768308683096831068311683126831368314683156831668317683186831968320683216832268323683246832568326683276832868329683306833168332683336833468335683366833768338683396834068341683426834368344683456834668347683486834968350683516835268353683546835568356683576835868359683606836168362683636836468365683666836768368683696837068371683726837368374683756837668377683786837968380683816838268383683846838568386683876838868389683906839168392683936839468395683966839768398683996840068401684026840368404684056840668407684086840968410684116841268413684146841568416684176841868419684206842168422684236842468425684266842768428684296843068431684326843368434684356843668437684386843968440684416844268443684446844568446684476844868449684506845168452684536845468455684566845768458684596846068461684626846368464684656846668467684686846968470684716847268473684746847568476684776847868479684806848168482684836848468485684866848768488684896849068491684926849368494684956849668497684986849968500685016850268503685046850568506685076850868509685106851168512685136851468515685166851768518685196852068521685226852368524685256852668527685286852968530685316853268533685346853568536685376853868539685406854168542685436854468545685466854768548685496855068551685526855368554685556855668557685586855968560685616856268563685646856568566685676856868569685706857168572685736857468575685766857768578685796858068581685826858368584685856858668587685886858968590685916859268593685946859568596685976859868599686006860168602686036860468605686066860768608686096861068611686126861368614686156861668617686186861968620686216
86226862368624686256862668627686286862968630686316863268633686346863568636686376863868639686406864168642686436864468645686466864768648686496865068651686526865368654686556865668657686586865968660686616866268663686646866568666686676866868669686706867168672686736867468675686766867768678686796868068681686826868368684686856868668687686886868968690686916869268693686946869568696686976869868699687006870168702687036870468705687066870768708687096871068711687126871368714687156871668717687186871968720687216872268723687246872568726687276872868729687306873168732687336873468735687366873768738687396874068741687426874368744687456874668747687486874968750687516875268753687546875568756687576875868759687606876168762687636876468765687666876768768687696877068771687726877368774687756877668777687786877968780687816878268783687846878568786687876878868789687906879168792687936879468795687966879768798687996880068801688026880368804688056880668807688086880968810688116881268813688146881568816688176881868819688206882168822688236882468825688266882768828688296883068831688326883368834688356883668837688386883968840688416884268843688446884568846688476884868849688506885168852688536885468855688566885768858688596886068861688626886368864688656886668867688686886968870688716887268873688746887568876688776887868879688806888168882688836888468885688866888768888688896889068891688926889368894688956889668897688986889968900689016890268903689046890568906689076890868909689106891168912689136891468915689166891768918689196892068921689226892368924689256892668927689286892968930689316893268933689346893568936689376893868939689406894168942689436894468945689466894768948689496895068951689526895368954689556895668957689586895968960689616896268963689646896568966689676896868969689706897168972689736897468975689766897768978689796898068981689826898368984689856898668987689886898968990689916899268993689946899568996689976899868999690006900169002690036900469005690066900769008690096901069011690126901369014690156901669017690186901969020690216
90226902369024690256902669027690286902969030690316903269033690346903569036690376903869039690406904169042690436904469045690466904769048690496905069051690526905369054690556905669057690586905969060690616906269063690646906569066690676906869069690706907169072690736907469075690766907769078690796908069081690826908369084690856908669087690886908969090690916909269093690946909569096690976909869099691006910169102691036910469105691066910769108691096911069111691126911369114691156911669117691186911969120691216912269123691246912569126691276912869129691306913169132691336913469135691366913769138691396914069141691426914369144691456914669147691486914969150691516915269153691546915569156691576915869159691606916169162691636916469165691666916769168691696917069171691726917369174691756917669177691786917969180691816918269183691846918569186691876918869189691906919169192691936919469195691966919769198691996920069201692026920369204692056920669207692086920969210692116921269213692146921569216692176921869219692206922169222692236922469225692266922769228692296923069231692326923369234692356923669237692386923969240692416924269243692446924569246692476924869249692506925169252692536925469255692566925769258692596926069261692626926369264692656926669267692686926969270692716927269273692746927569276692776927869279692806928169282692836928469285692866928769288692896929069291692926929369294692956929669297692986929969300693016930269303693046930569306693076930869309693106931169312693136931469315693166931769318693196932069321693226932369324693256932669327693286932969330693316933269333693346933569336693376933869339693406934169342693436934469345693466934769348693496935069351693526935369354693556935669357693586935969360693616936269363693646936569366693676936869369693706937169372693736937469375693766937769378693796938069381693826938369384693856938669387693886938969390693916939269393693946939569396693976939869399694006940169402694036940469405694066940769408694096941069411694126941369414694156941669417694186941969420694216
94226942369424694256942669427694286942969430694316943269433694346943569436694376943869439694406944169442694436944469445694466944769448694496945069451694526945369454694556945669457694586945969460694616946269463694646946569466694676946869469694706947169472694736947469475694766947769478694796948069481694826948369484694856948669487694886948969490694916949269493694946949569496694976949869499695006950169502695036950469505695066950769508695096951069511695126951369514695156951669517695186951969520695216952269523695246952569526695276952869529695306953169532695336953469535695366953769538695396954069541695426954369544695456954669547695486954969550695516955269553695546955569556695576955869559695606956169562695636956469565695666956769568695696957069571695726957369574695756957669577695786957969580695816958269583695846958569586695876958869589695906959169592695936959469595695966959769598695996960069601696026960369604696056960669607696086960969610696116961269613696146961569616696176961869619696206962169622696236962469625696266962769628696296963069631696326963369634696356963669637696386963969640696416964269643696446964569646696476964869649696506965169652696536965469655696566965769658696596966069661696626966369664696656966669667696686966969670696716967269673696746967569676696776967869679696806968169682696836968469685696866968769688696896969069691696926969369694696956969669697696986969969700697016970269703697046970569706697076970869709697106971169712697136971469715697166971769718697196972069721697226972369724697256972669727697286972969730697316973269733697346973569736697376973869739697406974169742697436974469745697466974769748697496975069751697526975369754697556975669757697586975969760697616976269763697646976569766697676976869769697706977169772697736977469775697766977769778697796978069781697826978369784697856978669787697886978969790697916979269793697946979569796697976979869799698006980169802698036980469805698066980769808698096981069811698126981369814698156981669817698186981969820698216
98226982369824698256982669827698286982969830698316983269833698346983569836698376983869839698406984169842698436984469845698466984769848698496985069851698526985369854698556985669857698586985969860698616986269863698646986569866698676986869869698706987169872698736987469875698766987769878698796988069881698826988369884698856988669887698886988969890698916989269893698946989569896698976989869899699006990169902699036990469905699066990769908699096991069911699126991369914699156991669917699186991969920699216992269923699246992569926699276992869929699306993169932699336993469935699366993769938699396994069941699426994369944699456994669947699486994969950699516995269953699546995569956699576995869959699606996169962699636996469965699666996769968699696997069971699726997369974699756997669977699786997969980699816998269983699846998569986699876998869989699906999169992699936999469995699966999769998699997000070001700027000370004700057000670007700087000970010700117001270013700147001570016700177001870019700207002170022700237002470025700267002770028700297003070031700327003370034700357003670037700387003970040700417004270043700447004570046700477004870049700507005170052700537005470055700567005770058700597006070061700627006370064700657006670067700687006970070700717007270073700747007570076700777007870079700807008170082700837008470085700867008770088700897009070091700927009370094700957009670097700987009970100701017010270103701047010570106701077010870109701107011170112701137011470115701167011770118701197012070121701227012370124701257012670127701287012970130701317013270133701347013570136701377013870139701407014170142701437014470145701467014770148701497015070151701527015370154701557015670157701587015970160701617016270163701647016570166701677016870169701707017170172701737017470175701767017770178701797018070181701827018370184701857018670187701887018970190701917019270193701947019570196701977019870199702007020170202702037020470205702067020770208702097021070211702127021370214702157021670217702187021970220702217
02227022370224702257022670227702287022970230702317023270233702347023570236702377023870239702407024170242702437024470245702467024770248702497025070251702527025370254702557025670257702587025970260702617026270263702647026570266702677026870269702707027170272702737027470275702767027770278702797028070281702827028370284702857028670287702887028970290702917029270293702947029570296702977029870299703007030170302703037030470305703067030770308703097031070311703127031370314703157031670317703187031970320703217032270323703247032570326703277032870329703307033170332703337033470335703367033770338703397034070341703427034370344703457034670347703487034970350703517035270353703547035570356703577035870359703607036170362703637036470365703667036770368703697037070371703727037370374703757037670377703787037970380703817038270383703847038570386703877038870389703907039170392703937039470395703967039770398703997040070401704027040370404704057040670407704087040970410704117041270413704147041570416704177041870419704207042170422704237042470425704267042770428704297043070431704327043370434704357043670437704387043970440704417044270443704447044570446704477044870449704507045170452704537045470455704567045770458704597046070461704627046370464704657046670467704687046970470704717047270473704747047570476704777047870479704807048170482704837048470485704867048770488704897049070491704927049370494704957049670497704987049970500705017050270503705047050570506705077050870509705107051170512705137051470515705167051770518705197052070521705227052370524705257052670527705287052970530705317053270533705347053570536705377053870539705407054170542705437054470545705467054770548705497055070551705527055370554705557055670557705587055970560705617056270563705647056570566705677056870569705707057170572705737057470575705767057770578705797058070581705827058370584705857058670587705887058970590705917059270593705947059570596705977059870599706007060170602706037060470605706067060770608706097061070611706127061370614706157061670617706187061970620706217
06227062370624706257062670627706287062970630706317063270633706347063570636706377063870639706407064170642706437064470645706467064770648706497065070651706527065370654706557065670657706587065970660706617066270663706647066570666706677066870669706707067170672706737067470675706767067770678706797068070681706827068370684706857068670687706887068970690706917069270693706947069570696706977069870699707007070170702707037070470705707067070770708707097071070711707127071370714707157071670717707187071970720707217072270723707247072570726707277072870729707307073170732707337073470735707367073770738707397074070741707427074370744707457074670747707487074970750707517075270753707547075570756707577075870759707607076170762707637076470765707667076770768707697077070771707727077370774707757077670777707787077970780707817078270783707847078570786707877078870789707907079170792707937079470795707967079770798707997080070801708027080370804708057080670807708087080970810708117081270813708147081570816708177081870819708207082170822708237082470825708267082770828708297083070831708327083370834708357083670837708387083970840708417084270843708447084570846708477084870849708507085170852708537085470855708567085770858708597086070861708627086370864708657086670867708687086970870708717087270873708747087570876708777087870879708807088170882708837088470885708867088770888708897089070891708927089370894708957089670897708987089970900709017090270903709047090570906709077090870909709107091170912709137091470915709167091770918709197092070921709227092370924709257092670927709287092970930709317093270933709347093570936709377093870939709407094170942709437094470945709467094770948709497095070951709527095370954709557095670957709587095970960709617096270963709647096570966709677096870969709707097170972709737097470975709767097770978709797098070981709827098370984709857098670987709887098970990709917099270993709947099570996709977099870999710007100171002710037100471005710067100771008710097101071011710127101371014710157101671017710187101971020710217
10227102371024710257102671027710287102971030710317103271033710347103571036710377103871039710407104171042710437104471045710467104771048710497105071051710527105371054710557105671057710587105971060710617106271063710647106571066710677106871069710707107171072710737107471075710767107771078710797108071081710827108371084710857108671087710887108971090710917109271093710947109571096710977109871099711007110171102711037110471105711067110771108711097111071111711127111371114711157111671117711187111971120711217112271123711247112571126711277112871129711307113171132711337113471135711367113771138711397114071141711427114371144711457114671147711487114971150711517115271153711547115571156711577115871159711607116171162711637116471165711667116771168711697117071171711727117371174711757117671177711787117971180711817118271183711847118571186711877118871189711907119171192711937119471195711967119771198711997120071201712027120371204712057120671207712087120971210712117121271213712147121571216712177121871219712207122171222712237122471225712267122771228712297123071231712327123371234712357123671237712387123971240712417124271243712447124571246712477124871249712507125171252712537125471255712567125771258712597126071261712627126371264712657126671267712687126971270712717127271273712747127571276712777127871279712807128171282712837128471285712867128771288712897129071291712927129371294712957129671297712987129971300713017130271303713047130571306713077130871309713107131171312713137131471315713167131771318713197132071321713227132371324713257132671327713287132971330713317133271333713347133571336713377133871339713407134171342713437134471345713467134771348713497135071351713527135371354713557135671357713587135971360713617136271363713647136571366713677136871369713707137171372713737137471375713767137771378713797138071381713827138371384713857138671387713887138971390713917139271393713947139571396713977139871399714007140171402714037140471405714067140771408714097141071411714127141371414714157141671417714187141971420714217
14227142371424714257142671427714287142971430714317143271433714347143571436714377143871439714407144171442714437144471445714467144771448714497145071451714527145371454714557145671457714587145971460714617146271463714647146571466714677146871469714707147171472714737147471475714767147771478714797148071481714827148371484714857148671487714887148971490714917149271493714947149571496714977149871499715007150171502715037150471505715067150771508715097151071511715127151371514715157151671517715187151971520715217152271523715247152571526715277152871529715307153171532715337153471535715367153771538715397154071541715427154371544715457154671547715487154971550715517155271553715547155571556715577155871559715607156171562715637156471565715667156771568715697157071571715727157371574715757157671577715787157971580715817158271583715847158571586715877158871589715907159171592715937159471595715967159771598715997160071601716027160371604716057160671607716087160971610716117161271613716147161571616716177161871619716207162171622716237162471625716267162771628716297163071631716327163371634716357163671637716387163971640716417164271643716447164571646716477164871649716507165171652716537165471655716567165771658716597166071661716627166371664716657166671667716687166971670716717167271673716747167571676716777167871679716807168171682716837168471685716867168771688716897169071691716927169371694716957169671697716987169971700717017170271703717047170571706717077170871709717107171171712717137171471715717167171771718717197172071721717227172371724717257172671727717287172971730717317173271733717347173571736717377173871739717407174171742717437174471745717467174771748717497175071751717527175371754717557175671757717587175971760717617176271763717647176571766717677176871769717707177171772717737177471775717767177771778717797178071781717827178371784717857178671787717887178971790717917179271793717947179571796717977179871799718007180171802718037180471805718067180771808718097181071811718127181371814718157181671817718187181971820718217
18227182371824718257182671827718287182971830718317183271833718347183571836718377183871839718407184171842718437184471845718467184771848718497185071851718527185371854718557185671857718587185971860718617186271863718647186571866718677186871869718707187171872718737187471875718767187771878718797188071881718827188371884718857188671887718887188971890718917189271893718947189571896718977189871899719007190171902719037190471905719067190771908719097191071911719127191371914719157191671917719187191971920719217192271923719247192571926719277192871929719307193171932719337193471935719367193771938719397194071941719427194371944719457194671947719487194971950719517195271953719547195571956719577195871959719607196171962719637196471965719667196771968719697197071971719727197371974719757197671977719787197971980719817198271983719847198571986719877198871989719907199171992719937199471995719967199771998719997200072001720027200372004720057200672007720087200972010720117201272013720147201572016720177201872019720207202172022720237202472025720267202772028720297203072031720327203372034720357203672037720387203972040720417204272043720447204572046720477204872049720507205172052720537205472055720567205772058720597206072061720627206372064720657206672067720687206972070720717207272073720747207572076720777207872079720807208172082720837208472085720867208772088720897209072091720927209372094720957209672097720987209972100721017210272103721047210572106721077210872109721107211172112721137211472115721167211772118721197212072121721227212372124721257212672127721287212972130721317213272133721347213572136721377213872139721407214172142721437214472145721467214772148721497215072151721527215372154721557215672157721587215972160721617216272163721647216572166721677216872169721707217172172721737217472175721767217772178721797218072181721827218372184721857218672187721887218972190721917219272193721947219572196721977219872199722007220172202722037220472205722067220772208722097221072211722127221372214722157221672217722187221972220722217
22227222372224722257222672227722287222972230722317223272233722347223572236722377223872239722407224172242722437224472245722467224772248722497225072251722527225372254722557225672257722587225972260722617226272263722647226572266722677226872269722707227172272722737227472275722767227772278722797228072281722827228372284722857228672287722887228972290722917229272293722947229572296722977229872299723007230172302723037230472305723067230772308723097231072311723127231372314723157231672317723187231972320723217232272323723247232572326723277232872329723307233172332723337233472335723367233772338723397234072341723427234372344723457234672347723487234972350723517235272353723547235572356723577235872359723607236172362723637236472365723667236772368723697237072371723727237372374723757237672377723787237972380723817238272383723847238572386723877238872389723907239172392723937239472395723967239772398723997240072401724027240372404724057240672407724087240972410724117241272413724147241572416724177241872419724207242172422724237242472425724267242772428724297243072431724327243372434724357243672437724387243972440724417244272443724447244572446724477244872449724507245172452724537245472455724567245772458724597246072461724627246372464724657246672467724687246972470724717247272473724747247572476724777247872479724807248172482724837248472485724867248772488724897249072491724927249372494724957249672497724987249972500725017250272503725047250572506725077250872509725107251172512725137251472515725167251772518725197252072521725227252372524725257252672527725287252972530725317253272533725347253572536725377253872539725407254172542725437254472545725467254772548725497255072551725527255372554725557255672557725587255972560725617256272563725647256572566725677256872569725707257172572725737257472575725767257772578725797258072581725827258372584725857258672587725887258972590725917259272593725947259572596725977259872599726007260172602726037260472605726067260772608726097261072611726127261372614726157261672617726187261972620726217
26227262372624726257262672627726287262972630726317263272633726347263572636726377263872639726407264172642726437264472645726467264772648726497265072651726527265372654726557265672657726587265972660726617266272663726647266572666726677266872669726707267172672726737267472675726767267772678726797268072681726827268372684726857268672687726887268972690726917269272693726947269572696726977269872699727007270172702727037270472705727067270772708727097271072711727127271372714727157271672717727187271972720727217272272723727247272572726727277272872729727307273172732727337273472735727367273772738727397274072741727427274372744727457274672747727487274972750727517275272753727547275572756727577275872759727607276172762727637276472765727667276772768727697277072771727727277372774727757277672777727787277972780727817278272783727847278572786727877278872789727907279172792727937279472795727967279772798727997280072801728027280372804728057280672807728087280972810728117281272813728147281572816728177281872819728207282172822728237282472825728267282772828728297283072831728327283372834728357283672837728387283972840728417284272843728447284572846728477284872849728507285172852728537285472855728567285772858728597286072861728627286372864728657286672867728687286972870728717287272873728747287572876728777287872879728807288172882728837288472885728867288772888728897289072891728927289372894728957289672897728987289972900729017290272903729047290572906729077290872909729107291172912729137291472915729167291772918729197292072921729227292372924729257292672927729287292972930729317293272933729347293572936729377293872939729407294172942729437294472945729467294772948729497295072951729527295372954729557295672957729587295972960729617296272963729647296572966729677296872969729707297172972729737297472975729767297772978729797298072981729827298372984729857298672987729887298972990729917299272993729947299572996729977299872999730007300173002730037300473005730067300773008730097301073011730127301373014730157301673017730187301973020730217
30227302373024730257302673027730287302973030730317303273033730347303573036730377303873039730407304173042730437304473045730467304773048730497305073051730527305373054730557305673057730587305973060730617306273063730647306573066730677306873069730707307173072730737307473075730767307773078730797308073081730827308373084730857308673087730887308973090730917309273093730947309573096730977309873099731007310173102731037310473105731067310773108731097311073111731127311373114731157311673117731187311973120731217312273123731247312573126731277312873129731307313173132731337313473135731367313773138731397314073141731427314373144731457314673147731487314973150731517315273153731547315573156731577315873159731607316173162731637316473165731667316773168731697317073171731727317373174731757317673177731787317973180731817318273183731847318573186731877318873189731907319173192731937319473195731967319773198731997320073201732027320373204732057320673207732087320973210732117321273213732147321573216732177321873219732207322173222732237322473225732267322773228732297323073231732327323373234732357323673237732387323973240732417324273243732447324573246732477324873249732507325173252732537325473255732567325773258732597326073261732627326373264732657326673267732687326973270732717327273273732747327573276732777327873279732807328173282732837328473285732867328773288732897329073291732927329373294732957329673297732987329973300733017330273303733047330573306733077330873309733107331173312733137331473315733167331773318733197332073321733227332373324733257332673327733287332973330733317333273333733347333573336733377333873339733407334173342733437334473345733467334773348733497335073351733527335373354733557335673357733587335973360733617336273363733647336573366733677336873369733707337173372733737337473375733767337773378733797338073381733827338373384733857338673387733887338973390733917339273393733947339573396733977339873399734007340173402734037340473405734067340773408734097341073411734127341373414734157341673417734187341973420734217
34227342373424734257342673427734287342973430734317343273433734347343573436734377343873439734407344173442734437344473445734467344773448734497345073451734527345373454734557345673457734587345973460734617346273463734647346573466734677346873469734707347173472734737347473475734767347773478734797348073481734827348373484734857348673487734887348973490734917349273493734947349573496734977349873499735007350173502735037350473505735067350773508735097351073511735127351373514735157351673517735187351973520735217352273523735247352573526735277352873529735307353173532735337353473535735367353773538735397354073541735427354373544735457354673547735487354973550735517355273553735547355573556735577355873559735607356173562735637356473565735667356773568735697357073571735727357373574735757357673577735787357973580735817358273583735847358573586735877358873589735907359173592735937359473595735967359773598735997360073601736027360373604736057360673607736087360973610736117361273613736147361573616736177361873619736207362173622736237362473625736267362773628736297363073631736327363373634736357363673637736387363973640736417364273643736447364573646736477364873649736507365173652736537365473655736567365773658736597366073661736627366373664736657366673667736687366973670736717367273673736747367573676736777367873679736807368173682736837368473685736867368773688736897369073691736927369373694736957369673697736987369973700737017370273703737047370573706737077370873709737107371173712737137371473715737167371773718737197372073721737227372373724737257372673727737287372973730737317373273733737347373573736737377373873739737407374173742737437374473745737467374773748737497375073751737527375373754737557375673757737587375973760737617376273763737647376573766737677376873769737707377173772737737377473775737767377773778737797378073781737827378373784737857378673787737887378973790737917379273793737947379573796737977379873799738007380173802738037380473805738067380773808738097381073811738127381373814738157381673817738187381973820738217
38227382373824738257382673827738287382973830738317383273833738347383573836738377383873839738407384173842738437384473845738467384773848738497385073851738527385373854738557385673857738587385973860738617386273863738647386573866738677386873869738707387173872738737387473875738767387773878738797388073881738827388373884738857388673887738887388973890738917389273893738947389573896738977389873899739007390173902739037390473905739067390773908739097391073911739127391373914739157391673917739187391973920739217392273923739247392573926739277392873929739307393173932739337393473935739367393773938739397394073941739427394373944739457394673947739487394973950739517395273953739547395573956739577395873959739607396173962739637396473965739667396773968739697397073971739727397373974739757397673977739787397973980739817398273983739847398573986739877398873989739907399173992739937399473995739967399773998739997400074001740027400374004740057400674007740087400974010740117401274013740147401574016740177401874019740207402174022740237402474025740267402774028740297403074031740327403374034740357403674037740387403974040740417404274043740447404574046740477404874049740507405174052740537405474055740567405774058740597406074061740627406374064740657406674067740687406974070740717407274073740747407574076740777407874079740807408174082740837408474085740867408774088740897409074091740927409374094740957409674097740987409974100741017410274103741047410574106741077410874109741107411174112741137411474115741167411774118741197412074121741227412374124741257412674127741287412974130741317413274133741347413574136741377413874139741407414174142741437414474145741467414774148741497415074151741527415374154741557415674157741587415974160741617416274163741647416574166741677416874169741707417174172741737417474175741767417774178741797418074181741827418374184741857418674187741887418974190741917419274193741947419574196741977419874199742007420174202742037420474205742067420774208742097421074211742127421374214742157421674217742187421974220742217
42227422374224742257422674227742287422974230742317423274233742347423574236742377423874239742407424174242742437424474245742467424774248742497425074251742527425374254742557425674257742587425974260742617426274263742647426574266742677426874269742707427174272742737427474275742767427774278742797428074281742827428374284742857428674287742887428974290742917429274293742947429574296742977429874299743007430174302743037430474305743067430774308743097431074311743127431374314743157431674317743187431974320743217432274323743247432574326743277432874329743307433174332743337433474335743367433774338743397434074341743427434374344743457434674347743487434974350743517435274353743547435574356743577435874359743607436174362743637436474365743667436774368743697437074371743727437374374743757437674377743787437974380743817438274383743847438574386743877438874389743907439174392743937439474395743967439774398743997440074401744027440374404744057440674407744087440974410744117441274413744147441574416744177441874419744207442174422744237442474425744267442774428744297443074431744327443374434744357443674437744387443974440744417444274443744447444574446744477444874449744507445174452744537445474455744567445774458744597446074461744627446374464744657446674467744687446974470744717447274473744747447574476744777447874479744807448174482744837448474485744867448774488744897449074491744927449374494744957449674497744987449974500745017450274503745047450574506745077450874509745107451174512745137451474515745167451774518745197452074521745227452374524745257452674527745287452974530745317453274533745347453574536745377453874539745407454174542745437454474545745467454774548745497455074551745527455374554745557455674557745587455974560745617456274563745647456574566745677456874569745707457174572745737457474575745767457774578745797458074581745827458374584745857458674587745887458974590745917459274593745947459574596745977459874599746007460174602746037460474605746067460774608746097461074611746127461374614746157461674617746187461974620746217
46227462374624746257462674627746287462974630746317463274633746347463574636746377463874639746407464174642746437464474645746467464774648746497465074651746527465374654746557465674657746587465974660746617466274663746647466574666746677466874669746707467174672746737467474675746767467774678746797468074681746827468374684746857468674687746887468974690746917469274693746947469574696746977469874699747007470174702747037470474705747067470774708747097471074711747127471374714747157471674717747187471974720747217472274723747247472574726747277472874729747307473174732747337473474735747367473774738747397474074741747427474374744747457474674747747487474974750747517475274753747547475574756747577475874759747607476174762747637476474765747667476774768747697477074771747727477374774747757477674777747787477974780747817478274783747847478574786747877478874789747907479174792747937479474795747967479774798747997480074801748027480374804748057480674807748087480974810748117481274813748147481574816748177481874819748207482174822748237482474825748267482774828748297483074831748327483374834748357483674837748387483974840748417484274843748447484574846748477484874849748507485174852748537485474855748567485774858748597486074861748627486374864748657486674867748687486974870748717487274873748747487574876748777487874879748807488174882748837488474885748867488774888748897489074891748927489374894748957489674897748987489974900749017490274903749047490574906749077490874909749107491174912749137491474915749167491774918749197492074921749227492374924749257492674927749287492974930749317493274933749347493574936749377493874939749407494174942749437494474945749467494774948749497495074951749527495374954749557495674957749587495974960749617496274963749647496574966749677496874969749707497174972749737497474975749767497774978749797498074981749827498374984749857498674987749887498974990749917499274993749947499574996749977499874999750007500175002750037500475005750067500775008750097501075011750127501375014750157501675017750187501975020750217
50227502375024750257502675027750287502975030750317503275033750347503575036750377503875039750407504175042750437504475045750467504775048750497505075051750527505375054750557505675057750587505975060750617506275063750647506575066750677506875069750707507175072750737507475075750767507775078750797508075081750827508375084750857508675087750887508975090750917509275093750947509575096750977509875099751007510175102751037510475105751067510775108751097511075111751127511375114751157511675117751187511975120751217512275123751247512575126751277512875129751307513175132751337513475135751367513775138751397514075141751427514375144751457514675147751487514975150751517515275153751547515575156751577515875159751607516175162751637516475165751667516775168751697517075171751727517375174751757517675177751787517975180751817518275183751847518575186751877518875189751907519175192751937519475195751967519775198751997520075201752027520375204752057520675207752087520975210752117521275213752147521575216752177521875219752207522175222752237522475225752267522775228752297523075231752327523375234752357523675237752387523975240752417524275243752447524575246752477524875249752507525175252752537525475255752567525775258752597526075261752627526375264752657526675267752687526975270752717527275273752747527575276752777527875279752807528175282752837528475285752867528775288752897529075291752927529375294752957529675297752987529975300753017530275303753047530575306753077530875309753107531175312753137531475315753167531775318753197532075321753227532375324753257532675327753287532975330753317533275333753347533575336753377533875339753407534175342753437534475345753467534775348753497535075351753527535375354753557535675357753587535975360753617536275363753647536575366753677536875369753707537175372753737537475375753767537775378753797538075381753827538375384753857538675387753887538975390753917539275393753947539575396753977539875399754007540175402754037540475405754067540775408754097541075411754127541375414754157541675417754187541975420754217
54227542375424754257542675427754287542975430754317543275433754347543575436754377543875439754407544175442754437544475445754467544775448754497545075451754527545375454754557545675457754587545975460754617546275463754647546575466754677546875469754707547175472754737547475475754767547775478754797548075481754827548375484754857548675487754887548975490754917549275493754947549575496754977549875499755007550175502755037550475505755067550775508755097551075511755127551375514755157551675517755187551975520755217552275523755247552575526755277552875529755307553175532755337553475535755367553775538755397554075541755427554375544755457554675547755487554975550755517555275553755547555575556755577555875559755607556175562755637556475565755667556775568755697557075571755727557375574755757557675577755787557975580755817558275583755847558575586755877558875589755907559175592755937559475595755967559775598755997560075601756027560375604756057560675607756087560975610756117561275613756147561575616756177561875619756207562175622756237562475625756267562775628756297563075631756327563375634756357563675637756387563975640756417564275643756447564575646756477564875649756507565175652756537565475655756567565775658756597566075661756627566375664756657566675667756687566975670756717567275673756747567575676756777567875679756807568175682756837568475685756867568775688756897569075691756927569375694756957569675697756987569975700757017570275703757047570575706757077570875709757107571175712757137571475715757167571775718757197572075721757227572375724757257572675727757287572975730757317573275733757347573575736757377573875739757407574175742757437574475745757467574775748757497575075751757527575375754757557575675757757587575975760757617576275763757647576575766757677576875769757707577175772757737577475775757767577775778757797578075781757827578375784757857578675787757887578975790757917579275793757947579575796757977579875799758007580175802758037580475805758067580775808758097581075811758127581375814758157581675817758187581975820758217
58227582375824758257582675827758287582975830758317583275833758347583575836758377583875839758407584175842758437584475845758467584775848758497585075851758527585375854758557585675857758587585975860758617586275863758647586575866758677586875869758707587175872758737587475875758767587775878758797588075881758827588375884758857588675887758887588975890758917589275893758947589575896758977589875899759007590175902759037590475905759067590775908759097591075911759127591375914759157591675917759187591975920759217592275923759247592575926759277592875929759307593175932759337593475935759367593775938759397594075941759427594375944759457594675947759487594975950759517595275953759547595575956759577595875959759607596175962759637596475965759667596775968759697597075971759727597375974759757597675977759787597975980759817598275983759847598575986759877598875989759907599175992759937599475995759967599775998759997600076001760027600376004760057600676007760087600976010760117601276013760147601576016760177601876019760207602176022760237602476025760267602776028760297603076031760327603376034760357603676037760387603976040760417604276043760447604576046760477604876049760507605176052760537605476055760567605776058760597606076061760627606376064760657606676067760687606976070760717607276073760747607576076760777607876079760807608176082760837608476085760867608776088760897609076091760927609376094760957609676097760987609976100761017610276103761047610576106761077610876109761107611176112761137611476115761167611776118761197612076121761227612376124761257612676127761287612976130761317613276133761347613576136761377613876139761407614176142761437614476145761467614776148761497615076151761527615376154761557615676157761587615976160761617616276163761647616576166761677616876169761707617176172761737617476175761767617776178761797618076181761827618376184761857618676187761887618976190761917619276193761947619576196761977619876199762007620176202762037620476205762067620776208762097621076211762127621376214762157621676217762187621976220762217
62227622376224762257622676227762287622976230762317623276233762347623576236762377623876239762407624176242762437624476245762467624776248762497625076251762527625376254762557625676257762587625976260762617626276263762647626576266762677626876269762707627176272762737627476275762767627776278762797628076281762827628376284762857628676287762887628976290762917629276293762947629576296762977629876299763007630176302763037630476305763067630776308763097631076311763127631376314763157631676317763187631976320763217632276323763247632576326763277632876329763307633176332763337633476335763367633776338763397634076341763427634376344763457634676347763487634976350763517635276353763547635576356763577635876359763607636176362763637636476365763667636776368763697637076371763727637376374763757637676377763787637976380763817638276383763847638576386763877638876389763907639176392763937639476395763967639776398763997640076401764027640376404764057640676407764087640976410764117641276413764147641576416764177641876419764207642176422764237642476425764267642776428764297643076431764327643376434764357643676437764387643976440764417644276443764447644576446764477644876449764507645176452764537645476455764567645776458764597646076461764627646376464764657646676467764687646976470764717647276473764747647576476764777647876479764807648176482764837648476485764867648776488764897649076491764927649376494764957649676497764987649976500765017650276503765047650576506765077650876509765107651176512765137651476515765167651776518765197652076521765227652376524765257652676527765287652976530765317653276533765347653576536765377653876539765407654176542765437654476545765467654776548765497655076551765527655376554765557655676557765587655976560765617656276563765647656576566765677656876569765707657176572765737657476575765767657776578765797658076581765827658376584765857658676587765887658976590765917659276593765947659576596765977659876599766007660176602766037660476605766067660776608766097661076611766127661376614766157661676617766187661976620766217
66227662376624766257662676627766287662976630766317663276633766347663576636766377663876639766407664176642766437664476645766467664776648766497665076651766527665376654766557665676657766587665976660766617666276663766647666576666766677666876669766707667176672766737667476675766767667776678766797668076681766827668376684766857668676687766887668976690766917669276693766947669576696766977669876699767007670176702767037670476705767067670776708767097671076711767127671376714767157671676717767187671976720767217672276723767247672576726767277672876729767307673176732767337673476735767367673776738767397674076741767427674376744767457674676747767487674976750767517675276753767547675576756767577675876759767607676176762767637676476765767667676776768767697677076771767727677376774767757677676777767787677976780767817678276783767847678576786767877678876789767907679176792767937679476795767967679776798767997680076801768027680376804768057680676807768087680976810768117681276813768147681576816768177681876819768207682176822768237682476825768267682776828768297683076831768327683376834768357683676837768387683976840768417684276843768447684576846768477684876849768507685176852768537685476855768567685776858768597686076861768627686376864768657686676867768687686976870768717687276873768747687576876768777687876879768807688176882768837688476885768867688776888768897689076891768927689376894768957689676897768987689976900769017690276903769047690576906769077690876909769107691176912769137691476915769167691776918769197692076921769227692376924769257692676927769287692976930769317693276933769347693576936769377693876939769407694176942769437694476945769467694776948769497695076951769527695376954769557695676957769587695976960769617696276963769647696576966769677696876969769707697176972769737697476975769767697776978769797698076981769827698376984769857698676987769887698976990769917699276993769947699576996769977699876999770007700177002770037700477005770067700777008770097701077011770127701377014770157701677017770187701977020770217
70227702377024770257702677027770287702977030770317703277033770347703577036770377703877039770407704177042770437704477045770467704777048770497705077051770527705377054770557705677057770587705977060770617706277063770647706577066770677706877069770707707177072770737707477075770767707777078770797708077081770827708377084770857708677087770887708977090770917709277093770947709577096770977709877099771007710177102771037710477105771067710777108771097711077111771127711377114771157711677117771187711977120771217712277123771247712577126771277712877129771307713177132771337713477135771367713777138771397714077141771427714377144771457714677147771487714977150771517715277153771547715577156771577715877159771607716177162771637716477165771667716777168771697717077171771727717377174771757717677177771787717977180771817718277183771847718577186771877718877189771907719177192771937719477195771967719777198771997720077201772027720377204772057720677207772087720977210772117721277213772147721577216772177721877219772207722177222772237722477225772267722777228772297723077231772327723377234772357723677237772387723977240772417724277243772447724577246772477724877249772507725177252772537725477255772567725777258772597726077261772627726377264772657726677267772687726977270772717727277273772747727577276772777727877279772807728177282772837728477285772867728777288772897729077291772927729377294772957729677297772987729977300773017730277303773047730577306773077730877309773107731177312773137731477315773167731777318773197732077321773227732377324773257732677327773287732977330773317733277333773347733577336773377733877339773407734177342773437734477345773467734777348773497735077351773527735377354773557735677357773587735977360773617736277363773647736577366773677736877369773707737177372773737737477375773767737777378773797738077381773827738377384773857738677387773887738977390773917739277393773947739577396773977739877399774007740177402774037740477405774067740777408774097741077411774127741377414774157741677417774187741977420774217
74227742377424774257742677427774287742977430774317743277433774347743577436774377743877439774407744177442774437744477445774467744777448774497745077451774527745377454774557745677457774587745977460774617746277463774647746577466774677746877469774707747177472774737747477475774767747777478774797748077481774827748377484774857748677487774887748977490774917749277493774947749577496774977749877499775007750177502775037750477505775067750777508775097751077511775127751377514775157751677517775187751977520775217752277523775247752577526775277752877529775307753177532775337753477535775367753777538775397754077541775427754377544775457754677547775487754977550775517755277553775547755577556775577755877559775607756177562775637756477565775667756777568775697757077571775727757377574775757757677577775787757977580775817758277583775847758577586775877758877589775907759177592775937759477595775967759777598775997760077601776027760377604776057760677607776087760977610776117761277613776147761577616776177761877619776207762177622776237762477625776267762777628776297763077631776327763377634776357763677637776387763977640776417764277643776447764577646776477764877649776507765177652776537765477655776567765777658776597766077661776627766377664776657766677667776687766977670776717767277673776747767577676776777767877679776807768177682776837768477685776867768777688776897769077691776927769377694776957769677697776987769977700777017770277703777047770577706777077770877709777107771177712777137771477715777167771777718777197772077721777227772377724777257772677727777287772977730777317773277733777347773577736777377773877739777407774177742777437774477745777467774777748777497775077751777527775377754777557775677757777587775977760777617776277763777647776577766777677776877769777707777177772777737777477775777767777777778777797778077781777827778377784777857778677787777887778977790777917779277793777947779577796777977779877799778007780177802778037780477805778067780777808778097781077811778127781377814778157781677817778187781977820778217
78227782377824778257782677827778287782977830778317783277833778347783577836778377783877839778407784177842778437784477845778467784777848778497785077851778527785377854778557785677857778587785977860778617786277863778647786577866778677786877869778707787177872778737787477875778767787777878778797788077881778827788377884778857788677887778887788977890778917789277893778947789577896778977789877899779007790177902779037790477905779067790777908779097791077911779127791377914779157791677917779187791977920779217792277923779247792577926779277792877929779307793177932779337793477935779367793777938779397794077941779427794377944779457794677947779487794977950779517795277953779547795577956779577795877959779607796177962779637796477965779667796777968779697797077971779727797377974779757797677977779787797977980779817798277983779847798577986779877798877989779907799177992779937799477995779967799777998779997800078001780027800378004780057800678007780087800978010780117801278013780147801578016780177801878019780207802178022780237802478025780267802778028780297803078031780327803378034780357803678037780387803978040780417804278043780447804578046780477804878049780507805178052780537805478055780567805778058780597806078061780627806378064780657806678067780687806978070780717807278073780747807578076780777807878079780807808178082780837808478085780867808778088780897809078091780927809378094780957809678097780987809978100781017810278103781047810578106781077810878109781107811178112781137811478115781167811778118781197812078121781227812378124781257812678127781287812978130781317813278133781347813578136781377813878139781407814178142781437814478145781467814778148781497815078151781527815378154781557815678157781587815978160781617816278163781647816578166781677816878169781707817178172781737817478175781767817778178781797818078181781827818378184781857818678187781887818978190781917819278193781947819578196781977819878199782007820178202782037820478205782067820778208782097821078211782127821378214782157821678217782187821978220782217
82227822378224782257822678227782287822978230782317823278233782347823578236782377823878239782407824178242782437824478245782467824778248782497825078251782527825378254782557825678257782587825978260782617826278263782647826578266782677826878269782707827178272782737827478275782767827778278782797828078281782827828378284782857828678287782887828978290782917829278293782947829578296782977829878299783007830178302783037830478305783067830778308783097831078311783127831378314783157831678317783187831978320783217832278323783247832578326783277832878329783307833178332783337833478335783367833778338783397834078341783427834378344783457834678347783487834978350783517835278353783547835578356783577835878359783607836178362783637836478365783667836778368783697837078371783727837378374783757837678377783787837978380783817838278383783847838578386783877838878389783907839178392783937839478395783967839778398783997840078401784027840378404784057840678407784087840978410784117841278413784147841578416784177841878419784207842178422784237842478425784267842778428784297843078431784327843378434784357843678437784387843978440784417844278443784447844578446784477844878449784507845178452784537845478455784567845778458784597846078461784627846378464784657846678467784687846978470784717847278473784747847578476784777847878479784807848178482784837848478485784867848778488784897849078491784927849378494784957849678497784987849978500785017850278503785047850578506785077850878509785107851178512785137851478515785167851778518785197852078521785227852378524785257852678527785287852978530785317853278533785347853578536785377853878539785407854178542785437854478545785467854778548785497855078551785527855378554785557855678557785587855978560785617856278563785647856578566785677856878569785707857178572785737857478575785767857778578785797858078581785827858378584785857858678587785887858978590785917859278593785947859578596785977859878599786007860178602786037860478605786067860778608786097861078611786127861378614786157861678617786187861978620786217
86227862378624786257862678627786287862978630786317863278633786347863578636786377863878639786407864178642786437864478645786467864778648786497865078651786527865378654786557865678657786587865978660786617866278663786647866578666786677866878669786707867178672786737867478675786767867778678786797868078681786827868378684786857868678687786887868978690786917869278693786947869578696786977869878699787007870178702787037870478705787067870778708787097871078711787127871378714787157871678717787187871978720787217872278723787247872578726787277872878729787307873178732787337873478735787367873778738787397874078741787427874378744787457874678747787487874978750787517875278753787547875578756787577875878759787607876178762787637876478765787667876778768787697877078771787727877378774787757877678777787787877978780787817878278783787847878578786787877878878789787907879178792787937879478795787967879778798787997880078801788027880378804788057880678807788087880978810788117881278813788147881578816788177881878819788207882178822788237882478825788267882778828788297883078831788327883378834788357883678837788387883978840788417884278843788447884578846788477884878849788507885178852788537885478855788567885778858788597886078861788627886378864788657886678867788687886978870788717887278873788747887578876788777887878879788807888178882788837888478885788867888778888788897889078891788927889378894788957889678897788987889978900789017890278903789047890578906789077890878909789107891178912789137891478915789167891778918789197892078921789227892378924789257892678927789287892978930789317893278933789347893578936789377893878939789407894178942789437894478945789467894778948789497895078951789527895378954789557895678957789587895978960789617896278963789647896578966789677896878969789707897178972789737897478975789767897778978789797898078981789827898378984789857898678987789887898978990789917899278993789947899578996789977899878999790007900179002790037900479005790067900779008790097901079011790127901379014790157901679017790187901979020790217
90227902379024790257902679027790287902979030790317903279033790347903579036790377903879039790407904179042790437904479045790467904779048790497905079051790527905379054790557905679057790587905979060790617906279063790647906579066790677906879069790707907179072790737907479075790767907779078790797908079081790827908379084790857908679087790887908979090790917909279093790947909579096790977909879099791007910179102791037910479105791067910779108791097911079111791127911379114791157911679117791187911979120791217912279123791247912579126791277912879129791307913179132791337913479135791367913779138791397914079141791427914379144791457914679147791487914979150791517915279153791547915579156791577915879159791607916179162791637916479165791667916779168791697917079171791727917379174791757917679177791787917979180791817918279183791847918579186791877918879189791907919179192791937919479195791967919779198791997920079201792027920379204792057920679207792087920979210792117921279213792147921579216792177921879219792207922179222792237922479225792267922779228792297923079231792327923379234792357923679237792387923979240792417924279243792447924579246792477924879249792507925179252792537925479255792567925779258792597926079261792627926379264792657926679267792687926979270792717927279273792747927579276792777927879279792807928179282792837928479285792867928779288792897929079291792927929379294792957929679297792987929979300793017930279303793047930579306793077930879309793107931179312793137931479315793167931779318793197932079321793227932379324793257932679327793287932979330793317933279333793347933579336793377933879339793407934179342793437934479345793467934779348793497935079351793527935379354793557935679357793587935979360793617936279363793647936579366793677936879369793707937179372793737937479375793767937779378793797938079381793827938379384793857938679387793887938979390793917939279393793947939579396793977939879399794007940179402794037940479405794067940779408794097941079411794127941379414794157941679417794187941979420794217
94227942379424794257942679427794287942979430794317943279433794347943579436794377943879439794407944179442794437944479445794467944779448794497945079451794527945379454794557945679457794587945979460794617946279463794647946579466794677946879469794707947179472794737947479475794767947779478794797948079481794827948379484794857948679487794887948979490794917949279493794947949579496794977949879499795007950179502795037950479505795067950779508795097951079511795127951379514795157951679517795187951979520795217952279523795247952579526795277952879529795307953179532795337953479535795367953779538795397954079541795427954379544795457954679547795487954979550795517955279553795547955579556795577955879559795607956179562795637956479565795667956779568795697957079571795727957379574795757957679577795787957979580795817958279583795847958579586795877958879589795907959179592795937959479595795967959779598795997960079601796027960379604796057960679607796087960979610796117961279613796147961579616796177961879619796207962179622796237962479625796267962779628796297963079631796327963379634796357963679637796387963979640796417964279643796447964579646796477964879649796507965179652796537965479655796567965779658796597966079661796627966379664796657966679667796687966979670796717967279673796747967579676796777967879679796807968179682796837968479685796867968779688796897969079691796927969379694796957969679697796987969979700797017970279703797047970579706797077970879709797107971179712797137971479715797167971779718797197972079721797227972379724797257972679727797287972979730797317973279733797347973579736797377973879739797407974179742797437974479745797467974779748797497975079751797527975379754797557975679757797587975979760797617976279763797647976579766797677976879769797707977179772797737977479775797767977779778797797978079781797827978379784797857978679787797887978979790797917979279793797947979579796797977979879799798007980179802798037980479805798067980779808798097981079811798127981379814798157981679817798187981979820798217
98227982379824798257982679827798287982979830798317983279833798347983579836798377983879839798407984179842798437984479845798467984779848798497985079851798527985379854798557985679857798587985979860798617986279863798647986579866798677986879869798707987179872798737987479875798767987779878798797988079881798827988379884798857988679887798887988979890798917989279893798947989579896798977989879899799007990179902799037990479905799067990779908799097991079911799127991379914799157991679917799187991979920799217992279923799247992579926799277992879929799307993179932799337993479935799367993779938799397994079941799427994379944799457994679947799487994979950799517995279953799547995579956799577995879959799607996179962799637996479965799667996779968799697997079971799727997379974799757997679977799787997979980799817998279983799847998579986799877998879989799907999179992799937999479995799967999779998799998000080001800028000380004800058000680007800088000980010800118001280013800148001580016800178001880019800208002180022800238002480025800268002780028800298003080031800328003380034800358003680037800388003980040800418004280043800448004580046800478004880049800508005180052800538005480055800568005780058800598006080061800628006380064800658006680067800688006980070800718007280073800748007580076800778007880079800808008180082800838008480085800868008780088800898009080091800928009380094800958009680097800988009980100801018010280103801048010580106801078010880109801108011180112801138011480115801168011780118801198012080121801228012380124801258012680127801288012980130801318013280133801348013580136801378013880139801408014180142801438014480145801468014780148801498015080151801528015380154801558015680157801588015980160801618016280163801648016580166801678016880169801708017180172801738017480175801768017780178801798018080181801828018380184801858018680187801888018980190801918019280193801948019580196801978019880199802008020180202802038020480205802068020780208802098021080211802128021380214802158021680217802188021980220802218
02228022380224802258022680227802288022980230802318023280233802348023580236802378023880239802408024180242802438024480245802468024780248802498025080251802528025380254802558025680257802588025980260802618026280263802648026580266802678026880269802708027180272802738027480275802768027780278802798028080281802828028380284802858028680287802888028980290802918029280293802948029580296802978029880299803008030180302803038030480305803068030780308803098031080311803128031380314803158031680317803188031980320803218032280323803248032580326803278032880329803308033180332803338033480335803368033780338803398034080341803428034380344803458034680347803488034980350803518035280353803548035580356803578035880359803608036180362803638036480365803668036780368803698037080371803728037380374803758037680377803788037980380803818038280383803848038580386803878038880389803908039180392803938039480395803968039780398803998040080401804028040380404804058040680407804088040980410804118041280413804148041580416804178041880419804208042180422804238042480425804268042780428804298043080431804328043380434804358043680437804388043980440804418044280443804448044580446804478044880449804508045180452804538045480455804568045780458804598046080461804628046380464804658046680467804688046980470804718047280473804748047580476804778047880479804808048180482804838048480485804868048780488804898049080491804928049380494804958049680497804988049980500805018050280503805048050580506805078050880509805108051180512805138051480515805168051780518805198052080521805228052380524805258052680527805288052980530805318053280533805348053580536805378053880539805408054180542805438054480545805468054780548805498055080551805528055380554805558055680557805588055980560805618056280563805648056580566805678056880569805708057180572805738057480575805768057780578805798058080581805828058380584805858058680587805888058980590805918059280593805948059580596805978059880599806008060180602806038060480605806068060780608806098061080611806128061380614806158061680617806188061980620806218
06228062380624806258062680627806288062980630806318063280633806348063580636806378063880639806408064180642806438064480645806468064780648806498065080651806528065380654806558065680657806588065980660806618066280663806648066580666806678066880669806708067180672806738067480675806768067780678806798068080681806828068380684806858068680687806888068980690806918069280693806948069580696806978069880699807008070180702807038070480705807068070780708807098071080711807128071380714807158071680717807188071980720807218072280723807248072580726807278072880729807308073180732807338073480735807368073780738807398074080741807428074380744807458074680747807488074980750807518075280753807548075580756807578075880759807608076180762807638076480765807668076780768807698077080771807728077380774807758077680777807788077980780807818078280783807848078580786807878078880789807908079180792807938079480795807968079780798807998080080801808028080380804808058080680807808088080980810808118081280813808148081580816808178081880819808208082180822808238082480825808268082780828808298083080831808328083380834808358083680837808388083980840808418084280843808448084580846808478084880849808508085180852808538085480855808568085780858808598086080861808628086380864808658086680867808688086980870808718087280873808748087580876808778087880879808808088180882808838088480885808868088780888808898089080891808928089380894808958089680897808988089980900809018090280903809048090580906809078090880909809108091180912809138091480915809168091780918809198092080921809228092380924809258092680927809288092980930809318093280933809348093580936809378093880939809408094180942809438094480945809468094780948809498095080951809528095380954809558095680957809588095980960809618096280963809648096580966809678096880969809708097180972809738097480975809768097780978809798098080981809828098380984809858098680987809888098980990809918099280993809948099580996809978099880999810008100181002810038100481005810068100781008810098101081011810128101381014810158101681017810188101981020810218
102281023810248102581026810278102881029810308103181032810338103481035810368103781038810398104081041810428104381044810458104681047810488104981050810518105281053810548105581056810578105881059810608106181062810638106481065810668106781068810698107081071810728107381074810758107681077810788107981080810818108281083810848108581086810878108881089810908109181092810938109481095810968109781098810998110081101811028110381104811058110681107811088110981110811118111281113811148111581116811178111881119811208112181122811238112481125811268112781128811298113081131811328113381134811358113681137811388113981140811418114281143811448114581146811478114881149811508115181152811538115481155811568115781158811598116081161811628116381164811658116681167811688116981170811718117281173811748117581176811778117881179811808118181182811838118481185811868118781188811898119081191811928119381194811958119681197811988119981200812018120281203812048120581206812078120881209812108121181212812138121481215812168121781218812198122081221812228122381224812258122681227812288122981230812318123281233812348123581236812378123881239812408124181242812438124481245812468124781248812498125081251812528125381254812558125681257812588125981260812618126281263
  1. diff -urN --no-dereference linux-4.14.2.orig/Documentation/filesystems/reiser4.txt linux-4.14.2/Documentation/filesystems/reiser4.txt
  2. --- linux-4.14.2.orig/Documentation/filesystems/reiser4.txt 1970-01-01 01:00:00.000000000 +0100
  3. +++ linux-4.14.2/Documentation/filesystems/reiser4.txt 2017-11-26 22:13:09.000000000 +0100
  4. @@ -0,0 +1,75 @@
  5. +Reiser4 filesystem
  6. +==================
  7. +Reiser4 is a file system based on dancing tree algorithms, and is
  8. +described at http://www.namesys.com
  9. +
  10. +
  11. +References
  12. +==========
  13. +web page http://namesys.com/v4/v4.html
  14. +source code ftp://ftp.namesys.com/pub/reiser4-for-2.6/
  15. +userland tools ftp://ftp.namesys.com/pub/reiser4progs/
  16. +install page http://www.namesys.com/install_v4.html
  17. +
  18. +Compile options
  19. +===============
  20. +Enable reiser4 debug mode
  21. + This checks everything imaginable while reiser4
  22. + runs
  23. +
  24. +Mount options
  25. +=============
  26. +tmgr.atom_max_size=N
  27. + Atoms containing more than N blocks will be forced to commit.
  28. + N is decimal.
  29. + Default is nr_free_pagecache_pages() / 2 at mount time.
  30. +
  31. +tmgr.atom_max_age=N
  32. + Atoms older than N seconds will be forced to commit. N is decimal.
  33. + Default is 600.
  34. +
  35. +tmgr.atom_max_flushers=N
  36. + Limit of concurrent flushers for one atom. 0 means no limit.
  37. + Default is 0.
  38. +
  39. +tree.cbk_cache.nr_slots=N
  40. + Number of slots in the cbk cache.
  41. +
  42. +flush.relocate_threshold=N
  43. + If flush finds more than N adjacent dirty leaf-level blocks it
  44. + will force them to be relocated.
  45. + Default is 64.
  46. +
  47. +flush.relocate_distance=N
  48. + If flush can find a block allocation closer than at most
  49. + N blocks from the preceder it will relocate to that position.
  50. + Default is 64.
  51. +
  52. +flush.scan_maxnodes=N
  53. + The maximum number of nodes to scan left on a level during
  54. + flush.
  55. + Default is 10000.
  56. +
  57. +optimal_io_size=N
  58. + Preferred IO size. This value is used to set st_blksize of
  59. + struct stat.
  60. + Default is 65536.
  61. +
  62. +bsdgroups
  63. + Turn on BSD-style gid assignment.
  64. +
  65. +32bittimes
  66. + By default files in reiser4 have 64 bit timestamps. Files
  67. + created when filesystem is mounted with 32bittimes mount
  68. + option will get 32 bit timestamps.
  69. +
  70. +mtflush
  71. + Turn off concurrent flushing.
  72. +
  73. +nopseudo
  74. + Disable pseudo files support. See
  75. + http://namesys.com/v4/pseudo.html for more about pseudo files.
  76. +
  77. +dont_load_bitmap
  78. + Don't load all bitmap blocks at mount time, it is useful for
  79. + machines with tiny RAM and large disks.
  80. diff -urN --no-dereference linux-4.14.2.orig/Documentation/process/changes.rst linux-4.14.2/Documentation/process/changes.rst
  81. --- linux-4.14.2.orig/Documentation/process/changes.rst 2017-11-27 23:32:41.000000000 +0100
  82. +++ linux-4.14.2/Documentation/process/changes.rst 2017-11-26 22:13:09.000000000 +0100
  83. @@ -180,6 +180,13 @@
  84. versions of ``mkreiserfs``, ``resize_reiserfs``, ``debugreiserfs`` and
  85. ``reiserfsck``. These utils work on both i386 and alpha platforms.
  86. +Reiser4progs
  87. +------------
  88. +
  89. +The reiser4progs package contains utilities for the reiser4 file system.
  90. +Detailed instructions are provided in the README file located at:
  91. +<https://github.com/edward6/reiser4progs>.
  92. +
  93. Xfsprogs
  94. --------
  95. @@ -376,6 +383,11 @@
  96. - <http://www.kernel.org/pub/linux/utils/fs/reiserfs/>
  97. +Reiser4progs
  98. +------------
  99. +
  100. +- <http://sourceforge.net/projects/reiser4/>
  101. +
  102. Xfsprogs
  103. --------
  104. diff -urN --no-dereference linux-4.14.2.orig/fs/fs-writeback.c linux-4.14.2/fs/fs-writeback.c
  105. --- linux-4.14.2.orig/fs/fs-writeback.c 2017-11-27 23:32:41.000000000 +0100
  106. +++ linux-4.14.2/fs/fs-writeback.c 2017-11-26 22:13:09.000000000 +0100
  107. @@ -40,26 +40,6 @@
  108. };
  109. /*
  110. - * Passed into wb_writeback(), essentially a subset of writeback_control
  111. - */
  112. -struct wb_writeback_work {
  113. - long nr_pages;
  114. - struct super_block *sb;
  115. - unsigned long *older_than_this;
  116. - enum writeback_sync_modes sync_mode;
  117. - unsigned int tagged_writepages:1;
  118. - unsigned int for_kupdate:1;
  119. - unsigned int range_cyclic:1;
  120. - unsigned int for_background:1;
  121. - unsigned int for_sync:1; /* sync(2) WB_SYNC_ALL writeback */
  122. - unsigned int auto_free:1; /* free on completion */
  123. - enum wb_reason reason; /* why was writeback initiated? */
  124. -
  125. - struct list_head list; /* pending work list */
  126. - struct wb_completion *done; /* set if the caller waits */
  127. -};
  128. -
  129. -/*
  130. * If one wants to wait for one or more wb_writeback_works, each work's
  131. * ->done should be set to a wb_completion defined using the following
  132. * macro. Once all work items are issued with wb_queue_work(), the caller
  133. @@ -269,6 +249,7 @@
  134. if (unlikely(cmpxchg(&inode->i_wb, NULL, wb)))
  135. wb_put(wb);
  136. }
  137. +EXPORT_SYMBOL_GPL(__inode_attach_wb);
  138. /**
  139. * locked_inode_to_wb_and_lock_list - determine a locked inode's wb and lock it
  140. @@ -1488,20 +1469,12 @@
  141. * unlock and relock that for each inode it ends up doing
  142. * IO for.
  143. */
  144. -static long writeback_sb_inodes(struct super_block *sb,
  145. - struct bdi_writeback *wb,
  146. - struct wb_writeback_work *work)
  147. +long generic_writeback_sb_inodes(struct super_block *sb,
  148. + struct bdi_writeback *wb,
  149. + struct writeback_control *wbc,
  150. + struct wb_writeback_work *work,
  151. + bool flush_all)
  152. {
  153. - struct writeback_control wbc = {
  154. - .sync_mode = work->sync_mode,
  155. - .tagged_writepages = work->tagged_writepages,
  156. - .for_kupdate = work->for_kupdate,
  157. - .for_background = work->for_background,
  158. - .for_sync = work->for_sync,
  159. - .range_cyclic = work->range_cyclic,
  160. - .range_start = 0,
  161. - .range_end = LLONG_MAX,
  162. - };
  163. unsigned long start_time = jiffies;
  164. long write_chunk;
  165. long wrote = 0; /* count both pages and inodes */
  166. @@ -1540,7 +1513,7 @@
  167. redirty_tail(inode, wb);
  168. continue;
  169. }
  170. - if ((inode->i_state & I_SYNC) && wbc.sync_mode != WB_SYNC_ALL) {
  171. + if ((inode->i_state & I_SYNC) && wbc->sync_mode != WB_SYNC_ALL) {
  172. /*
  173. * If this inode is locked for writeback and we are not
  174. * doing writeback-for-data-integrity, move it to
  175. @@ -1570,21 +1543,21 @@
  176. continue;
  177. }
  178. inode->i_state |= I_SYNC;
  179. - wbc_attach_and_unlock_inode(&wbc, inode);
  180. + wbc_attach_and_unlock_inode(wbc, inode);
  181. write_chunk = writeback_chunk_size(wb, work);
  182. - wbc.nr_to_write = write_chunk;
  183. - wbc.pages_skipped = 0;
  184. + wbc->nr_to_write = write_chunk;
  185. + wbc->pages_skipped = 0;
  186. /*
  187. * We use I_SYNC to pin the inode in memory. While it is set
  188. * evict_inode() will wait so the inode cannot be freed.
  189. */
  190. - __writeback_single_inode(inode, &wbc);
  191. + __writeback_single_inode(inode, wbc);
  192. - wbc_detach_inode(&wbc);
  193. - work->nr_pages -= write_chunk - wbc.nr_to_write;
  194. - wrote += write_chunk - wbc.nr_to_write;
  195. + wbc_detach_inode(wbc);
  196. + work->nr_pages -= write_chunk - wbc->nr_to_write;
  197. + wrote += write_chunk - wbc->nr_to_write;
  198. if (need_resched()) {
  199. /*
  200. @@ -1607,7 +1580,7 @@
  201. spin_lock(&inode->i_lock);
  202. if (!(inode->i_state & I_DIRTY_ALL))
  203. wrote++;
  204. - requeue_inode(inode, tmp_wb, &wbc);
  205. + requeue_inode(inode, tmp_wb, wbc);
  206. inode_sync_complete(inode);
  207. spin_unlock(&inode->i_lock);
  208. @@ -1621,7 +1594,7 @@
  209. * background threshold and other termination conditions.
  210. */
  211. if (wrote) {
  212. - if (time_is_before_jiffies(start_time + HZ / 10UL))
  213. + if (!flush_all && time_is_before_jiffies(start_time + HZ / 10UL))
  214. break;
  215. if (work->nr_pages <= 0)
  216. break;
  217. @@ -1629,6 +1602,26 @@
  218. }
  219. return wrote;
  220. }
  221. +EXPORT_SYMBOL(generic_writeback_sb_inodes);
  222. +
  223. +long writeback_sb_inodes(struct super_block *sb,
  224. + struct bdi_writeback *wb,
  225. + struct wb_writeback_work *work)
  226. +{
  227. + struct writeback_control wbc = {
  228. + .sync_mode = work->sync_mode,
  229. + .tagged_writepages = work->tagged_writepages,
  230. + .for_kupdate = work->for_kupdate,
  231. + .for_background = work->for_background,
  232. + .range_cyclic = work->range_cyclic,
  233. + .range_start = 0,
  234. + .range_end = LLONG_MAX,
  235. + };
  236. + if (sb->s_op->writeback_inodes)
  237. + return sb->s_op->writeback_inodes(sb, wb, &wbc, work, false);
  238. + else
  239. + return generic_writeback_sb_inodes(sb, wb, &wbc, work, false);
  240. +}
  241. static long __writeback_inodes_wb(struct bdi_writeback *wb,
  242. struct wb_writeback_work *work)
  243. @@ -1903,6 +1896,31 @@
  244. }
  245. /*
  246. + * This function is for file systems which have their
  247. + * own means of periodical write-out of old data.
  248. + * NOTE: inode_lock should be held.
  249. + *
  250. + * Skip a portion of b_io inodes which belong to @sb
  251. + * and go sequentially in reverse order.
  252. + */
  253. +void writeback_skip_sb_inodes(struct super_block *sb,
  254. + struct bdi_writeback *wb)
  255. +{
  256. + while (1) {
  257. + struct inode *inode;
  258. +
  259. + if (list_empty(&wb->b_io))
  260. + break;
  261. + inode = wb_inode(wb->b_io.prev);
  262. + if (sb != inode->i_sb)
  263. + break;
  264. + redirty_tail(inode, wb);
  265. + }
  266. +}
  267. +EXPORT_SYMBOL(writeback_skip_sb_inodes);
  268. +
  269. +
  270. +/*
  271. * Handle writeback of dirty data for the device backed by this bdi. Also
  272. * reschedules periodically and does kupdated style flushing.
  273. */
  274. @@ -1913,7 +1931,7 @@
  275. long pages_written;
  276. set_worker_desc("flush-%s", dev_name(wb->bdi->dev));
  277. - current->flags |= PF_SWAPWRITE;
  278. + current->flags |= PF_FLUSHER | PF_SWAPWRITE;
  279. if (likely(!current_is_workqueue_rescuer() ||
  280. !test_bit(WB_registered, &wb->state))) {
  281. diff -urN --no-dereference linux-4.14.2.orig/fs/Kconfig linux-4.14.2/fs/Kconfig
  282. --- linux-4.14.2.orig/fs/Kconfig 2017-11-27 23:32:41.000000000 +0100
  283. +++ linux-4.14.2/fs/Kconfig 2017-11-26 22:13:09.000000000 +0100
  284. @@ -24,6 +24,7 @@
  285. default y if EXT4_FS=y
  286. default m if EXT2_FS_XATTR || EXT4_FS
  287. +source "fs/reiser4/Kconfig"
  288. source "fs/reiserfs/Kconfig"
  289. source "fs/jfs/Kconfig"
  290. diff -urN --no-dereference linux-4.14.2.orig/fs/Makefile linux-4.14.2/fs/Makefile
  291. --- linux-4.14.2.orig/fs/Makefile 2017-11-27 23:32:41.000000000 +0100
  292. +++ linux-4.14.2/fs/Makefile 2017-11-26 22:13:09.000000000 +0100
  293. @@ -66,6 +66,7 @@
  294. # Do not add any filesystems before this line
  295. obj-$(CONFIG_FSCACHE) += fscache/
  296. obj-$(CONFIG_REISERFS_FS) += reiserfs/
  297. +obj-$(CONFIG_REISER4_FS) += reiser4/
  298. obj-$(CONFIG_EXT4_FS) += ext4/
  299. # We place ext4 before ext2 so that clean ext3 root fs's do NOT mount using the
  300. # ext2 driver, which doesn't know about journalling! Explicitly request ext2
  301. diff -urN --no-dereference linux-4.14.2.orig/fs/read_write.c linux-4.14.2/fs/read_write.c
  302. --- linux-4.14.2.orig/fs/read_write.c 2017-11-27 23:32:41.000000000 +0100
  303. +++ linux-4.14.2/fs/read_write.c 2017-11-26 22:13:09.000000000 +0100
  304. @@ -233,12 +233,11 @@
  305. }
  306. EXPORT_SYMBOL(no_llseek);
  307. -loff_t default_llseek(struct file *file, loff_t offset, int whence)
  308. +loff_t default_llseek_unlocked(struct file *file, loff_t offset, int whence)
  309. {
  310. struct inode *inode = file_inode(file);
  311. loff_t retval;
  312. - inode_lock(inode);
  313. switch (whence) {
  314. case SEEK_END:
  315. offset += i_size_read(inode);
  316. @@ -283,9 +282,19 @@
  317. retval = offset;
  318. }
  319. out:
  320. - inode_unlock(inode);
  321. return retval;
  322. }
  323. +EXPORT_SYMBOL(default_llseek_unlocked);
  324. +
  325. +loff_t default_llseek(struct file *file, loff_t offset, int origin)
  326. +{
  327. + loff_t retval;
  328. +
  329. + inode_lock(file_inode(file));
  330. + retval = default_llseek_unlocked(file, offset, origin);
  331. + inode_unlock(file_inode(file));
  332. + return retval;
  333. +}
  334. EXPORT_SYMBOL(default_llseek);
  335. loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
  336. @@ -387,7 +396,7 @@
  337. read_write == READ ? MAY_READ : MAY_WRITE);
  338. }
  339. -static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
  340. +ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
  341. {
  342. struct iovec iov = { .iov_base = buf, .iov_len = len };
  343. struct kiocb kiocb;
  344. @@ -403,6 +412,7 @@
  345. *ppos = kiocb.ki_pos;
  346. return ret;
  347. }
  348. +EXPORT_SYMBOL(new_sync_read);
  349. ssize_t __vfs_read(struct file *file, char __user *buf, size_t count,
  350. loff_t *pos)
  351. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/as_ops.c linux-4.14.2/fs/reiser4/as_ops.c
  352. --- linux-4.14.2.orig/fs/reiser4/as_ops.c 1970-01-01 01:00:00.000000000 +0100
  353. +++ linux-4.14.2/fs/reiser4/as_ops.c 2017-11-26 22:13:09.000000000 +0100
  354. @@ -0,0 +1,348 @@
  355. +/* Copyright 2003 by Hans Reiser, licensing governed by reiser4/README */
  356. +
  357. +/* Interface to VFS. Reiser4 address_space_operations are defined here. */
  358. +
  359. +#include "forward.h"
  360. +#include "debug.h"
  361. +#include "dformat.h"
  362. +#include "coord.h"
  363. +#include "plugin/item/item.h"
  364. +#include "plugin/file/file.h"
  365. +#include "plugin/security/perm.h"
  366. +#include "plugin/disk_format/disk_format.h"
  367. +#include "plugin/plugin.h"
  368. +#include "plugin/plugin_set.h"
  369. +#include "plugin/object.h"
  370. +#include "txnmgr.h"
  371. +#include "jnode.h"
  372. +#include "znode.h"
  373. +#include "block_alloc.h"
  374. +#include "tree.h"
  375. +#include "vfs_ops.h"
  376. +#include "inode.h"
  377. +#include "page_cache.h"
  378. +#include "ktxnmgrd.h"
  379. +#include "super.h"
  380. +#include "reiser4.h"
  381. +#include "entd.h"
  382. +
  383. +#include <linux/profile.h>
  384. +#include <linux/types.h>
  385. +#include <linux/mount.h>
  386. +#include <linux/vfs.h>
  387. +#include <linux/mm.h>
  388. +#include <linux/buffer_head.h>
  389. +#include <linux/dcache.h>
  390. +#include <linux/list.h>
  391. +#include <linux/pagemap.h>
  392. +#include <linux/slab.h>
  393. +#include <linux/seq_file.h>
  394. +#include <linux/init.h>
  395. +#include <linux/module.h>
  396. +#include <linux/writeback.h>
  397. +#include <linux/backing-dev.h>
  398. +#include <linux/security.h>
  399. +#include <linux/migrate.h>
  400. +
  401. +/* address space operations */
  402. +
  403. +/**
  404. + * reiser4_set_page_dirty - set dirty bit, tag in page tree, dirty accounting
  405. + * @page: page to be dirtied
  406. + *
  407. + * Operation of struct address_space_operations. This implementation is used by
  408. + * unix and cryptcompress file plugins.
  409. + *
  410. + * This is called when reiser4 page gets dirtied outside of reiser4, for
  411. + * example, when dirty bit is moved from pte to physical page.
  412. + *
  413. + * Tags page in the mapping's page tree with special tag so that it is possible
  414. + * to do all the reiser4 specific work wrt dirty pages (jnode creation,
  415. + * capturing by an atom) later because it can not be done in the contexts where
  416. + * set_page_dirty is called.
  417. + */
  418. +int reiser4_set_page_dirty(struct page *page)
  419. +{
  420. + /* this page can be unformatted only */
  421. + assert("vs-1734", (page->mapping &&
  422. + page->mapping->host &&
  423. + reiser4_get_super_fake(page->mapping->host->i_sb) !=
  424. + page->mapping->host &&
  425. + reiser4_get_cc_fake(page->mapping->host->i_sb) !=
  426. + page->mapping->host &&
  427. + reiser4_get_bitmap_fake(page->mapping->host->i_sb) !=
  428. + page->mapping->host));
  429. + return __set_page_dirty_nobuffers(page);
  430. +}
  431. +
  432. +/* ->invalidatepage method for reiser4 */
  433. +
  434. +/*
  435. + * this is called for each truncated page from
  436. + * truncate_inode_pages()->truncate_{complete,partial}_page().
  437. + *
  438. + * At the moment of call, page is under lock, and outstanding io (if any) has
  439. + * completed.
  440. + */
  441. +
  442. +/**
  443. + * reiser4_invalidatepage
  444. + * @page: page to invalidate
  445. + * @offset: starting offset for partial invalidation
  446. + *
  447. + */
  448. +void reiser4_invalidatepage(struct page *page, unsigned int offset, unsigned int length)
  449. +{
  450. + int ret = 0;
  451. + int partial_page = (offset || length < PAGE_SIZE);
  452. + reiser4_context *ctx;
  453. + struct inode *inode;
  454. + jnode *node;
  455. +
  456. + /*
  457. + * This is called to truncate file's page.
  458. + *
  459. + * Originally, reiser4 implemented truncate in a standard way
  460. + * (vmtruncate() calls ->invalidatepage() on all truncated pages
  461. + * first, then file system ->truncate() call-back is invoked).
  462. + *
  463. + * This lead to the problem when ->invalidatepage() was called on a
  464. + * page with jnode that was captured into atom in ASTAGE_PRE_COMMIT
  465. + * process. That is, truncate was bypassing transactions. To avoid
  466. + * this, try_capture_page_to_invalidate() call was added here.
  467. + *
  468. + * After many troubles with vmtruncate() based truncate (including
  469. + * races with flush, tail conversion, etc.) it was re-written in the
  470. + * top-to-bottom style: items are killed in reiser4_cut_tree_object()
  471. + * and pages belonging to extent are invalidated in kill_hook_extent().
  472. + * So probably now additional call to capture is not needed here.
  473. + */
  474. +
  475. + assert("nikita-3137", PageLocked(page));
  476. + assert("nikita-3138", !PageWriteback(page));
  477. + inode = page->mapping->host;
  478. +
  479. + /*
  480. + * ->invalidatepage() should only be called for the unformatted
  481. + * jnodes. Destruction of all other types of jnodes is performed
  482. + * separately. But, during some corner cases (like handling errors
  483. + * during mount) it is simpler to let ->invalidatepage to be called on
  484. + * them. Check for this, and do nothing.
  485. + */
  486. + if (reiser4_get_super_fake(inode->i_sb) == inode)
  487. + return;
  488. + if (reiser4_get_cc_fake(inode->i_sb) == inode)
  489. + return;
  490. + if (reiser4_get_bitmap_fake(inode->i_sb) == inode)
  491. + return;
  492. + assert("vs-1426", PagePrivate(page));
  493. + assert("vs-1427",
  494. + page->mapping == jnode_get_mapping(jnode_by_page(page)));
  495. + assert("", jprivate(page) != NULL);
  496. + assert("", ergo(inode_file_plugin(inode) !=
  497. + file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID),
  498. + offset == 0));
  499. +
  500. + ctx = reiser4_init_context(inode->i_sb);
  501. + if (IS_ERR(ctx))
  502. + return;
  503. +
  504. + node = jprivate(page);
  505. + spin_lock_jnode(node);
  506. + if (!(node->state & ((1 << JNODE_DIRTY) | (1 << JNODE_FLUSH_QUEUED) |
  507. + (1 << JNODE_WRITEBACK) | (1 << JNODE_OVRWR)))) {
  508. + /* there is no need to capture */
  509. + jref(node);
  510. + JF_SET(node, JNODE_HEARD_BANSHEE);
  511. + page_clear_jnode(page, node);
  512. + reiser4_uncapture_jnode(node);
  513. + unhash_unformatted_jnode(node);
  514. + jput(node);
  515. + reiser4_exit_context(ctx);
  516. + return;
  517. + }
  518. + spin_unlock_jnode(node);
  519. +
  520. + /* capture page being truncated. */
  521. + ret = try_capture_page_to_invalidate(page);
  522. + if (ret != 0)
  523. + warning("nikita-3141", "Cannot capture: %i", ret);
  524. +
  525. + if (!partial_page) {
  526. + /* remove jnode from transaction and detach it from page. */
  527. + jref(node);
  528. + JF_SET(node, JNODE_HEARD_BANSHEE);
  529. + /* page cannot be detached from jnode concurrently, because it
  530. + * is locked */
  531. + reiser4_uncapture_page(page);
  532. +
  533. + /* this detaches page from jnode, so that jdelete will not try
  534. + * to lock page which is already locked */
  535. + spin_lock_jnode(node);
  536. + page_clear_jnode(page, node);
  537. + spin_unlock_jnode(node);
  538. + unhash_unformatted_jnode(node);
  539. +
  540. + jput(node);
  541. + }
  542. +
  543. + reiser4_exit_context(ctx);
  544. +}
  545. +
  546. +/* help function called from reiser4_releasepage(). It returns true if jnode
  547. + * can be detached from its page and page released. */
  548. +int jnode_is_releasable(jnode * node/* node to check */)
  549. +{
  550. + assert("nikita-2781", node != NULL);
  551. + assert_spin_locked(&(node->guard));
  552. + assert_spin_locked(&(node->load));
  553. +
  554. + /* if some thread is currently using jnode page, the latter cannot be
  555. + * detached */
  556. + if (atomic_read(&node->d_count) != 0)
  557. + return 0;
  558. +
  559. + assert("vs-1214", !jnode_is_loaded(node));
  560. +
  561. + /*
  562. + * can only release page if real block number is assigned to it. Simple
  563. + * check for ->atom wouldn't do, because it is possible for node to be
  564. + * clean, not in atom yet, and still having fake block number. For
  565. + * example, node just created in jinit_new().
  566. + */
  567. + if (reiser4_blocknr_is_fake(jnode_get_block(node)))
  568. + return 0;
  569. +
  570. + /*
  571. + * pages prepared for write can not be released anyway, so avoid
  572. + * detaching jnode from the page
  573. + */
  574. + if (JF_ISSET(node, JNODE_WRITE_PREPARED))
  575. + return 0;
  576. +
  577. + /*
  578. + * dirty jnode cannot be released. It can however be submitted to disk
  579. + * as part of early flushing, but only after getting flush-prepped.
  580. + */
  581. + if (JF_ISSET(node, JNODE_DIRTY))
  582. + return 0;
  583. +
  584. + /* overwrite set is only written by log writer. */
  585. + if (JF_ISSET(node, JNODE_OVRWR))
  586. + return 0;
  587. +
  588. + /* jnode is already under writeback */
  589. + if (JF_ISSET(node, JNODE_WRITEBACK))
  590. + return 0;
  591. +
  592. + /* don't flush bitmaps or journal records */
  593. + if (!jnode_is_znode(node) && !jnode_is_unformatted(node))
  594. + return 0;
  595. +
  596. + return 1;
  597. +}
  598. +
  599. +/*
  600. + * ->releasepage method for reiser4
  601. + *
  602. + * This is called by VM scanner when it comes across clean page. What we have
  603. + * to do here is to check whether page can really be released (freed that is)
  604. + * and if so, detach jnode from it and remove page from the page cache.
  605. + *
  606. + * Check for releasability is done by releasable() function.
  607. + */
  608. +int reiser4_releasepage(struct page *page, gfp_t gfp UNUSED_ARG)
  609. +{
  610. + jnode *node;
  611. +
  612. + assert("nikita-2257", PagePrivate(page));
  613. + assert("nikita-2259", PageLocked(page));
  614. + assert("nikita-2892", !PageWriteback(page));
  615. + assert("nikita-3019", reiser4_schedulable());
  616. +
  617. + /* NOTE-NIKITA: this can be called in the context of reiser4 call. It
  618. + is not clear what to do in this case. A lot of deadlocks seem to be
  619. + possible. */
  620. +
  621. + node = jnode_by_page(page);
  622. + assert("nikita-2258", node != NULL);
  623. + assert("reiser4-4", page->mapping != NULL);
  624. + assert("reiser4-5", page->mapping->host != NULL);
  625. +
  626. + if (PageDirty(page))
  627. + return 0;
  628. +
  629. + /* extra page reference is used by reiser4 to protect
  630. + * jnode<->page link from this ->releasepage(). */
  631. + if (page_count(page) > 3)
  632. + return 0;
  633. +
  634. + /* releasable() needs jnode lock, because it looks at the jnode fields
  635. + * and we need jload_lock here to avoid races with jload(). */
  636. + spin_lock_jnode(node);
  637. + spin_lock(&(node->load));
  638. + if (jnode_is_releasable(node)) {
  639. + struct address_space *mapping;
  640. +
  641. + mapping = page->mapping;
  642. + jref(node);
  643. + /* there is no need to synchronize against
  644. + * jnode_extent_write() here, because pages seen by
  645. + * jnode_extent_write() are !releasable(). */
  646. + page_clear_jnode(page, node);
  647. + spin_unlock(&(node->load));
  648. + spin_unlock_jnode(node);
  649. +
  650. + /* we are under memory pressure so release jnode also. */
  651. + jput(node);
  652. +
  653. + return 1;
  654. + } else {
  655. + spin_unlock(&(node->load));
  656. + spin_unlock_jnode(node);
  657. + assert("nikita-3020", reiser4_schedulable());
  658. + return 0;
  659. + }
  660. +}
  661. +
  662. +#ifdef CONFIG_MIGRATION
  663. +int reiser4_migratepage(struct address_space *mapping, struct page *newpage,
  664. + struct page *page, enum migrate_mode mode)
  665. +{
  666. + /* TODO: implement movable mapping
  667. + */
  668. + return -EIO;
  669. +}
  670. +#endif /* CONFIG_MIGRATION */
  671. +
  672. +int reiser4_readpage_dispatch(struct file *file, struct page *page)
  673. +{
  674. + assert("edward-1533", PageLocked(page));
  675. + assert("edward-1534", !PageUptodate(page));
  676. + assert("edward-1535", page->mapping && page->mapping->host);
  677. +
  678. + return inode_file_plugin(page->mapping->host)->readpage(file, page);
  679. +}
  680. +
  681. +int reiser4_readpages_dispatch(struct file *file, struct address_space *mapping,
  682. + struct list_head *pages, unsigned nr_pages)
  683. +{
  684. + return inode_file_plugin(mapping->host)->readpages(file, mapping,
  685. + pages, nr_pages);
  686. +}
  687. +
  688. +int reiser4_writepages_dispatch(struct address_space *mapping,
  689. + struct writeback_control *wbc)
  690. +{
  691. + return inode_file_plugin(mapping->host)->writepages(mapping, wbc);
  692. +}
  693. +
  694. +/* Make Linus happy.
  695. + Local variables:
  696. + c-indentation-style: "K&R"
  697. + mode-name: "LC"
  698. + c-basic-offset: 8
  699. + tab-width: 8
  700. + fill-column: 120
  701. + End:
  702. +*/
  703. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/block_alloc.c linux-4.14.2/fs/reiser4/block_alloc.c
  704. --- linux-4.14.2.orig/fs/reiser4/block_alloc.c 1970-01-01 01:00:00.000000000 +0100
  705. +++ linux-4.14.2/fs/reiser4/block_alloc.c 2017-11-26 22:13:09.000000000 +0100
  706. @@ -0,0 +1,1176 @@
  707. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  708. +reiser4/README */
  709. +
  710. +#include "debug.h"
  711. +#include "dformat.h"
  712. +#include "plugin/plugin.h"
  713. +#include "txnmgr.h"
  714. +#include "znode.h"
  715. +#include "block_alloc.h"
  716. +#include "tree.h"
  717. +#include "super.h"
  718. +#include "discard.h"
  719. +
  720. +#include <linux/types.h> /* for __u?? */
  721. +#include <linux/fs.h> /* for struct super_block */
  722. +#include <linux/spinlock.h>
  723. +
  724. +/* THE REISER4 DISK SPACE RESERVATION SCHEME. */
  725. +
  726. +/* We need to be able to reserve enough disk space to ensure that an atomic
  727. + operation will have enough disk space to flush (see flush.c and
  728. + http://namesys.com/v4/v4.html) and commit it once it is started.
  729. +
  730. + In our design a call for reserving disk space may fail but not an actual
  731. + block allocation.
  732. +
  733. + All free blocks, already allocated blocks, and all kinds of reserved blocks
  734. + are counted in different per-fs block counters.
  735. +
  736. + A reiser4 super block's set of block counters currently is:
  737. +
  738. + free -- free blocks,
  739. + used -- already allocated blocks,
  740. +
  741. + grabbed -- initially reserved for performing an fs operation, those blocks
  742. + are taken from free blocks, then grabbed disk space leaks from grabbed
  743. + blocks counter to other counters like "fake allocated", "flush
  744. + reserved", "used", the rest of not used grabbed space is returned to
  745. + free space at the end of fs operation;
  746. +
  747. + fake allocated -- counts all nodes without real disk block numbers assigned,
  748. + we have separate accounting for formatted and unformatted
  749. + nodes (for easier debugging);
  750. +
  751. + flush reserved -- disk space needed for flushing and committing an atom.
  752. + Each dirty already allocated block could be written as a
  753. + part of atom's overwrite set or as a part of atom's
  754. + relocate set. In both cases one additional block is needed,
  755. + it is used as a wandered block if we do overwrite or as a
  756. + new location for a relocated block.
  757. +
  758. + In addition, blocks in some states are counted on per-thread and per-atom
  759. + basis. A reiser4 context has a counter of blocks grabbed by this transaction
  760. + and the sb's grabbed blocks counter is a sum of grabbed blocks counter values
  761. + of each reiser4 context. Each reiser4 atom has a counter of "flush reserved"
  762. + blocks, which are reserved for flush processing and atom commit. */
  763. +
  764. +/* AN EXAMPLE: suppose we insert new item to the reiser4 tree. We estimate
  765. + number of blocks to grab for most expensive case of balancing when the leaf
  766. + node we insert new item to gets split and new leaf node is allocated.
  767. +
  768. + So, we need to grab blocks for
  769. +
  770. + 1) one block for possible dirtying the node we insert an item to. That block
  771. + would be used for node relocation at flush time or for allocating of a
  772. + wandered one, it depends what will be a result (what set, relocate or
  773. + overwrite the node gets assigned to) of the node processing by the flush
  774. + algorithm.
  775. +
  776. + 2) one block for either allocating a new node, or dirtying of right or left
  777. + clean neighbor, only one case may happen.
  778. +
  779. + VS-FIXME-HANS: why can only one case happen? I would expect to see dirtying
  780. + of left neighbor, right neighbor, current node, and creation of new node.
  781. + Have I forgotten something? email me.
  782. +
  783. + These grabbed blocks are counted in both reiser4 context "grabbed blocks"
  784. + counter and in the fs-wide one (both ctx->grabbed_blocks and
  785. + sbinfo->blocks_grabbed get incremented by 2), sb's free blocks counter is
  786. + decremented by 2.
  787. +
  788. + Suppose both two blocks were spent for dirtying of an already allocated clean
  789. + node (one block went from "grabbed" to "flush reserved") and for new block
  790. + allocating (one block went from "grabbed" to "fake allocated formatted").
  791. +
  792. + Inserting of a child pointer to the parent node caused parent node to be
  793. + split, the balancing code takes care about this grabbing necessary space
  794. + immediately by calling reiser4_grab with BA_RESERVED flag set which means
  795. + "can use the 5% reserved disk space".
  796. +
  797. + At this moment insertion completes and grabbed blocks (if they were not used)
  798. + should be returned to the free space counter.
  799. +
  800. + However the atom life-cycle is not completed. The atom had one "flush
  801. + reserved" block added by our insertion and the new fake allocated node is
  802. + counted as a "fake allocated formatted" one. The atom has to be fully
  803. + processed by flush before commit. Suppose that the flush moved the first,
  804. + already allocated node to the atom's overwrite list, the new fake allocated
  805. + node, obviously, went into the atom relocate set. The reiser4 flush
  806. + allocates the new node using one unit from "fake allocated formatted"
  807. + counter, the log writer uses one from "flush reserved" for wandered block
  808. + allocation.
  809. +
  810. + And, it is not the end. When the wandered block is deallocated after the
  811. + atom gets fully played (see wander.c for term description), the disk space
  812. + occupied for it is returned to free blocks. */
  813. +
  814. +/* BLOCK NUMBERS */
  815. +
  816. +/* Any reiser4 node has a block number assigned to it. We use these numbers for
  817. + indexing in hash tables, so if a block has not yet been assigned a location
  818. + on disk we need to give it a temporary fake block number.
  819. +
  820. + Current implementation of reiser4 uses 64-bit integers for block numbers. We
  821. + use highest bit in 64-bit block number to distinguish fake and real block
  822. + numbers. So, only 63 bits may be used for addressing real device
  823. + blocks. That "fake" block numbers space is divided into subspaces of fake
  824. + block numbers for data blocks and for shadow (working) bitmap blocks.
  825. +
  826. + Fake block numbers for data blocks are generated by a cyclic counter, which
  827. + gets incremented after each real block allocation. We assume that it is
  828. + impossible to overload this counter during one transaction life. */
  829. +
  830. +/* Initialize a blocknr hint. */
  831. +void reiser4_blocknr_hint_init(reiser4_blocknr_hint * hint)
  832. +{
  833. + memset(hint, 0, sizeof(reiser4_blocknr_hint));
  834. +}
  835. +
  836. +/* Release any resources of a blocknr hint. */
  837. +void reiser4_blocknr_hint_done(reiser4_blocknr_hint * hint UNUSED_ARG)
  838. +{
  839. +/* No resources should be freed in current blocknr_hint implementation. */
  840. +}
  841. +
  842. +/* see above for explanation of fake block number. */
  843. +/* Audited by: green(2002.06.11) */
  844. +int reiser4_blocknr_is_fake(const reiser4_block_nr * da)
  845. +{
  846. + /* The reason for not simply returning result of '&' operation is that
  847. + while return value is (possibly 32bit) int, the reiser4_block_nr is
  848. + at least 64 bits long, and high bit (which is the only possible
  849. + non zero bit after the masking) would be stripped off */
  850. + return (*da & REISER4_FAKE_BLOCKNR_BIT_MASK) ? 1 : 0;
  851. +}
  852. +
  853. +/* Static functions for <reiser4 super block>/<reiser4 context> block counters
  854. + arithmetic. Mostly, they are isolated to avoid coding the same assertions in
  855. + several places. */
  856. +static void sub_from_ctx_grabbed(reiser4_context * ctx, __u64 count)
  857. +{
  858. + BUG_ON(ctx->grabbed_blocks < count);
  859. + assert("zam-527", ctx->grabbed_blocks >= count);
  860. + ctx->grabbed_blocks -= count;
  861. +}
  862. +
  863. +static void add_to_ctx_grabbed(reiser4_context * ctx, __u64 count)
  864. +{
  865. + ctx->grabbed_blocks += count;
  866. +}
  867. +
  868. +static void sub_from_sb_grabbed(reiser4_super_info_data * sbinfo, __u64 count)
  869. +{
  870. + assert("zam-525", sbinfo->blocks_grabbed >= count);
  871. + sbinfo->blocks_grabbed -= count;
  872. +}
  873. +
  874. +/* Decrease the counter of block reserved for flush in super block. */
  875. +static void
  876. +sub_from_sb_flush_reserved(reiser4_super_info_data * sbinfo, __u64 count)
  877. +{
  878. + assert("vpf-291", sbinfo->blocks_flush_reserved >= count);
  879. + sbinfo->blocks_flush_reserved -= count;
  880. +}
  881. +
  882. +static void
  883. +sub_from_sb_fake_allocated(reiser4_super_info_data * sbinfo, __u64 count,
  884. + reiser4_ba_flags_t flags)
  885. +{
  886. + if (flags & BA_FORMATTED) {
  887. + assert("zam-806", sbinfo->blocks_fake_allocated >= count);
  888. + sbinfo->blocks_fake_allocated -= count;
  889. + } else {
  890. + assert("zam-528",
  891. + sbinfo->blocks_fake_allocated_unformatted >= count);
  892. + sbinfo->blocks_fake_allocated_unformatted -= count;
  893. + }
  894. +}
  895. +
  896. +static void sub_from_sb_used(reiser4_super_info_data * sbinfo, __u64 count)
  897. +{
  898. + assert("zam-530",
  899. + sbinfo->blocks_used >= count + sbinfo->min_blocks_used);
  900. + sbinfo->blocks_used -= count;
  901. +}
  902. +
  903. +static void
  904. +sub_from_cluster_reserved(reiser4_super_info_data * sbinfo, __u64 count)
  905. +{
  906. + assert("edward-501", sbinfo->blocks_clustered >= count);
  907. + sbinfo->blocks_clustered -= count;
  908. +}
  909. +
  910. +/* Increase the counter of block reserved for flush in atom. */
  911. +static void add_to_atom_flush_reserved_nolock(txn_atom * atom, __u32 count)
  912. +{
  913. + assert("zam-772", atom != NULL);
  914. + assert_spin_locked(&(atom->alock));
  915. + atom->flush_reserved += count;
  916. +}
  917. +
  918. +/* Decrease the counter of block reserved for flush in atom. */
  919. +static void sub_from_atom_flush_reserved_nolock(txn_atom * atom, __u32 count)
  920. +{
  921. + assert("zam-774", atom != NULL);
  922. + assert_spin_locked(&(atom->alock));
  923. + assert("nikita-2790", atom->flush_reserved >= count);
  924. + atom->flush_reserved -= count;
  925. +}
  926. +
  927. +/* super block has 6 counters: free, used, grabbed, fake allocated
  928. + (formatted and unformatted) and flush reserved. Their sum must be
  929. + number of blocks on a device. This function checks this */
  930. +int reiser4_check_block_counters(const struct super_block *super)
  931. +{
  932. + __u64 sum;
  933. +
  934. + sum = reiser4_grabbed_blocks(super) + reiser4_free_blocks(super) +
  935. + reiser4_data_blocks(super) + reiser4_fake_allocated(super) +
  936. + reiser4_fake_allocated_unformatted(super) + reiser4_flush_reserved(super) +
  937. + reiser4_clustered_blocks(super);
  938. + if (reiser4_block_count(super) != sum) {
  939. + printk("super block counters: "
  940. + "used %llu, free %llu, "
  941. + "grabbed %llu, fake allocated (formatetd %llu, unformatted %llu), "
  942. + "reserved %llu, clustered %llu, sum %llu, must be (block count) %llu\n",
  943. + (unsigned long long)reiser4_data_blocks(super),
  944. + (unsigned long long)reiser4_free_blocks(super),
  945. + (unsigned long long)reiser4_grabbed_blocks(super),
  946. + (unsigned long long)reiser4_fake_allocated(super),
  947. + (unsigned long long)
  948. + reiser4_fake_allocated_unformatted(super),
  949. + (unsigned long long)reiser4_flush_reserved(super),
  950. + (unsigned long long)reiser4_clustered_blocks(super),
  951. + (unsigned long long)sum,
  952. + (unsigned long long)reiser4_block_count(super));
  953. + return 0;
  954. + }
  955. + return 1;
  956. +}
  957. +
  958. +/* Adjust "working" free blocks counter for number of blocks we are going to
  959. + allocate. Record number of grabbed blocks in fs-wide and per-thread
  960. + counters. This function should be called before bitmap scanning or
  961. + allocating fake block numbers
  962. +
  963. + @super -- pointer to reiser4 super block;
  964. + @count -- number of blocks we reserve;
  965. +
  966. + @return -- 0 if success, -ENOSPC, if all
  967. + free blocks are preserved or already allocated.
  968. +*/
  969. +
  970. +static int
  971. +reiser4_grab(reiser4_context * ctx, __u64 count, reiser4_ba_flags_t flags)
  972. +{
  973. + __u64 free_blocks;
  974. + int ret = 0, use_reserved = flags & BA_RESERVED;
  975. + reiser4_super_info_data *sbinfo;
  976. +
  977. + assert("vs-1276", ctx == get_current_context());
  978. +
  979. + /* Do not grab anything on ro-mounted fs. */
  980. + if (rofs_super(ctx->super)) {
  981. + ctx->grab_enabled = 0;
  982. + return 0;
  983. + }
  984. +
  985. + sbinfo = get_super_private(ctx->super);
  986. +
  987. + spin_lock_reiser4_super(sbinfo);
  988. +
  989. + free_blocks = sbinfo->blocks_free;
  990. +
  991. + if ((use_reserved && free_blocks < count) ||
  992. + (!use_reserved && free_blocks < count + sbinfo->blocks_reserved)) {
  993. + ret = RETERR(-ENOSPC);
  994. + goto unlock_and_ret;
  995. + }
  996. +
  997. + add_to_ctx_grabbed(ctx, count);
  998. +
  999. + sbinfo->blocks_grabbed += count;
  1000. + sbinfo->blocks_free -= count;
  1001. +
  1002. +#if REISER4_DEBUG
  1003. + if (ctx->grabbed_initially == 0)
  1004. + ctx->grabbed_initially = count;
  1005. +#endif
  1006. +
  1007. + assert("nikita-2986", reiser4_check_block_counters(ctx->super));
  1008. +
  1009. + /* disable grab space in current context */
  1010. + ctx->grab_enabled = 0;
  1011. +
  1012. +unlock_and_ret:
  1013. + spin_unlock_reiser4_super(sbinfo);
  1014. +
  1015. + return ret;
  1016. +}
  1017. +
  1018. +int reiser4_grab_space(__u64 count, reiser4_ba_flags_t flags)
  1019. +{
  1020. + int ret;
  1021. + reiser4_context *ctx;
  1022. +
  1023. + assert("nikita-2964", ergo(flags & BA_CAN_COMMIT,
  1024. + lock_stack_isclean(get_current_lock_stack
  1025. + ())));
  1026. + ctx = get_current_context();
  1027. + if (!(flags & BA_FORCE) && !is_grab_enabled(ctx))
  1028. + return 0;
  1029. +
  1030. + ret = reiser4_grab(ctx, count, flags);
  1031. + if (ret == -ENOSPC) {
  1032. +
  1033. + /* Trying to commit the all transactions if BA_CAN_COMMIT flag
  1034. + present */
  1035. + if (flags & BA_CAN_COMMIT) {
  1036. + txnmgr_force_commit_all(ctx->super, 0);
  1037. + ctx->grab_enabled = 1;
  1038. + ret = reiser4_grab(ctx, count, flags);
  1039. + }
  1040. + }
  1041. + /*
  1042. + * allocation from reserved pool cannot fail. This is severe error.
  1043. + */
  1044. + assert("nikita-3005", ergo(flags & BA_RESERVED, ret == 0));
  1045. + return ret;
  1046. +}
  1047. +
  1048. +/*
  1049. + * SPACE RESERVED FOR UNLINK/TRUNCATE
  1050. + *
  1051. + * Unlink and truncate require space in transaction (to update stat data, at
  1052. + * least). But we don't want rm(1) to fail with "No space on device" error.
  1053. + *
  1054. + * Solution is to reserve 5% of disk space for truncates and
  1055. + * unlinks. Specifically, normal space grabbing requests don't grab space from
  1056. + * reserved area. Only requests with BA_RESERVED bit in flags are allowed to
  1057. + * drain it. Per super block delete mutex is used to allow only one
  1058. + * thread at a time to grab from reserved area.
  1059. + *
  1060. + * Grabbing from reserved area should always be performed with BA_CAN_COMMIT
  1061. + * flag.
  1062. + *
  1063. + */
  1064. +
  1065. +int reiser4_grab_reserved(struct super_block *super,
  1066. + __u64 count, reiser4_ba_flags_t flags)
  1067. +{
  1068. + reiser4_super_info_data *sbinfo = get_super_private(super);
  1069. +
  1070. + assert("nikita-3175", flags & BA_CAN_COMMIT);
  1071. +
  1072. + /* Check the delete mutex already taken by us, we assume that
  1073. + * reading of machine word is atomic. */
  1074. + if (sbinfo->delete_mutex_owner == current) {
  1075. + if (reiser4_grab_space
  1076. + (count, (flags | BA_RESERVED) & ~BA_CAN_COMMIT)) {
  1077. + warning("zam-1003",
  1078. + "nested call of grab_reserved fails count=(%llu)",
  1079. + (unsigned long long)count);
  1080. + reiser4_release_reserved(super);
  1081. + return RETERR(-ENOSPC);
  1082. + }
  1083. + return 0;
  1084. + }
  1085. +
  1086. + if (reiser4_grab_space(count, flags)) {
  1087. + mutex_lock(&sbinfo->delete_mutex);
  1088. + assert("nikita-2929", sbinfo->delete_mutex_owner == NULL);
  1089. + sbinfo->delete_mutex_owner = current;
  1090. +
  1091. + if (reiser4_grab_space(count, flags | BA_RESERVED)) {
  1092. + warning("zam-833",
  1093. + "reserved space is not enough (%llu)",
  1094. + (unsigned long long)count);
  1095. + reiser4_release_reserved(super);
  1096. + return RETERR(-ENOSPC);
  1097. + }
  1098. + }
  1099. + return 0;
  1100. +}
  1101. +
  1102. +void reiser4_release_reserved(struct super_block *super)
  1103. +{
  1104. + reiser4_super_info_data *info;
  1105. +
  1106. + info = get_super_private(super);
  1107. + if (info->delete_mutex_owner == current) {
  1108. + info->delete_mutex_owner = NULL;
  1109. + mutex_unlock(&info->delete_mutex);
  1110. + }
  1111. +}
  1112. +
  1113. +static reiser4_super_info_data *grabbed2fake_allocated_head(int count)
  1114. +{
  1115. + reiser4_context *ctx;
  1116. + reiser4_super_info_data *sbinfo;
  1117. +
  1118. + ctx = get_current_context();
  1119. + sub_from_ctx_grabbed(ctx, count);
  1120. +
  1121. + sbinfo = get_super_private(ctx->super);
  1122. + spin_lock_reiser4_super(sbinfo);
  1123. +
  1124. + sub_from_sb_grabbed(sbinfo, count);
  1125. + /* return sbinfo locked */
  1126. + return sbinfo;
  1127. +}
  1128. +
  1129. +/* is called after @count fake block numbers are allocated and pointer to
  1130. + those blocks are inserted into tree. */
  1131. +static void grabbed2fake_allocated_formatted(void)
  1132. +{
  1133. + reiser4_super_info_data *sbinfo;
  1134. +
  1135. + sbinfo = grabbed2fake_allocated_head(1);
  1136. + sbinfo->blocks_fake_allocated++;
  1137. +
  1138. + assert("vs-922", reiser4_check_block_counters(reiser4_get_current_sb()));
  1139. +
  1140. + spin_unlock_reiser4_super(sbinfo);
  1141. +}
  1142. +
  1143. +/**
  1144. + * grabbed2fake_allocated_unformatted
  1145. + * @count:
  1146. + *
  1147. + */
  1148. +static void grabbed2fake_allocated_unformatted(int count)
  1149. +{
  1150. + reiser4_super_info_data *sbinfo;
  1151. +
  1152. + sbinfo = grabbed2fake_allocated_head(count);
  1153. + sbinfo->blocks_fake_allocated_unformatted += count;
  1154. +
  1155. + assert("vs-9221", reiser4_check_block_counters(reiser4_get_current_sb()));
  1156. +
  1157. + spin_unlock_reiser4_super(sbinfo);
  1158. +}
  1159. +
  1160. +void grabbed2cluster_reserved(int count)
  1161. +{
  1162. + reiser4_context *ctx;
  1163. + reiser4_super_info_data *sbinfo;
  1164. +
  1165. + ctx = get_current_context();
  1166. + sub_from_ctx_grabbed(ctx, count);
  1167. +
  1168. + sbinfo = get_super_private(ctx->super);
  1169. + spin_lock_reiser4_super(sbinfo);
  1170. +
  1171. + sub_from_sb_grabbed(sbinfo, count);
  1172. + sbinfo->blocks_clustered += count;
  1173. +
  1174. + assert("edward-504", reiser4_check_block_counters(ctx->super));
  1175. +
  1176. + spin_unlock_reiser4_super(sbinfo);
  1177. +}
  1178. +
  1179. +void cluster_reserved2grabbed(int count)
  1180. +{
  1181. + reiser4_context *ctx;
  1182. + reiser4_super_info_data *sbinfo;
  1183. +
  1184. + ctx = get_current_context();
  1185. +
  1186. + sbinfo = get_super_private(ctx->super);
  1187. + spin_lock_reiser4_super(sbinfo);
  1188. +
  1189. + sub_from_cluster_reserved(sbinfo, count);
  1190. + sbinfo->blocks_grabbed += count;
  1191. +
  1192. + assert("edward-505", reiser4_check_block_counters(ctx->super));
  1193. +
  1194. + spin_unlock_reiser4_super(sbinfo);
  1195. + add_to_ctx_grabbed(ctx, count);
  1196. +}
  1197. +
  1198. +void cluster_reserved2free(int count)
  1199. +{
  1200. + reiser4_context *ctx;
  1201. + reiser4_super_info_data *sbinfo;
  1202. +
  1203. + ctx = get_current_context();
  1204. + sbinfo = get_super_private(ctx->super);
  1205. +
  1206. + cluster_reserved2grabbed(count);
  1207. + grabbed2free(ctx, sbinfo, count);
  1208. +}
  1209. +
  1210. +static DEFINE_SPINLOCK(fake_lock);
  1211. +static reiser4_block_nr fake_gen = 0;
  1212. +
  1213. +/**
  1214. + * assign_fake_blocknr
  1215. + * @blocknr:
  1216. + * @count:
  1217. + *
  1218. + * Obtain a fake block number for new node which will be used to refer to
  1219. + * this newly allocated node until real allocation is done.
  1220. + */
  1221. +static void assign_fake_blocknr(reiser4_block_nr *blocknr, int count)
  1222. +{
  1223. + spin_lock(&fake_lock);
  1224. + *blocknr = fake_gen;
  1225. + fake_gen += count;
  1226. + spin_unlock(&fake_lock);
  1227. +
  1228. + BUG_ON(*blocknr & REISER4_BLOCKNR_STATUS_BIT_MASK);
  1229. + /**blocknr &= ~REISER4_BLOCKNR_STATUS_BIT_MASK;*/
  1230. + *blocknr |= REISER4_UNALLOCATED_STATUS_VALUE;
  1231. + assert("zam-394", zlook(current_tree, blocknr) == NULL);
  1232. +}
  1233. +
  1234. +int assign_fake_blocknr_formatted(reiser4_block_nr * blocknr)
  1235. +{
  1236. + assign_fake_blocknr(blocknr, 1);
  1237. + grabbed2fake_allocated_formatted();
  1238. + return 0;
  1239. +}
  1240. +
  1241. +/**
  1242. + * fake_blocknrs_unformatted
  1243. + * @count: number of fake numbers to get
  1244. + *
  1245. + * Allocates @count fake block numbers which will be assigned to jnodes
  1246. + */
  1247. +reiser4_block_nr fake_blocknr_unformatted(int count)
  1248. +{
  1249. + reiser4_block_nr blocknr;
  1250. +
  1251. + assign_fake_blocknr(&blocknr, count);
  1252. + grabbed2fake_allocated_unformatted(count);
  1253. +
  1254. + return blocknr;
  1255. +}
  1256. +
  1257. +/* adjust sb block counters, if real (on-disk) block allocation immediately
  1258. + follows grabbing of free disk space. */
  1259. +static void grabbed2used(reiser4_context *ctx, reiser4_super_info_data *sbinfo,
  1260. + __u64 count)
  1261. +{
  1262. + sub_from_ctx_grabbed(ctx, count);
  1263. +
  1264. + spin_lock_reiser4_super(sbinfo);
  1265. +
  1266. + sub_from_sb_grabbed(sbinfo, count);
  1267. + sbinfo->blocks_used += count;
  1268. +
  1269. + assert("nikita-2679", reiser4_check_block_counters(ctx->super));
  1270. +
  1271. + spin_unlock_reiser4_super(sbinfo);
  1272. +}
  1273. +
  1274. +/* adjust sb block counters when @count unallocated blocks get mapped to disk */
  1275. +static void fake_allocated2used(reiser4_super_info_data *sbinfo, __u64 count,
  1276. + reiser4_ba_flags_t flags)
  1277. +{
  1278. + spin_lock_reiser4_super(sbinfo);
  1279. +
  1280. + sub_from_sb_fake_allocated(sbinfo, count, flags);
  1281. + sbinfo->blocks_used += count;
  1282. +
  1283. + assert("nikita-2680",
  1284. + reiser4_check_block_counters(reiser4_get_current_sb()));
  1285. +
  1286. + spin_unlock_reiser4_super(sbinfo);
  1287. +}
  1288. +
  1289. +static void flush_reserved2used(txn_atom * atom, __u64 count)
  1290. +{
  1291. + reiser4_super_info_data *sbinfo;
  1292. +
  1293. + assert("zam-787", atom != NULL);
  1294. + assert_spin_locked(&(atom->alock));
  1295. +
  1296. + sub_from_atom_flush_reserved_nolock(atom, (__u32) count);
  1297. +
  1298. + sbinfo = get_current_super_private();
  1299. + spin_lock_reiser4_super(sbinfo);
  1300. +
  1301. + sub_from_sb_flush_reserved(sbinfo, count);
  1302. + sbinfo->blocks_used += count;
  1303. +
  1304. + assert("zam-789",
  1305. + reiser4_check_block_counters(reiser4_get_current_sb()));
  1306. +
  1307. + spin_unlock_reiser4_super(sbinfo);
  1308. +}
  1309. +
  1310. +/* update the per fs blocknr hint default value. */
  1311. +void
  1312. +update_blocknr_hint_default(const struct super_block *s,
  1313. + const reiser4_block_nr * block)
  1314. +{
  1315. + reiser4_super_info_data *sbinfo = get_super_private(s);
  1316. +
  1317. + assert("nikita-3342", !reiser4_blocknr_is_fake(block));
  1318. +
  1319. + spin_lock_reiser4_super(sbinfo);
  1320. + if (*block < sbinfo->block_count) {
  1321. + sbinfo->blocknr_hint_default = *block;
  1322. + } else {
  1323. + warning("zam-676",
  1324. + "block number %llu is too large to be used in a blocknr hint\n",
  1325. + (unsigned long long)*block);
  1326. + dump_stack();
  1327. + DEBUGON(1);
  1328. + }
  1329. + spin_unlock_reiser4_super(sbinfo);
  1330. +}
  1331. +
  1332. +/* get current value of the default blocknr hint. */
  1333. +void get_blocknr_hint_default(reiser4_block_nr * result)
  1334. +{
  1335. + reiser4_super_info_data *sbinfo = get_current_super_private();
  1336. +
  1337. + spin_lock_reiser4_super(sbinfo);
  1338. + *result = sbinfo->blocknr_hint_default;
  1339. + assert("zam-677", *result < sbinfo->block_count);
  1340. + spin_unlock_reiser4_super(sbinfo);
  1341. +}
  1342. +
  1343. +/* Allocate "real" disk blocks by calling a proper space allocation plugin
  1344. + * method. Blocks are allocated in one contiguous disk region. The plugin
  1345. + * independent part accounts blocks by subtracting allocated amount from grabbed
  1346. + * or fake block counter and add the same amount to the counter of allocated
  1347. + * blocks.
  1348. + *
  1349. + * @hint -- a reiser4 blocknr hint object which contains further block
  1350. + * allocation hints and parameters (search start, a stage of block
  1351. + * which will be mapped to disk, etc.),
  1352. + * @blk -- an out parameter for the beginning of the allocated region,
  1353. + * @len -- in/out parameter, it should contain the maximum number of allocated
  1354. + * blocks, after block allocation completes, it contains the length of
  1355. + * allocated disk region.
  1356. + * @flags -- see reiser4_ba_flags_t description.
  1357. + *
  1358. + * @return -- 0 if success, error code otherwise.
  1359. + */
  1360. +int
  1361. +reiser4_alloc_blocks(reiser4_blocknr_hint * hint, reiser4_block_nr * blk,
  1362. + reiser4_block_nr * len, reiser4_ba_flags_t flags)
  1363. +{
  1364. + __u64 needed = *len;
  1365. + reiser4_context *ctx;
  1366. + reiser4_super_info_data *sbinfo;
  1367. + int ret;
  1368. +
  1369. + assert("zam-986", hint != NULL);
  1370. +
  1371. + ctx = get_current_context();
  1372. + sbinfo = get_super_private(ctx->super);
  1373. +
  1374. + /* For write-optimized data we use default search start value, which is
  1375. + * close to last write location. */
  1376. + if (flags & BA_USE_DEFAULT_SEARCH_START)
  1377. + get_blocknr_hint_default(&hint->blk);
  1378. +
  1379. + /* VITALY: allocator should grab this for internal/tx-lists/similar
  1380. + only. */
  1381. +/* VS-FIXME-HANS: why is this comment above addressed to vitaly (from vitaly)?*/
  1382. + if (hint->block_stage == BLOCK_NOT_COUNTED) {
  1383. + ret = reiser4_grab_space_force(*len, flags);
  1384. + if (ret != 0)
  1385. + return ret;
  1386. + }
  1387. +
  1388. + ret =
  1389. + sa_alloc_blocks(reiser4_get_space_allocator(ctx->super),
  1390. + hint, (int)needed, blk, len);
  1391. +
  1392. + if (!ret) {
  1393. + assert("zam-680", *blk < reiser4_block_count(ctx->super));
  1394. + assert("zam-681",
  1395. + *blk + *len <= reiser4_block_count(ctx->super));
  1396. +
  1397. + if (flags & BA_PERMANENT) {
  1398. + /* we assume that current atom exists at this moment */
  1399. + txn_atom *atom = get_current_atom_locked();
  1400. + atom->nr_blocks_allocated += *len;
  1401. + spin_unlock_atom(atom);
  1402. + }
  1403. +
  1404. + switch (hint->block_stage) {
  1405. + case BLOCK_NOT_COUNTED:
  1406. + case BLOCK_GRABBED:
  1407. + grabbed2used(ctx, sbinfo, *len);
  1408. + break;
  1409. + case BLOCK_UNALLOCATED:
  1410. + fake_allocated2used(sbinfo, *len, flags);
  1411. + break;
  1412. + case BLOCK_FLUSH_RESERVED:
  1413. + {
  1414. + txn_atom *atom = get_current_atom_locked();
  1415. + flush_reserved2used(atom, *len);
  1416. + spin_unlock_atom(atom);
  1417. + }
  1418. + break;
  1419. + default:
  1420. + impossible("zam-531", "wrong block stage");
  1421. + }
  1422. + } else {
  1423. + assert("zam-821",
  1424. + ergo(hint->max_dist == 0
  1425. + && !hint->backward, ret != -ENOSPC));
  1426. + if (hint->block_stage == BLOCK_NOT_COUNTED)
  1427. + grabbed2free(ctx, sbinfo, needed);
  1428. + }
  1429. +
  1430. + return ret;
  1431. +}
  1432. +
  1433. +/**
  1434. + * ask block allocator for some unformatted blocks
  1435. + */
  1436. +void allocate_blocks_unformatted(reiser4_blocknr_hint *preceder,
  1437. + reiser4_block_nr wanted_count,
  1438. + reiser4_block_nr *first_allocated,
  1439. + reiser4_block_nr *allocated,
  1440. + block_stage_t block_stage)
  1441. +{
  1442. + *allocated = wanted_count;
  1443. + preceder->max_dist = 0; /* scan whole disk, if needed */
  1444. +
  1445. + /* that number of blocks (wanted_count) is either in UNALLOCATED or in GRABBED */
  1446. + preceder->block_stage = block_stage;
  1447. +
  1448. + /* FIXME: we do not handle errors here now */
  1449. + check_me("vs-420",
  1450. + reiser4_alloc_blocks(preceder, first_allocated, allocated,
  1451. + BA_PERMANENT) == 0);
  1452. + /* update flush_pos's preceder to last allocated block number */
  1453. + preceder->blk = *first_allocated + *allocated - 1;
  1454. +}
  1455. +
  1456. +/* used -> fake_allocated -> grabbed -> free */
  1457. +
  1458. +/* adjust sb block counters when @count unallocated blocks get unmapped from
  1459. + disk */
  1460. +static void
  1461. +used2fake_allocated(reiser4_super_info_data * sbinfo, __u64 count,
  1462. + int formatted)
  1463. +{
  1464. + spin_lock_reiser4_super(sbinfo);
  1465. +
  1466. + if (formatted)
  1467. + sbinfo->blocks_fake_allocated += count;
  1468. + else
  1469. + sbinfo->blocks_fake_allocated_unformatted += count;
  1470. +
  1471. + sub_from_sb_used(sbinfo, count);
  1472. +
  1473. + assert("nikita-2681",
  1474. + reiser4_check_block_counters(reiser4_get_current_sb()));
  1475. +
  1476. + spin_unlock_reiser4_super(sbinfo);
  1477. +}
  1478. +
  1479. +static void
  1480. +used2flush_reserved(reiser4_super_info_data * sbinfo, txn_atom * atom,
  1481. + __u64 count, reiser4_ba_flags_t flags UNUSED_ARG)
  1482. +{
  1483. + assert("nikita-2791", atom != NULL);
  1484. + assert_spin_locked(&(atom->alock));
  1485. +
  1486. + add_to_atom_flush_reserved_nolock(atom, (__u32) count);
  1487. +
  1488. + spin_lock_reiser4_super(sbinfo);
  1489. +
  1490. + sbinfo->blocks_flush_reserved += count;
  1491. + /*add_to_sb_flush_reserved(sbinfo, count); */
  1492. + sub_from_sb_used(sbinfo, count);
  1493. +
  1494. + assert("nikita-2681",
  1495. + reiser4_check_block_counters(reiser4_get_current_sb()));
  1496. +
  1497. + spin_unlock_reiser4_super(sbinfo);
  1498. +}
  1499. +
  1500. +/* disk space, virtually used by fake block numbers is counted as "grabbed"
  1501. + again. */
  1502. +static void
  1503. +fake_allocated2grabbed(reiser4_context * ctx, reiser4_super_info_data * sbinfo,
  1504. + __u64 count, reiser4_ba_flags_t flags)
  1505. +{
  1506. + add_to_ctx_grabbed(ctx, count);
  1507. +
  1508. + spin_lock_reiser4_super(sbinfo);
  1509. +
  1510. + assert("nikita-2682", reiser4_check_block_counters(ctx->super));
  1511. +
  1512. + sbinfo->blocks_grabbed += count;
  1513. + sub_from_sb_fake_allocated(sbinfo, count, flags & BA_FORMATTED);
  1514. +
  1515. + assert("nikita-2683", reiser4_check_block_counters(ctx->super));
  1516. +
  1517. + spin_unlock_reiser4_super(sbinfo);
  1518. +}
  1519. +
  1520. +void fake_allocated2free(__u64 count, reiser4_ba_flags_t flags)
  1521. +{
  1522. + reiser4_context *ctx;
  1523. + reiser4_super_info_data *sbinfo;
  1524. +
  1525. + ctx = get_current_context();
  1526. + sbinfo = get_super_private(ctx->super);
  1527. +
  1528. + fake_allocated2grabbed(ctx, sbinfo, count, flags);
  1529. + grabbed2free(ctx, sbinfo, count);
  1530. +}
  1531. +
  1532. +void grabbed2free_mark(__u64 mark)
  1533. +{
  1534. + reiser4_context *ctx;
  1535. + reiser4_super_info_data *sbinfo;
  1536. +
  1537. + ctx = get_current_context();
  1538. + sbinfo = get_super_private(ctx->super);
  1539. +
  1540. + assert("nikita-3007", (__s64) mark >= 0);
  1541. + assert("nikita-3006", ctx->grabbed_blocks >= mark);
  1542. + grabbed2free(ctx, sbinfo, ctx->grabbed_blocks - mark);
  1543. +}
  1544. +
  1545. +/**
  1546. + * grabbed2free - adjust grabbed and free block counters
  1547. + * @ctx: context to update grabbed block counter of
  1548. + * @sbinfo: super block to update grabbed and free block counters of
  1549. + * @count: number of blocks to adjust counters by
  1550. + *
  1551. + * Decreases context's and per filesystem's counters of grabbed
  1552. + * blocks. Increases per filesystem's counter of free blocks.
  1553. + */
  1554. +void grabbed2free(reiser4_context *ctx, reiser4_super_info_data *sbinfo,
  1555. + __u64 count)
  1556. +{
  1557. + sub_from_ctx_grabbed(ctx, count);
  1558. +
  1559. + spin_lock_reiser4_super(sbinfo);
  1560. +
  1561. + sub_from_sb_grabbed(sbinfo, count);
  1562. + sbinfo->blocks_free += count;
  1563. + assert("nikita-2684", reiser4_check_block_counters(ctx->super));
  1564. +
  1565. + spin_unlock_reiser4_super(sbinfo);
  1566. +}
  1567. +
  1568. +void grabbed2flush_reserved_nolock(txn_atom * atom, __u64 count)
  1569. +{
  1570. + reiser4_context *ctx;
  1571. + reiser4_super_info_data *sbinfo;
  1572. +
  1573. + assert("vs-1095", atom);
  1574. +
  1575. + ctx = get_current_context();
  1576. + sbinfo = get_super_private(ctx->super);
  1577. +
  1578. + sub_from_ctx_grabbed(ctx, count);
  1579. +
  1580. + add_to_atom_flush_reserved_nolock(atom, count);
  1581. +
  1582. + spin_lock_reiser4_super(sbinfo);
  1583. +
  1584. + sbinfo->blocks_flush_reserved += count;
  1585. + sub_from_sb_grabbed(sbinfo, count);
  1586. +
  1587. + assert("vpf-292", reiser4_check_block_counters(ctx->super));
  1588. +
  1589. + spin_unlock_reiser4_super(sbinfo);
  1590. +}
  1591. +
  1592. +void grabbed2flush_reserved(__u64 count)
  1593. +{
  1594. + txn_atom *atom = get_current_atom_locked();
  1595. +
  1596. + grabbed2flush_reserved_nolock(atom, count);
  1597. +
  1598. + spin_unlock_atom(atom);
  1599. +}
  1600. +
  1601. +void flush_reserved2grabbed(txn_atom * atom, __u64 count)
  1602. +{
  1603. + reiser4_context *ctx;
  1604. + reiser4_super_info_data *sbinfo;
  1605. +
  1606. + assert("nikita-2788", atom != NULL);
  1607. + assert_spin_locked(&(atom->alock));
  1608. +
  1609. + ctx = get_current_context();
  1610. + sbinfo = get_super_private(ctx->super);
  1611. +
  1612. + add_to_ctx_grabbed(ctx, count);
  1613. +
  1614. + sub_from_atom_flush_reserved_nolock(atom, (__u32) count);
  1615. +
  1616. + spin_lock_reiser4_super(sbinfo);
  1617. +
  1618. + sbinfo->blocks_grabbed += count;
  1619. + sub_from_sb_flush_reserved(sbinfo, count);
  1620. +
  1621. + assert("vpf-292", reiser4_check_block_counters(ctx->super));
  1622. +
  1623. + spin_unlock_reiser4_super(sbinfo);
  1624. +}
  1625. +
  1626. +/**
  1627. + * all_grabbed2free - releases all blocks grabbed in context
  1628. + *
  1629. + * Decreases context's and super block's grabbed block counters by number of
  1630. + * blocks grabbed by current context and increases super block's free block
  1631. + * counter correspondingly.
  1632. + */
  1633. +void all_grabbed2free(void)
  1634. +{
  1635. + reiser4_context *ctx = get_current_context();
  1636. +
  1637. + grabbed2free(ctx, get_super_private(ctx->super), ctx->grabbed_blocks);
  1638. +}
  1639. +
  1640. +/* adjust sb block counters if real (on-disk) blocks do not become unallocated
  1641. + after freeing, @count blocks become "grabbed". */
  1642. +static void
  1643. +used2grabbed(reiser4_context * ctx, reiser4_super_info_data * sbinfo,
  1644. + __u64 count)
  1645. +{
  1646. + add_to_ctx_grabbed(ctx, count);
  1647. +
  1648. + spin_lock_reiser4_super(sbinfo);
  1649. +
  1650. + sbinfo->blocks_grabbed += count;
  1651. + sub_from_sb_used(sbinfo, count);
  1652. +
  1653. + assert("nikita-2685", reiser4_check_block_counters(ctx->super));
  1654. +
  1655. + spin_unlock_reiser4_super(sbinfo);
  1656. +}
  1657. +
  1658. +/* this used to be done through used2grabbed and grabbed2free*/
  1659. +static void used2free(reiser4_super_info_data * sbinfo, __u64 count)
  1660. +{
  1661. + spin_lock_reiser4_super(sbinfo);
  1662. +
  1663. + sbinfo->blocks_free += count;
  1664. + sub_from_sb_used(sbinfo, count);
  1665. +
  1666. + assert("nikita-2685",
  1667. + reiser4_check_block_counters(reiser4_get_current_sb()));
  1668. +
  1669. + spin_unlock_reiser4_super(sbinfo);
  1670. +}
  1671. +
  1672. +/* check "allocated" state of given block range */
  1673. +int
  1674. +reiser4_check_blocks(const reiser4_block_nr * start,
  1675. + const reiser4_block_nr * len, int desired)
  1676. +{
  1677. + return sa_check_blocks(start, len, desired);
  1678. +}
  1679. +
  1680. +/* Blocks deallocation function may do an actual deallocation through space
  1681. + plugin allocation or store deleted block numbers in atom's delete_set data
  1682. + structure depend on @defer parameter. */
  1683. +
  1684. +/* if BA_DEFER bit is not turned on, @target_stage means the stage of blocks
  1685. + which will be deleted from WORKING bitmap. They might be just unmapped from
  1686. + disk, or freed but disk space is still grabbed by current thread, or these
  1687. + blocks must not be counted in any reiser4 sb block counters,
  1688. + see block_stage_t comment */
  1689. +
  1690. +/* BA_FORMATTED bit is only used when BA_DEFER is not present: it is used to
  1691. + distinguish blocks allocated for unformatted and formatted nodes */
  1692. +
  1693. +int
  1694. +reiser4_dealloc_blocks(const reiser4_block_nr * start,
  1695. + const reiser4_block_nr * len,
  1696. + block_stage_t target_stage, reiser4_ba_flags_t flags)
  1697. +{
  1698. + txn_atom *atom = NULL;
  1699. + int ret;
  1700. + reiser4_context *ctx;
  1701. + reiser4_super_info_data *sbinfo;
  1702. + void *new_entry = NULL;
  1703. +
  1704. + ctx = get_current_context();
  1705. + sbinfo = get_super_private(ctx->super);
  1706. +
  1707. + if (REISER4_DEBUG) {
  1708. + assert("zam-431", *len != 0);
  1709. + assert("zam-432", *start != 0);
  1710. + assert("zam-558", !reiser4_blocknr_is_fake(start));
  1711. +
  1712. + spin_lock_reiser4_super(sbinfo);
  1713. + assert("zam-562", *start < sbinfo->block_count);
  1714. + spin_unlock_reiser4_super(sbinfo);
  1715. + }
  1716. +
  1717. + if (flags & BA_DEFER) {
  1718. + /*
  1719. + * These blocks will be later deallocated by apply_dset().
  1720. + * It is equivalent to a non-deferred deallocation with target
  1721. + * stage BLOCK_NOT_COUNTED.
  1722. + */
  1723. +
  1724. + /* store deleted block numbers in the atom's deferred delete set
  1725. + for further actual deletion */
  1726. + do {
  1727. + atom = get_current_atom_locked();
  1728. + assert("zam-430", atom != NULL);
  1729. +
  1730. + ret = atom_dset_deferred_add_extent(atom, &new_entry, start, len);
  1731. +
  1732. + if (ret == -ENOMEM)
  1733. + return ret;
  1734. +
  1735. + /* This loop might spin at most two times */
  1736. + } while (ret == -E_REPEAT);
  1737. +
  1738. + assert("zam-477", ret == 0);
  1739. + assert("zam-433", atom != NULL);
  1740. +
  1741. + spin_unlock_atom(atom);
  1742. +
  1743. + } else {
  1744. + assert("zam-425", get_current_super_private() != NULL);
  1745. + sa_dealloc_blocks(reiser4_get_space_allocator(ctx->super),
  1746. + *start, *len);
  1747. +
  1748. + if (flags & BA_PERMANENT) {
  1749. + /* These blocks were counted as allocated, we have to
  1750. + * revert it back if allocation is discarded. */
  1751. + txn_atom *atom = get_current_atom_locked();
  1752. + atom->nr_blocks_allocated -= *len;
  1753. + spin_unlock_atom(atom);
  1754. + }
  1755. +
  1756. + switch (target_stage) {
  1757. + case BLOCK_NOT_COUNTED:
  1758. + assert("vs-960", flags & BA_FORMATTED);
  1759. + /* VITALY: This is what was grabbed for
  1760. + internal/tx-lists/similar only */
  1761. + used2free(sbinfo, *len);
  1762. + break;
  1763. +
  1764. + case BLOCK_GRABBED:
  1765. + used2grabbed(ctx, sbinfo, *len);
  1766. + break;
  1767. +
  1768. + case BLOCK_UNALLOCATED:
  1769. + used2fake_allocated(sbinfo, *len, flags & BA_FORMATTED);
  1770. + break;
  1771. +
  1772. + case BLOCK_FLUSH_RESERVED:{
  1773. + txn_atom *atom;
  1774. +
  1775. + atom = get_current_atom_locked();
  1776. + used2flush_reserved(sbinfo, atom, *len,
  1777. + flags & BA_FORMATTED);
  1778. + spin_unlock_atom(atom);
  1779. + break;
  1780. + }
  1781. + default:
  1782. + impossible("zam-532", "wrong block stage");
  1783. + }
  1784. + }
  1785. +
  1786. + return 0;
  1787. +}
  1788. +
  1789. +/* wrappers for block allocator plugin methods */
  1790. +int reiser4_pre_commit_hook(void)
  1791. +{
  1792. + assert("zam-502", get_current_super_private() != NULL);
  1793. + sa_pre_commit_hook();
  1794. + return 0;
  1795. +}
  1796. +
  1797. +/* an actor which applies delete set to block allocator data */
  1798. +static int
  1799. +apply_dset(txn_atom * atom UNUSED_ARG, const reiser4_block_nr * a,
  1800. + const reiser4_block_nr * b, void *data UNUSED_ARG)
  1801. +{
  1802. + reiser4_context *ctx;
  1803. + reiser4_super_info_data *sbinfo;
  1804. +
  1805. + __u64 len = 1;
  1806. +
  1807. + ctx = get_current_context();
  1808. + sbinfo = get_super_private(ctx->super);
  1809. +
  1810. + assert("zam-877", atom->stage >= ASTAGE_PRE_COMMIT);
  1811. + assert("zam-552", sbinfo != NULL);
  1812. +
  1813. + if (b != NULL)
  1814. + len = *b;
  1815. +
  1816. + if (REISER4_DEBUG) {
  1817. + spin_lock_reiser4_super(sbinfo);
  1818. +
  1819. + assert("zam-554", *a < reiser4_block_count(ctx->super));
  1820. + assert("zam-555", *a + len <= reiser4_block_count(ctx->super));
  1821. +
  1822. + spin_unlock_reiser4_super(sbinfo);
  1823. + }
  1824. +
  1825. + sa_dealloc_blocks(&sbinfo->space_allocator, *a, len);
  1826. + /* adjust sb block counters */
  1827. + used2free(sbinfo, len);
  1828. + return 0;
  1829. +}
  1830. +
  1831. +void reiser4_post_commit_hook(void)
  1832. +{
  1833. +#ifdef REISER4_DEBUG
  1834. + txn_atom *atom;
  1835. +
  1836. + atom = get_current_atom_locked();
  1837. + assert("zam-452", atom->stage == ASTAGE_POST_COMMIT);
  1838. + spin_unlock_atom(atom);
  1839. +#endif
  1840. +
  1841. + assert("zam-504", get_current_super_private() != NULL);
  1842. + sa_post_commit_hook();
  1843. +}
  1844. +
  1845. +void reiser4_post_write_back_hook(void)
  1846. +{
  1847. + struct list_head discarded_set;
  1848. + txn_atom *atom;
  1849. + int ret;
  1850. +
  1851. + /* process and issue discard requests */
  1852. + blocknr_list_init (&discarded_set);
  1853. + do {
  1854. + atom = get_current_atom_locked();
  1855. + ret = discard_atom(atom, &discarded_set);
  1856. + } while (ret == -E_REPEAT);
  1857. +
  1858. + if (ret) {
  1859. + warning("intelfx-8", "discard atom failed (%d)", ret);
  1860. + }
  1861. +
  1862. + atom = get_current_atom_locked();
  1863. + discard_atom_post(atom, &discarded_set);
  1864. +
  1865. + /* do the block deallocation which was deferred
  1866. + until commit is done */
  1867. + atom_dset_deferred_apply(atom, apply_dset, NULL, 1);
  1868. +
  1869. + assert("zam-504", get_current_super_private() != NULL);
  1870. + sa_post_write_back_hook();
  1871. +}
  1872. +
  1873. +/*
  1874. + Local variables:
  1875. + c-indentation-style: "K&R"
  1876. + mode-name: "LC"
  1877. + c-basic-offset: 8
  1878. + tab-width: 8
  1879. + fill-column: 120
  1880. + scroll-step: 1
  1881. + End:
  1882. +*/
  1883. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/block_alloc.h linux-4.14.2/fs/reiser4/block_alloc.h
  1884. --- linux-4.14.2.orig/fs/reiser4/block_alloc.h 1970-01-01 01:00:00.000000000 +0100
  1885. +++ linux-4.14.2/fs/reiser4/block_alloc.h 2017-11-26 22:13:09.000000000 +0100
  1886. @@ -0,0 +1,177 @@
  1887. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  1888. +
  1889. +#if !defined(__FS_REISER4_BLOCK_ALLOC_H__)
  1890. +#define __FS_REISER4_BLOCK_ALLOC_H__
  1891. +
  1892. +#include "dformat.h"
  1893. +#include "forward.h"
  1894. +
  1895. +#include <linux/types.h> /* for __u?? */
  1896. +#include <linux/fs.h>
  1897. +
  1898. +/* Mask which, when applied to a given block number, shows whether that block
  1899. + number is a fake one */
  1900. +#define REISER4_FAKE_BLOCKNR_BIT_MASK 0x8000000000000000ULL
  1901. +/* Mask which isolates a type of object this fake block number was assigned
  1902. + to */
  1903. +#define REISER4_BLOCKNR_STATUS_BIT_MASK 0xC000000000000000ULL
  1904. +
  1905. +/*result after applying the REISER4_BLOCKNR_STATUS_BIT_MASK should be compared
  1906. + against these two values to understand whether the object is unallocated or a
  1907. + bitmap shadow object (WORKING BITMAP block, look at the plugin/space/bitmap.c) */
  1908. +#define REISER4_UNALLOCATED_STATUS_VALUE 0xC000000000000000ULL
  1909. +#define REISER4_BITMAP_BLOCKS_STATUS_VALUE 0x8000000000000000ULL
  1910. +
  1911. +/* specification how block allocation was counted in sb block counters */
  1912. +typedef enum {
  1913. + BLOCK_NOT_COUNTED = 0, /* reiser4 has no info about this block yet */
  1914. + BLOCK_GRABBED = 1, /* free space grabbed for further allocation
  1915. + of this block */
  1916. + BLOCK_FLUSH_RESERVED = 2, /* block is reserved for flush needs. */
  1917. + BLOCK_UNALLOCATED = 3, /* block is used for existing in-memory object
  1918. + ( unallocated formatted or unformatted
  1919. + node) */
  1920. + BLOCK_ALLOCATED = 4 /* block is mapped to disk, real on-disk block
  1921. + number assigned */
  1922. +} block_stage_t;
  1923. +
  1924. +/* a hint for block allocator */
  1925. +struct reiser4_blocknr_hint {
  1926. + /* FIXME: I think we want to add a longterm lock on the bitmap block
  1927. + here. This is to prevent jnode_flush() calls from interleaving
  1928. + allocations on the same bitmap, once a hint is established. */
  1929. +
  1930. + /* search start hint */
  1931. + reiser4_block_nr blk;
  1932. + /* if not zero, it is a region size we search for free blocks in */
  1933. + reiser4_block_nr max_dist;
  1934. + /* level for allocation; it may be useful to have branch-level and higher
  1935. + levels write-optimized. */
  1936. + tree_level level;
  1937. + /* block allocator assumes that blocks, which will be mapped to disk,
  1938. + are in this specified block_stage */
  1939. + block_stage_t block_stage;
  1940. + /* If direction = 1 allocate blocks in backward direction from the end
  1941. + * of disk to the beginning of disk. */
  1942. + unsigned int backward:1;
  1943. +
  1944. +};
  1945. +
  1946. +/* These flags control block allocation/deallocation behavior */
  1947. +enum reiser4_ba_flags {
  1948. + /* do allocations from the reserved (5%) area */
  1949. + BA_RESERVED = (1 << 0),
  1950. +
  1951. + /* block allocator can do commit trying to recover free space */
  1952. + BA_CAN_COMMIT = (1 << 1),
  1953. +
  1954. + /* if operation will be applied to formatted block */
  1955. + BA_FORMATTED = (1 << 2),
  1956. +
  1957. + /* defer actual block freeing until transaction commit */
  1958. + BA_DEFER = (1 << 3),
  1959. +
  1960. + /* allocate blocks for permanent fs objects (formatted or unformatted),
  1961. + not wandered or log blocks */
  1962. + BA_PERMANENT = (1 << 4),
  1963. +
  1964. + /* grab space even if it was disabled */
  1965. + BA_FORCE = (1 << 5),
  1966. +
  1967. + /* use default start value for free blocks search. */
  1968. + BA_USE_DEFAULT_SEARCH_START = (1 << 6)
  1969. +};
  1970. +
  1971. +typedef enum reiser4_ba_flags reiser4_ba_flags_t;
  1972. +
  1973. +extern void reiser4_blocknr_hint_init(reiser4_blocknr_hint * hint);
  1974. +extern void reiser4_blocknr_hint_done(reiser4_blocknr_hint * hint);
  1975. +extern void update_blocknr_hint_default(const struct super_block *,
  1976. + const reiser4_block_nr *);
  1977. +extern void get_blocknr_hint_default(reiser4_block_nr *);
  1978. +
  1979. +extern reiser4_block_nr reiser4_fs_reserved_space(struct super_block *super);
  1980. +
  1981. +int assign_fake_blocknr_formatted(reiser4_block_nr *);
  1982. +reiser4_block_nr fake_blocknr_unformatted(int);
  1983. +
  1984. +/* free -> grabbed -> fake_allocated -> used */
  1985. +
  1986. +int reiser4_grab_space(__u64 count, reiser4_ba_flags_t flags);
  1987. +void all_grabbed2free(void);
  1988. +void grabbed2free(reiser4_context * , reiser4_super_info_data * , __u64 count);
  1989. +void fake_allocated2free(__u64 count, reiser4_ba_flags_t flags);
  1990. +void grabbed2flush_reserved_nolock(txn_atom * atom, __u64 count);
  1991. +void grabbed2flush_reserved(__u64 count);
  1992. +int reiser4_alloc_blocks(reiser4_blocknr_hint * hint,
  1993. + reiser4_block_nr * start,
  1994. + reiser4_block_nr * len, reiser4_ba_flags_t flags);
  1995. +int reiser4_dealloc_blocks(const reiser4_block_nr *,
  1996. + const reiser4_block_nr *,
  1997. + block_stage_t, reiser4_ba_flags_t flags);
  1998. +
  1999. +static inline int reiser4_alloc_block(reiser4_blocknr_hint * hint,
  2000. + reiser4_block_nr * start,
  2001. + reiser4_ba_flags_t flags)
  2002. +{
  2003. + reiser4_block_nr one = 1;
  2004. + return reiser4_alloc_blocks(hint, start, &one, flags);
  2005. +}
  2006. +
  2007. +static inline int reiser4_dealloc_block(const reiser4_block_nr * block,
  2008. + block_stage_t stage,
  2009. + reiser4_ba_flags_t flags)
  2010. +{
  2011. + const reiser4_block_nr one = 1;
  2012. + return reiser4_dealloc_blocks(block, &one, stage, flags);
  2013. +}
  2014. +
  2015. +#define reiser4_grab_space_force(count, flags) \
  2016. + reiser4_grab_space(count, flags | BA_FORCE)
  2017. +
  2018. +extern void grabbed2free_mark(__u64 mark);
  2019. +extern int reiser4_grab_reserved(struct super_block *,
  2020. + __u64, reiser4_ba_flags_t);
  2021. +extern void reiser4_release_reserved(struct super_block *super);
  2022. +
  2023. +/* grabbed -> fake_allocated */
  2024. +
  2025. +/* fake_allocated -> used */
  2026. +
  2027. +/* used -> fake_allocated -> grabbed -> free */
  2028. +
  2029. +extern void flush_reserved2grabbed(txn_atom * atom, __u64 count);
  2030. +
  2031. +extern int reiser4_blocknr_is_fake(const reiser4_block_nr * da);
  2032. +
  2033. +extern void grabbed2cluster_reserved(int count);
  2034. +extern void cluster_reserved2grabbed(int count);
  2035. +extern void cluster_reserved2free(int count);
  2036. +
  2037. +extern int reiser4_check_block_counters(const struct super_block *);
  2038. +
  2039. +
  2040. +extern int reiser4_check_blocks(const reiser4_block_nr *start,
  2041. + const reiser4_block_nr *len, int desired);
  2042. +
  2043. +static inline int reiser4_check_block(const reiser4_block_nr *start,
  2044. + int desired)
  2045. +{
  2046. + return reiser4_check_blocks(start, NULL, desired);
  2047. +}
  2048. +
  2049. +extern int reiser4_pre_commit_hook(void);
  2050. +extern void reiser4_post_commit_hook(void);
  2051. +extern void reiser4_post_write_back_hook(void);
  2052. +
  2053. +#endif /* __FS_REISER4_BLOCK_ALLOC_H__ */
  2054. +
  2055. +/* Make Linus happy.
  2056. + Local variables:
  2057. + c-indentation-style: "K&R"
  2058. + mode-name: "LC"
  2059. + c-basic-offset: 8
  2060. + tab-width: 8
  2061. + fill-column: 120
  2062. + End:
  2063. +*/
  2064. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/blocknrlist.c linux-4.14.2/fs/reiser4/blocknrlist.c
  2065. --- linux-4.14.2.orig/fs/reiser4/blocknrlist.c 1970-01-01 01:00:00.000000000 +0100
  2066. +++ linux-4.14.2/fs/reiser4/blocknrlist.c 2017-11-26 22:13:09.000000000 +0100
  2067. @@ -0,0 +1,336 @@
  2068. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  2069. + * reiser4/README */
  2070. +
  2071. +/* This is a block list implementation, used to create ordered block sets
  2072. + (at the cost of being less memory efficient than blocknr_set).
  2073. + It is used by discard code. */
  2074. +
  2075. +#include "debug.h"
  2076. +#include "dformat.h"
  2077. +#include "txnmgr.h"
  2078. +#include "context.h"
  2079. +#include "super.h"
  2080. +
  2081. +#include <linux/slab.h>
  2082. +#include <linux/list_sort.h>
  2083. +
  2084. +static struct kmem_cache *blocknr_list_slab = NULL;
  2085. +
  2086. +/**
  2087. + * Represents an extent range [@start; @end).
  2088. + */
  2089. +struct blocknr_list_entry {
  2090. + reiser4_block_nr start, len;
  2091. + struct list_head link;
  2092. +};
  2093. +
  2094. +#define blocknr_list_entry(ptr) list_entry(ptr, blocknr_list_entry, link)
  2095. +
  2096. +static void blocknr_list_entry_init(blocknr_list_entry *entry)
  2097. +{
  2098. + assert("intelfx-11", entry != NULL);
  2099. +
  2100. + entry->start = 0;
  2101. + entry->len = 0;
  2102. + INIT_LIST_HEAD(&entry->link);
  2103. +}
  2104. +
  2105. +static blocknr_list_entry *blocknr_list_entry_alloc(void)
  2106. +{
  2107. + blocknr_list_entry *entry;
  2108. +
  2109. + entry = (blocknr_list_entry *)kmem_cache_alloc(blocknr_list_slab,
  2110. + reiser4_ctx_gfp_mask_get());
  2111. + if (entry == NULL) {
  2112. + return NULL;
  2113. + }
  2114. +
  2115. + blocknr_list_entry_init(entry);
  2116. +
  2117. + return entry;
  2118. +}
  2119. +
  2120. +static void blocknr_list_entry_free(blocknr_list_entry *entry)
  2121. +{
  2122. + assert("intelfx-12", entry != NULL);
  2123. +
  2124. + kmem_cache_free(blocknr_list_slab, entry);
  2125. +}
  2126. +
  2127. +/**
  2128. + * Given ranges @to and [@start; @end), if they overlap, their union
  2129. + * is calculated and saved in @to.
  2130. + */
  2131. +static int blocknr_list_entry_merge(blocknr_list_entry *to,
  2132. + reiser4_block_nr start,
  2133. + reiser4_block_nr len)
  2134. +{
  2135. + reiser4_block_nr end, to_end;
  2136. +
  2137. + assert("intelfx-13", to != NULL);
  2138. +
  2139. + assert("intelfx-16", to->len > 0);
  2140. + assert("intelfx-17", len > 0);
  2141. +
  2142. + end = start + len;
  2143. + to_end = to->start + to->len;
  2144. +
  2145. + if ((to->start <= end) && (start <= to_end)) {
  2146. + if (start < to->start) {
  2147. + to->start = start;
  2148. + }
  2149. +
  2150. + if (end > to_end) {
  2151. + to_end = end;
  2152. + }
  2153. +
  2154. + to->len = to_end - to->start;
  2155. +
  2156. + return 0;
  2157. + }
  2158. +
  2159. + return -1;
  2160. +}
  2161. +
  2162. +static int blocknr_list_entry_merge_entry(blocknr_list_entry *to,
  2163. + blocknr_list_entry *from)
  2164. +{
  2165. + assert("intelfx-18", from != NULL);
  2166. +
  2167. + return blocknr_list_entry_merge(to, from->start, from->len);
  2168. +}
  2169. +
  2170. +/**
  2171. + * A comparison function for list_sort().
  2172. + *
  2173. + * "The comparison function @cmp must return a negative value if @a
  2174. + * should sort before @b, and a positive value if @a should sort after
  2175. + * @b. If @a and @b are equivalent, and their original relative
  2176. + * ordering is to be preserved, @cmp must return 0."
  2177. + */
  2178. +static int blocknr_list_entry_compare(void* priv UNUSED_ARG,
  2179. + struct list_head *a, struct list_head *b)
  2180. +{
  2181. + blocknr_list_entry *entry_a, *entry_b;
  2182. + reiser4_block_nr entry_a_end, entry_b_end;
  2183. +
  2184. + assert("intelfx-19", a != NULL);
  2185. + assert("intelfx-20", b != NULL);
  2186. +
  2187. + entry_a = blocknr_list_entry(a);
  2188. + entry_b = blocknr_list_entry(b);
  2189. +
  2190. + entry_a_end = entry_a->start + entry_a->len;
  2191. + entry_b_end = entry_b->start + entry_b->len;
  2192. +
  2193. + /* First sort by starting block numbers... */
  2194. + if (entry_a->start < entry_b->start) {
  2195. + return -1;
  2196. + }
  2197. +
  2198. + if (entry_a->start > entry_b->start) {
  2199. + return 1;
  2200. + }
  2201. +
  2202. + /** Then by ending block numbers.
  2203. + * If @a contains @b, it will be sorted before. */
  2204. + if (entry_a_end > entry_b_end) {
  2205. + return -1;
  2206. + }
  2207. +
  2208. + if (entry_a_end < entry_b_end) {
  2209. + return 1;
  2210. + }
  2211. +
  2212. + return 0;
  2213. +}
  2214. +
  2215. +int blocknr_list_init_static(void)
  2216. +{
  2217. + assert("intelfx-54", blocknr_list_slab == NULL);
  2218. +
  2219. + blocknr_list_slab = kmem_cache_create("blocknr_list_entry",
  2220. + sizeof(blocknr_list_entry),
  2221. + 0,
  2222. + SLAB_HWCACHE_ALIGN |
  2223. + SLAB_RECLAIM_ACCOUNT,
  2224. + NULL);
  2225. + if (blocknr_list_slab == NULL) {
  2226. + return RETERR(-ENOMEM);
  2227. + }
  2228. +
  2229. + return 0;
  2230. +}
  2231. +
  2232. +void blocknr_list_done_static(void)
  2233. +{
  2234. + destroy_reiser4_cache(&blocknr_list_slab);
  2235. +}
  2236. +
  2237. +void blocknr_list_init(struct list_head* blist)
  2238. +{
  2239. + assert("intelfx-24", blist != NULL);
  2240. +
  2241. + INIT_LIST_HEAD(blist);
  2242. +}
  2243. +
  2244. +void blocknr_list_destroy(struct list_head* blist)
  2245. +{
  2246. + struct list_head *pos, *tmp;
  2247. + blocknr_list_entry *entry;
  2248. +
  2249. + assert("intelfx-25", blist != NULL);
  2250. +
  2251. + list_for_each_safe(pos, tmp, blist) {
  2252. + entry = blocknr_list_entry(pos);
  2253. + list_del_init(pos);
  2254. + blocknr_list_entry_free(entry);
  2255. + }
  2256. +
  2257. + assert("intelfx-48", list_empty(blist));
  2258. +}
  2259. +
  2260. +void blocknr_list_merge(struct list_head *from, struct list_head *to)
  2261. +{
  2262. + assert("intelfx-26", from != NULL);
  2263. + assert("intelfx-27", to != NULL);
  2264. +
  2265. + list_splice_tail_init(from, to);
  2266. +
  2267. + assert("intelfx-49", list_empty(from));
  2268. +}
  2269. +
  2270. +void blocknr_list_sort_and_join(struct list_head *blist)
  2271. +{
  2272. + struct list_head *pos, *next;
  2273. + struct blocknr_list_entry *entry, *next_entry;
  2274. +
  2275. + assert("intelfx-50", blist != NULL);
  2276. +
  2277. + /* Step 1. Sort the extent list. */
  2278. + list_sort(NULL, blist, blocknr_list_entry_compare);
  2279. +
  2280. + /* Step 2. Join adjacent extents in the list. */
  2281. + pos = blist->next;
  2282. + next = pos->next;
  2283. + entry = blocknr_list_entry(pos);
  2284. +
  2285. + for (; next != blist; next = pos->next) {
  2286. + /** @next is a valid node at this point */
  2287. + next_entry = blocknr_list_entry(next);
  2288. +
  2289. + /** try to merge @next into @pos */
  2290. + if (!blocknr_list_entry_merge_entry(entry, next_entry)) {
  2291. + /** successful; delete the @next node.
  2292. + * next merge will be attempted into the same node. */
  2293. + list_del_init(next);
  2294. + blocknr_list_entry_free(next_entry);
  2295. + } else {
  2296. + /** otherwise advance @pos. */
  2297. + pos = next;
  2298. + entry = next_entry;
  2299. + }
  2300. + }
  2301. +}
  2302. +
  2303. +int blocknr_list_add_extent(txn_atom *atom,
  2304. + struct list_head *blist,
  2305. + blocknr_list_entry **new_entry,
  2306. + const reiser4_block_nr *start,
  2307. + const reiser4_block_nr *len)
  2308. +{
  2309. + assert("intelfx-29", atom != NULL);
  2310. + assert("intelfx-42", atom_is_protected(atom));
  2311. + assert("intelfx-43", blist != NULL);
  2312. + assert("intelfx-30", new_entry != NULL);
  2313. + assert("intelfx-31", start != NULL);
  2314. + assert("intelfx-32", len != NULL && *len > 0);
  2315. +
  2316. + if (*new_entry == NULL) {
  2317. + /*
  2318. + * Optimization: try to merge new extent into the last one.
  2319. + */
  2320. + if (!list_empty(blist)) {
  2321. + blocknr_list_entry *last_entry;
  2322. + last_entry = blocknr_list_entry(blist->prev);
  2323. + if (!blocknr_list_entry_merge(last_entry, *start, *len)) {
  2324. + return 0;
  2325. + }
  2326. + }
  2327. +
  2328. + /*
  2329. + * Otherwise, allocate a new entry and tell -E_REPEAT.
  2330. + * Next time we'll take the branch below.
  2331. + */
  2332. + spin_unlock_atom(atom);
  2333. + *new_entry = blocknr_list_entry_alloc();
  2334. + return (*new_entry != NULL) ? -E_REPEAT : RETERR(-ENOMEM);
  2335. + }
  2336. +
  2337. + /*
  2338. + * The entry has been allocated beforehand, fill it and link to the list.
  2339. + */
  2340. + (*new_entry)->start = *start;
  2341. + (*new_entry)->len = *len;
  2342. + list_add_tail(&(*new_entry)->link, blist);
  2343. +
  2344. + return 0;
  2345. +}
  2346. +
  2347. +int blocknr_list_iterator(txn_atom *atom,
  2348. + struct list_head *blist,
  2349. + blocknr_set_actor_f actor,
  2350. + void *data,
  2351. + int delete)
  2352. +{
  2353. + struct list_head *pos;
  2354. + blocknr_list_entry *entry;
  2355. + int ret = 0;
  2356. +
  2357. + assert("intelfx-46", blist != NULL);
  2358. + assert("intelfx-47", actor != NULL);
  2359. +
  2360. + if (delete) {
  2361. + struct list_head *tmp;
  2362. +
  2363. + list_for_each_safe(pos, tmp, blist) {
  2364. + entry = blocknr_list_entry(pos);
  2365. +
  2366. + /*
  2367. + * Do not exit, delete flag is set. Instead, on the first error we
  2368. + * downgrade from iterating to just deleting.
  2369. + */
  2370. + if (ret == 0) {
  2371. + ret = actor(atom, &entry->start, &entry->len, data);
  2372. + }
  2373. +
  2374. + list_del_init(pos);
  2375. + blocknr_list_entry_free(entry);
  2376. + }
  2377. +
  2378. + assert("intelfx-44", list_empty(blist));
  2379. + } else {
  2380. + list_for_each(pos, blist) {
  2381. + entry = blocknr_list_entry(pos);
  2382. +
  2383. + ret = actor(atom, &entry->start, &entry->len, data);
  2384. +
  2385. + if (ret != 0) {
  2386. + return ret;
  2387. + }
  2388. + }
  2389. + }
  2390. +
  2391. + return ret;
  2392. +}
  2393. +
  2394. +/* Make Linus happy.
  2395. + Local variables:
  2396. + c-indentation-style: "K&R"
  2397. + mode-name: "LC"
  2398. + c-basic-offset: 8
  2399. + tab-width: 8
  2400. + fill-column: 120
  2401. + scroll-step: 1
  2402. + End:
  2403. +*/
  2404. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/blocknrset.c linux-4.14.2/fs/reiser4/blocknrset.c
  2405. --- linux-4.14.2.orig/fs/reiser4/blocknrset.c 1970-01-01 01:00:00.000000000 +0100
  2406. +++ linux-4.14.2/fs/reiser4/blocknrset.c 2017-11-26 22:13:09.000000000 +0100
  2407. @@ -0,0 +1,399 @@
  2408. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  2409. +reiser4/README */
  2410. +
  2411. +/* This file contains code for various block number sets used by the atom to
  2412. + track the deleted set and wandered block mappings. */
  2413. +
  2414. +#include "debug.h"
  2415. +#include "dformat.h"
  2416. +#include "txnmgr.h"
  2417. +#include "context.h"
  2418. +#include "super.h"
  2419. +
  2420. +#include <linux/slab.h>
  2421. +
  2422. +/* The proposed data structure for storing unordered block number sets is a
  2423. + list of elements, each of which contains an array of block number or/and
  2424. + array of block number pairs. That element called blocknr_set_entry is used
  2425. + to store block numbers from the beginning and for extents from the end of
  2426. + the data field (char data[...]). The ->nr_blocks and ->nr_pairs fields
  2427. + count numbers of blocks and extents.
  2428. +
  2429. + +------------------- blocknr_set_entry->data ------------------+
  2430. + |block1|block2| ... <free space> ... |pair3|pair2|pair1|
  2431. + +------------------------------------------------------------+
  2432. +
  2433. + When current blocknr_set_entry is full, allocate a new one. */
  2434. +
  2435. +/* Usage examples: blocknr sets are used in reiser4 for storing atom's delete
  2436. + * set (single blocks and block extents), in that case blocknr pair represent an
  2437. + * extent; atom's wandered map is also stored as a blocknr set, blocknr pairs
  2438. + * there represent a (real block) -> (wandered block) mapping. */
  2439. +
  2440. +/* Protection: blocknr sets belong to reiser4 atom, and
  2441. + * their modifications are performed with the atom lock held */
  2442. +
  2443. +/* The total size of a blocknr_set_entry. */
  2444. +#define BLOCKNR_SET_ENTRY_SIZE 128
  2445. +
  2446. +/* The number of blocks that can fit the blocknr data area. */
  2447. +#define BLOCKNR_SET_ENTRIES_NUMBER \
  2448. + ((BLOCKNR_SET_ENTRY_SIZE - \
  2449. + 2 * sizeof(unsigned) - \
  2450. + sizeof(struct list_head)) / \
  2451. + sizeof(reiser4_block_nr))
  2452. +
  2453. +static struct kmem_cache *blocknr_set_slab = NULL;
  2454. +
  2455. +/* An entry of the blocknr_set */
  2456. +struct blocknr_set_entry {
  2457. + unsigned nr_singles;
  2458. + unsigned nr_pairs;
  2459. + struct list_head link;
  2460. + reiser4_block_nr entries[BLOCKNR_SET_ENTRIES_NUMBER];
  2461. +};
  2462. +
  2463. +/* A pair of blocks as recorded in the blocknr_set_entry data. */
  2464. +struct blocknr_pair {
  2465. + reiser4_block_nr a;
  2466. + reiser4_block_nr b;
  2467. +};
  2468. +
  2469. +/* Return the number of blocknr slots available in a blocknr_set_entry. */
  2470. +/* Audited by: green(2002.06.11) */
  2471. +static unsigned bse_avail(blocknr_set_entry * bse)
  2472. +{
  2473. + unsigned used = bse->nr_singles + 2 * bse->nr_pairs;
  2474. +
  2475. + assert("jmacd-5088", BLOCKNR_SET_ENTRIES_NUMBER >= used);
  2476. + cassert(sizeof(blocknr_set_entry) == BLOCKNR_SET_ENTRY_SIZE);
  2477. +
  2478. + return BLOCKNR_SET_ENTRIES_NUMBER - used;
  2479. +}
  2480. +
  2481. +/* Initialize a blocknr_set_entry. */
  2482. +static void bse_init(blocknr_set_entry *bse)
  2483. +{
  2484. + bse->nr_singles = 0;
  2485. + bse->nr_pairs = 0;
  2486. + INIT_LIST_HEAD(&bse->link);
  2487. +}
  2488. +
  2489. +/* Allocate and initialize a blocknr_set_entry. */
  2490. +/* Audited by: green(2002.06.11) */
  2491. +static blocknr_set_entry *bse_alloc(void)
  2492. +{
  2493. + blocknr_set_entry *e;
  2494. +
  2495. + if ((e = (blocknr_set_entry *) kmem_cache_alloc(blocknr_set_slab,
  2496. + reiser4_ctx_gfp_mask_get())) == NULL)
  2497. + return NULL;
  2498. +
  2499. + bse_init(e);
  2500. +
  2501. + return e;
  2502. +}
  2503. +
  2504. +/* Free a blocknr_set_entry. */
  2505. +/* Audited by: green(2002.06.11) */
  2506. +static void bse_free(blocknr_set_entry * bse)
  2507. +{
  2508. + kmem_cache_free(blocknr_set_slab, bse);
  2509. +}
  2510. +
  2511. +/* Add a block number to a blocknr_set_entry */
  2512. +/* Audited by: green(2002.06.11) */
  2513. +static void
  2514. +bse_put_single(blocknr_set_entry * bse, const reiser4_block_nr * block)
  2515. +{
  2516. + assert("jmacd-5099", bse_avail(bse) >= 1);
  2517. +
  2518. + bse->entries[bse->nr_singles++] = *block;
  2519. +}
  2520. +
  2521. +/* Get a pair of block numbers */
  2522. +/* Audited by: green(2002.06.11) */
  2523. +static inline struct blocknr_pair *bse_get_pair(blocknr_set_entry * bse,
  2524. + unsigned pno)
  2525. +{
  2526. + assert("green-1", BLOCKNR_SET_ENTRIES_NUMBER >= 2 * (pno + 1));
  2527. +
  2528. + return (struct blocknr_pair *) (bse->entries +
  2529. + BLOCKNR_SET_ENTRIES_NUMBER -
  2530. + 2 * (pno + 1));
  2531. +}
  2532. +
  2533. +/* Add a pair of block numbers to a blocknr_set_entry */
  2534. +/* Audited by: green(2002.06.11) */
  2535. +static void
  2536. +bse_put_pair(blocknr_set_entry * bse, const reiser4_block_nr * a,
  2537. + const reiser4_block_nr * b)
  2538. +{
  2539. + struct blocknr_pair *pair;
  2540. +
  2541. + assert("jmacd-5100", bse_avail(bse) >= 2 && a != NULL && b != NULL);
  2542. +
  2543. + pair = bse_get_pair(bse, bse->nr_pairs++);
  2544. +
  2545. + pair->a = *a;
  2546. + pair->b = *b;
  2547. +}
  2548. +
  2549. +/* Add either a block or pair of blocks to the block number set. The first
  2550. + blocknr (@a) must be non-NULL. If @b is NULL a single blocknr is added, if
  2551. + @b is non-NULL a pair is added. The block number set belongs to atom, and
  2552. + the call is made with the atom lock held. There may not be enough space in
  2553. + the current blocknr_set_entry. If new_bsep points to a non-NULL
  2554. + blocknr_set_entry then it will be added to the blocknr_set and new_bsep
  2555. + will be set to NULL. If new_bsep contains NULL then the atom lock will be
  2556. + released and a new bse will be allocated in new_bsep. E_REPEAT will be
  2557. + returned with the atom unlocked for the operation to be tried again. If
  2558. + the operation succeeds, 0 is returned. If new_bsep is non-NULL and not
  2559. + used during the call, it will be freed automatically. */
  2560. +static int blocknr_set_add(txn_atom *atom, struct list_head *bset,
  2561. + blocknr_set_entry **new_bsep, const reiser4_block_nr *a,
  2562. + const reiser4_block_nr *b)
  2563. +{
  2564. + blocknr_set_entry *bse;
  2565. + unsigned entries_needed;
  2566. +
  2567. + assert("jmacd-5101", a != NULL);
  2568. +
  2569. + entries_needed = (b == NULL) ? 1 : 2;
  2570. + if (list_empty(bset) ||
  2571. + bse_avail(list_entry(bset->next, blocknr_set_entry, link)) < entries_needed) {
  2572. + /* See if a bse was previously allocated. */
  2573. + if (*new_bsep == NULL) {
  2574. + spin_unlock_atom(atom);
  2575. + *new_bsep = bse_alloc();
  2576. + return (*new_bsep != NULL) ? -E_REPEAT :
  2577. + RETERR(-ENOMEM);
  2578. + }
  2579. +
  2580. + /* Put it on the head of the list. */
  2581. + list_add(&((*new_bsep)->link), bset);
  2582. +
  2583. + *new_bsep = NULL;
  2584. + }
  2585. +
  2586. + /* Add the single or pair. */
  2587. + bse = list_entry(bset->next, blocknr_set_entry, link);
  2588. + if (b == NULL) {
  2589. + bse_put_single(bse, a);
  2590. + } else {
  2591. + bse_put_pair(bse, a, b);
  2592. + }
  2593. +
  2594. + /* If new_bsep is non-NULL then there was an allocation race, free this
  2595. + copy. */
  2596. + if (*new_bsep != NULL) {
  2597. + bse_free(*new_bsep);
  2598. + *new_bsep = NULL;
  2599. + }
  2600. +
  2601. + return 0;
  2602. +}
  2603. +
  2604. +/* Add an extent to the block set. If the length is 1, it is treated as a
  2605. + single block (e.g., reiser4_set_add_block). */
  2606. +/* Audited by: green(2002.06.11) */
  2607. +/* Auditor note: Entire call chain cannot hold any spinlocks, because
  2608. + kmalloc might schedule. The only exception is atom spinlock, which is
  2609. + properly freed. */
  2610. +int
  2611. +blocknr_set_add_extent(txn_atom * atom,
  2612. + struct list_head *bset,
  2613. + blocknr_set_entry ** new_bsep,
  2614. + const reiser4_block_nr * start,
  2615. + const reiser4_block_nr * len)
  2616. +{
  2617. + assert("jmacd-5102", start != NULL && len != NULL && *len > 0);
  2618. + return blocknr_set_add(atom, bset, new_bsep, start,
  2619. + *len == 1 ? NULL : len);
  2620. +}
  2621. +
  2622. +/* Add a block pair to the block set. It adds exactly a pair, which is checked
  2623. + * by an assertion that both arguments are not null.*/
  2624. +/* Audited by: green(2002.06.11) */
  2625. +/* Auditor note: Entire call chain cannot hold any spinlocks, because
  2626. + kmalloc might schedule. The only exception is atom spinlock, which is
  2627. + properly freed. */
  2628. +int
  2629. +blocknr_set_add_pair(txn_atom * atom,
  2630. + struct list_head *bset,
  2631. + blocknr_set_entry ** new_bsep, const reiser4_block_nr * a,
  2632. + const reiser4_block_nr * b)
  2633. +{
  2634. + assert("jmacd-5103", a != NULL && b != NULL);
  2635. + return blocknr_set_add(atom, bset, new_bsep, a, b);
  2636. +}
  2637. +
  2638. +/* Initialize slab cache of blocknr_set_entry objects. */
  2639. +int blocknr_set_init_static(void)
  2640. +{
  2641. + assert("intelfx-55", blocknr_set_slab == NULL);
  2642. +
  2643. + blocknr_set_slab = kmem_cache_create("blocknr_set_entry",
  2644. + sizeof(blocknr_set_entry),
  2645. + 0,
  2646. + SLAB_HWCACHE_ALIGN |
  2647. + SLAB_RECLAIM_ACCOUNT,
  2648. + NULL);
  2649. +
  2650. + if (blocknr_set_slab == NULL) {
  2651. + return RETERR(-ENOMEM);
  2652. + }
  2653. +
  2654. + return 0;
  2655. +}
  2656. +
  2657. +/* Destroy slab cache of blocknr_set_entry objects. */
  2658. +void blocknr_set_done_static(void)
  2659. +{
  2660. + destroy_reiser4_cache(&blocknr_set_slab);
  2661. +}
  2662. +
  2663. +/* Initialize a blocknr_set. */
  2664. +void blocknr_set_init(struct list_head *bset)
  2665. +{
  2666. + INIT_LIST_HEAD(bset);
  2667. +}
  2668. +
  2669. +/* Release the entries of a blocknr_set. */
  2670. +void blocknr_set_destroy(struct list_head *bset)
  2671. +{
  2672. + blocknr_set_entry *bse;
  2673. +
  2674. + while (!list_empty(bset)) {
  2675. + bse = list_entry(bset->next, blocknr_set_entry, link);
  2676. + list_del_init(&bse->link);
  2677. + bse_free(bse);
  2678. + }
  2679. +}
  2680. +
  2681. +/* Merge blocknr_set entries out of @from into @into. */
  2682. +/* Audited by: green(2002.06.11) */
  2683. +/* Auditor comments: This merge does not know if merged sets contain
  2684. + blocks pairs (As for wandered sets) or extents, so it cannot really merge
  2685. + overlapping ranges if there is some. So I believe it may lead to
  2686. + some blocks being presented several times in one blocknr_set. To help
  2687. + debugging such problems it might help to check for duplicate entries on
  2688. + actual processing of this set. Testing this kind of stuff right here is
  2689. + also complicated by the fact that these sets are not sorted and going
  2690. + through whole set on each element addition is going to be CPU-heavy task */
  2691. +void blocknr_set_merge(struct list_head *from, struct list_head *into)
  2692. +{
  2693. + blocknr_set_entry *bse_into = NULL;
  2694. +
  2695. + /* If @from is empty, no work to perform. */
  2696. + if (list_empty(from))
  2697. + return;
  2698. + /* If @into is not empty, try merging partial-entries. */
  2699. + if (!list_empty(into)) {
  2700. +
  2701. + /* Neither set is empty, pop the front to members and try to
  2702. + combine them. */
  2703. + blocknr_set_entry *bse_from;
  2704. + unsigned into_avail;
  2705. +
  2706. + bse_into = list_entry(into->next, blocknr_set_entry, link);
  2707. + list_del_init(&bse_into->link);
  2708. + bse_from = list_entry(from->next, blocknr_set_entry, link);
  2709. + list_del_init(&bse_from->link);
  2710. +
  2711. + /* Combine singles. */
  2712. + for (into_avail = bse_avail(bse_into);
  2713. + into_avail != 0 && bse_from->nr_singles != 0;
  2714. + into_avail -= 1) {
  2715. + bse_put_single(bse_into,
  2716. + &bse_from->entries[--bse_from->
  2717. + nr_singles]);
  2718. + }
  2719. +
  2720. + /* Combine pairs. */
  2721. + for (; into_avail > 1 && bse_from->nr_pairs != 0;
  2722. + into_avail -= 2) {
  2723. + struct blocknr_pair *pair =
  2724. + bse_get_pair(bse_from, --bse_from->nr_pairs);
  2725. + bse_put_pair(bse_into, &pair->a, &pair->b);
  2726. + }
  2727. +
  2728. + /* If bse_from is empty, delete it now. */
  2729. + if (bse_avail(bse_from) == BLOCKNR_SET_ENTRIES_NUMBER) {
  2730. + bse_free(bse_from);
  2731. + } else {
  2732. + /* Otherwise, bse_into is full or nearly full (e.g.,
  2733. + it could have one slot avail and bse_from has one
  2734. + pair left). Push it back onto the list. bse_from
  2735. + becomes bse_into, which will be the new partial. */
  2736. + list_add(&bse_into->link, into);
  2737. + bse_into = bse_from;
  2738. + }
  2739. + }
  2740. +
  2741. + /* Splice lists together. */
  2742. + list_splice_init(from, into->prev);
  2743. +
  2744. + /* Add the partial entry back to the head of the list. */
  2745. + if (bse_into != NULL)
  2746. + list_add(&bse_into->link, into);
  2747. +}
  2748. +
  2749. +/* Iterate over all blocknr set elements. */
  2750. +int blocknr_set_iterator(txn_atom *atom, struct list_head *bset,
  2751. + blocknr_set_actor_f actor, void *data, int delete)
  2752. +{
  2753. +
  2754. + blocknr_set_entry *entry;
  2755. +
  2756. + assert("zam-429", atom != NULL);
  2757. + assert("zam-430", atom_is_protected(atom));
  2758. + assert("zam-431", bset != 0);
  2759. + assert("zam-432", actor != NULL);
  2760. +
  2761. + entry = list_entry(bset->next, blocknr_set_entry, link);
  2762. + while (bset != &entry->link) {
  2763. + blocknr_set_entry *tmp = list_entry(entry->link.next, blocknr_set_entry, link);
  2764. + unsigned int i;
  2765. + int ret;
  2766. +
  2767. + for (i = 0; i < entry->nr_singles; i++) {
  2768. + ret = actor(atom, &entry->entries[i], NULL, data);
  2769. +
  2770. + /* We can't break a loop if delete flag is set. */
  2771. + if (ret != 0 && !delete)
  2772. + return ret;
  2773. + }
  2774. +
  2775. + for (i = 0; i < entry->nr_pairs; i++) {
  2776. + struct blocknr_pair *ab;
  2777. +
  2778. + ab = bse_get_pair(entry, i);
  2779. +
  2780. + ret = actor(atom, &ab->a, &ab->b, data);
  2781. +
  2782. + if (ret != 0 && !delete)
  2783. + return ret;
  2784. + }
  2785. +
  2786. + if (delete) {
  2787. + list_del(&entry->link);
  2788. + bse_free(entry);
  2789. + }
  2790. +
  2791. + entry = tmp;
  2792. + }
  2793. +
  2794. + return 0;
  2795. +}
  2796. +
  2797. +/*
  2798. + * Local variables:
  2799. + * c-indentation-style: "K&R"
  2800. + * mode-name: "LC"
  2801. + * c-basic-offset: 8
  2802. + * tab-width: 8
  2803. + * fill-column: 79
  2804. + * scroll-step: 1
  2805. + * End:
  2806. + */
  2807. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/carry.c linux-4.14.2/fs/reiser4/carry.c
  2808. --- linux-4.14.2.orig/fs/reiser4/carry.c 1970-01-01 01:00:00.000000000 +0100
  2809. +++ linux-4.14.2/fs/reiser4/carry.c 2017-11-26 22:13:09.000000000 +0100
  2810. @@ -0,0 +1,1408 @@
  2811. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  2812. + reiser4/README */
  2813. +/* Functions to "carry" tree modification(s) upward. */
  2814. +/* Tree is modified one level at a time. As we modify a level we accumulate a
  2815. + set of changes that need to be propagated to the next level. We manage
  2816. + node locking such that any searches that collide with carrying are
  2817. + restarted, from the root if necessary.
  2818. +
  2819. + Insertion of a new item may result in items being moved among nodes and
  2820. + this requires the delimiting key to be updated at the least common parent
  2821. + of the nodes modified to preserve search tree invariants. Also, insertion
  2822. + may require allocation of a new node. A pointer to the new node has to be
  2823. + inserted into some node on the parent level, etc.
  2824. +
  2825. + Tree carrying is meant to be analogous to arithmetic carrying.
  2826. +
  2827. + A carry operation is always associated with some node (&carry_node).
  2828. +
  2829. + Carry process starts with some initial set of operations to be performed
  2830. + and an initial set of already locked nodes. Operations are performed one
  2831. + by one. Performing each single operation has following possible effects:
  2832. +
  2833. + - content of carry node associated with operation is modified
  2834. + - new carry nodes are locked and involved into carry process on this level
  2835. + - new carry operations are posted to the next level
  2836. +
  2837. + After all carry operations on this level are done, process is repeated for
  2838. + the accumulated sequence of carry operations for the next level. This
  2839. + starts by trying to lock (in left to right order) all carry nodes
  2840. + associated with carry operations on the parent level. After this, we decide
  2841. + whether more nodes are required on the left of already locked set. If so,
  2842. + all locks taken on the parent level are released, new carry nodes are
  2843. + added, and locking process repeats.
  2844. +
  2845. + It may happen that balancing process fails owing to unrecoverable error on
  2846. + some of upper levels of a tree (possible causes are io error, failure to
  2847. + allocate new node, etc.). In this case we should unmount the filesystem,
  2848. + rebooting if it is the root, and possibly advise the use of fsck.
  2849. +
  2850. + USAGE:
  2851. +
  2852. + int some_tree_operation( znode *node, ... )
  2853. + {
  2854. + // Allocate on a stack pool of carry objects: operations and nodes.
  2855. + // Most carry processes will only take objects from here, without
  2856. + // dynamic allocation.
  2857. +
  2858. +I feel uneasy about this pool. It adds to code complexity, I understand why it
  2859. +exists, but.... -Hans
  2860. +
  2861. + carry_pool pool;
  2862. + carry_level lowest_level;
  2863. + carry_op *op;
  2864. +
  2865. + init_carry_pool( &pool );
  2866. + init_carry_level( &lowest_level, &pool );
  2867. +
  2868. + // operation may be one of:
  2869. + // COP_INSERT --- insert new item into node
  2870. + // COP_CUT --- remove part of or whole node
  2871. + // COP_PASTE --- increase size of item
  2872. + // COP_DELETE --- delete pointer from parent node
  2873. + // COP_UPDATE --- update delimiting key in least
  2874. + // common ancestor of two
  2875. +
  2876. + op = reiser4_post_carry( &lowest_level, operation, node, 0 );
  2877. + if( IS_ERR( op ) || ( op == NULL ) ) {
  2878. + handle error
  2879. + } else {
  2880. + // fill in remaining fields in @op, according to carry.h:carry_op
  2881. + result = carry(&lowest_level, NULL);
  2882. + }
  2883. + done_carry_pool(&pool);
  2884. + }
  2885. +
  2886. + When you are implementing node plugin method that participates in carry
  2887. + (shifting, insertion, deletion, etc.), do the following:
  2888. +
  2889. + int foo_node_method(znode * node, ..., carry_level * todo)
  2890. + {
  2891. + carry_op *op;
  2892. +
  2893. + ....
  2894. +
  2895. + // note, that last argument to reiser4_post_carry() is non-null
  2896. + // here, because @op is to be applied to the parent of @node, rather
  2897. + // than to the @node itself as in the previous case.
  2898. +
  2899. + op = node_post_carry(todo, operation, node, 1);
  2900. + // fill in remaining fields in @op, according to carry.h:carry_op
  2901. +
  2902. + ....
  2903. +
  2904. + }
  2905. +
  2906. + BATCHING:
  2907. +
  2908. + One of the main advantages of level-by-level balancing implemented here is
  2909. + ability to batch updates on a parent level and to perform them more
  2910. + efficiently as a result.
  2911. +
  2912. + Description To Be Done (TBD).
  2913. +
  2914. + DIFFICULTIES AND SUBTLE POINTS:
  2915. +
  2916. + 1. complex plumbing is required, because:
  2917. +
  2918. + a. effective allocation through pools is needed
  2919. +
  2920. + b. target of operation is not exactly known when operation is
  2921. + posted. This is worked around through bitfields in &carry_node and
  2922. + logic in lock_carry_node()
  2923. +
  2924. + c. of interaction with locking code: node should be added into sibling
  2925. + list when pointer to it is inserted into its parent, which is some time
  2926. + after node was created. Between these moments, node is somewhat in
  2927. + suspended state and is only registered in the carry lists
  2928. +
  2929. + 2. whole balancing logic is implemented here, in particular, insertion
  2930. + logic is coded in make_space().
  2931. +
  2932. + 3. special cases like insertion (reiser4_add_tree_root()) or deletion
  2933. + (reiser4_kill_tree_root()) of tree root and morphing of paste into insert
  2934. + (insert_paste()) have to be handled.
  2935. +
  2936. + 4. there is non-trivial interdependency between allocation of new nodes
  2937. + and almost everything else. This is mainly due to the (1.c) above. I shall
  2938. + write about this later.
  2939. +
  2940. +*/
  2941. +
  2942. +#include "forward.h"
  2943. +#include "debug.h"
  2944. +#include "key.h"
  2945. +#include "coord.h"
  2946. +#include "plugin/item/item.h"
  2947. +#include "plugin/item/extent.h"
  2948. +#include "plugin/node/node.h"
  2949. +#include "jnode.h"
  2950. +#include "znode.h"
  2951. +#include "tree_mod.h"
  2952. +#include "tree_walk.h"
  2953. +#include "block_alloc.h"
  2954. +#include "pool.h"
  2955. +#include "tree.h"
  2956. +#include "carry.h"
  2957. +#include "carry_ops.h"
  2958. +#include "super.h"
  2959. +#include "reiser4.h"
  2960. +
  2961. +#include <linux/types.h>
  2962. +
  2963. +/* level locking/unlocking */
  2964. +static int lock_carry_level(carry_level * level);
  2965. +static void unlock_carry_level(carry_level * level, int failure);
  2966. +static void done_carry_level(carry_level * level);
  2967. +static void unlock_carry_node(carry_level * level, carry_node * node, int fail);
  2968. +
  2969. +int lock_carry_node(carry_level * level, carry_node * node);
  2970. +int lock_carry_node_tail(carry_node * node);
  2971. +
  2972. +/* carry processing proper */
  2973. +static int carry_on_level(carry_level * doing, carry_level * todo);
  2974. +
  2975. +static carry_op *add_op(carry_level * level, pool_ordering order,
  2976. + carry_op * reference);
  2977. +
  2978. +/* handlers for carry operations. */
  2979. +
  2980. +static void fatal_carry_error(carry_level * doing, int ecode);
  2981. +static int add_new_root(carry_level * level, carry_node * node, znode * fake);
  2982. +
  2983. +static void print_level(const char *prefix, carry_level * level);
  2984. +
  2985. +#if REISER4_DEBUG
  2986. +typedef enum {
  2987. + CARRY_TODO,
  2988. + CARRY_DOING
  2989. +} carry_queue_state;
  2990. +static int carry_level_invariant(carry_level * level, carry_queue_state state);
  2991. +#endif
  2992. +
  2993. +/* main entry point for tree balancing.
  2994. +
  2995. + Tree carry performs operations from @doing and while doing so accumulates
  2996. + information about operations to be performed on the next level ("carried"
  2997. + to the parent level). Carried operations are performed, causing possibly
  2998. + more operations to be carried upward etc. carry() takes care about
  2999. + locking and pinning znodes while operating on them.
  3000. +
  3001. + For usage, see comment at the top of fs/reiser4/carry.c
  3002. +
  3003. +*/
  3004. +int reiser4_carry(carry_level * doing /* set of carry operations to be
  3005. + * performed */ ,
  3006. + carry_level * done /* set of nodes, already performed
  3007. + * at the previous level.
  3008. + * NULL in most cases */)
  3009. +{
  3010. + int result = 0;
  3011. + gfp_t old_mask;
  3012. + /* queue of new requests */
  3013. + carry_level *todo;
  3014. + ON_DEBUG(STORE_COUNTERS);
  3015. +
  3016. + assert("nikita-888", doing != NULL);
  3017. + BUG_ON(done != NULL);
  3018. +
  3019. + todo = doing + 1;
  3020. + init_carry_level(todo, doing->pool);
  3021. +
  3022. + /* queue of requests performed on the previous level */
  3023. + done = todo + 1;
  3024. + init_carry_level(done, doing->pool);
  3025. + /*
  3026. + * NOTE: We are not allowed to fail in the loop below.
  3027. + * Incomplete carry (even if carry_on_level is complete)
  3028. + * can leave the tree in an inconsistent state (broken
  3029. + * order of keys in a node, etc).
  3030. + */
  3031. + old_mask = get_current_context()->gfp_mask;
  3032. + get_current_context()->gfp_mask |= __GFP_NOFAIL;
  3033. +
  3034. + /* iterate until there is nothing more to do */
  3035. + while (result == 0 && doing->ops_num > 0) {
  3036. + carry_level *tmp;
  3037. +
  3038. + /* at this point @done is locked. */
  3039. + /* repeat lock/do/unlock while
  3040. +
  3041. + (1) lock_carry_level() fails due to deadlock avoidance, or
  3042. +
  3043. + (2) carry_on_level() decides that more nodes have to
  3044. + be involved.
  3045. +
  3046. + (3) some unexpected error occurred while balancing on the
  3047. + upper levels. In this case all changes are rolled back.
  3048. +
  3049. + */
  3050. + while (1) {
  3051. + result = lock_carry_level(doing);
  3052. + if (result == 0) {
  3053. + /* perform operations from @doing and
  3054. + accumulate new requests in @todo */
  3055. + result = carry_on_level(doing, todo);
  3056. + if (result == 0)
  3057. + break;
  3058. + else if (result != -E_REPEAT ||
  3059. + !doing->restartable) {
  3060. + warning("nikita-1043",
  3061. + "Fatal error during carry: %i",
  3062. + result);
  3063. + print_level("done", done);
  3064. + print_level("doing", doing);
  3065. + print_level("todo", todo);
  3066. + /* do some rough stuff like aborting
  3067. + all pending transcrashes and thus
  3068. + pushing tree back to the consistent
  3069. + state. Alternatively, just panic.
  3070. + */
  3071. + fatal_carry_error(doing, result);
  3072. + return result;
  3073. + }
  3074. + } else if (result != -E_REPEAT) {
  3075. + fatal_carry_error(doing, result);
  3076. + return result;
  3077. + }
  3078. + unlock_carry_level(doing, 1);
  3079. + }
  3080. + /* at this point @done can be safely unlocked */
  3081. + done_carry_level(done);
  3082. +
  3083. + /* cyclically shift queues */
  3084. + tmp = done;
  3085. + done = doing;
  3086. + doing = todo;
  3087. + todo = tmp;
  3088. + init_carry_level(todo, doing->pool);
  3089. +
  3090. + /* give other threads chance to run */
  3091. + reiser4_preempt_point();
  3092. + }
  3093. + get_current_context()->gfp_mask = old_mask;
  3094. + done_carry_level(done);
  3095. +
  3096. + /* all counters, but x_refs should remain the same. x_refs can change
  3097. + owing to transaction manager */
  3098. + ON_DEBUG(CHECK_COUNTERS);
  3099. + return result;
  3100. +}
  3101. +
  3102. +/* perform carry operations on given level.
  3103. +
  3104. + Optimizations proposed by pooh:
  3105. +
  3106. + (1) don't lock all nodes from queue at the same time. Lock nodes lazily as
  3107. + required;
  3108. +
  3109. + (2) unlock node if there are no more operations to be performed upon it and
  3110. + node didn't add any operation to @todo. This can be implemented by
  3111. + attaching to each node two counters: counter of operations working on this
  3112. + node and counter of operations carried upward from this node.
  3113. +
  3114. +*/
  3115. +static int carry_on_level(carry_level * doing /* queue of carry operations to
  3116. + * do on this level */ ,
  3117. + carry_level * todo /* queue where new carry
  3118. + * operations to be performed on
  3119. + * the * parent level are
  3120. + * accumulated during @doing
  3121. + * processing. */ )
  3122. +{
  3123. + int result;
  3124. + int (*f) (carry_op *, carry_level *, carry_level *);
  3125. + carry_op *op;
  3126. + carry_op *tmp_op;
  3127. +
  3128. + assert("nikita-1034", doing != NULL);
  3129. + assert("nikita-1035", todo != NULL);
  3130. +
  3131. + /* @doing->nodes are locked. */
  3132. +
  3133. + /* This function can be split into two phases: analysis and modification
  3134. +
  3135. + Analysis calculates precisely what items should be moved between
  3136. + nodes. This information is gathered in some structures attached to
  3137. + each carry_node in a @doing queue. Analysis also determines whether
  3138. + new nodes are to be allocated etc.
  3139. +
  3140. + After analysis is completed, actual modification is performed. Here
  3141. + we can take advantage of "batch modification": if there are several
  3142. + operations acting on the same node, modifications can be performed
  3143. + more efficiently when batched together.
  3144. +
  3145. + Above is an optimization left for the future.
  3146. + */
  3147. + /* Important, but delayed optimization: it's possible to batch
  3148. + operations together and perform them more efficiently as a
  3149. + result. For example, deletion of several neighboring items from a
  3150. + node can be converted to a single ->cut() operation.
  3151. +
  3152. + Before processing queue, it should be scanned and "mergeable"
  3153. + operations merged.
  3154. + */
  3155. + result = 0;
  3156. + for_all_ops(doing, op, tmp_op) {
  3157. + carry_opcode opcode;
  3158. +
  3159. + assert("nikita-1041", op != NULL);
  3160. + opcode = op->op;
  3161. + assert("nikita-1042", op->op < COP_LAST_OP);
  3162. + f = op_dispatch_table[op->op].handler;
  3163. + result = f(op, doing, todo);
  3164. + /* locking can fail with -E_REPEAT. Any different error is fatal
  3165. + and will be handled by fatal_carry_error() sledgehammer.
  3166. + */
  3167. + if (result != 0)
  3168. + break;
  3169. + }
  3170. + if (result == 0) {
  3171. + carry_plugin_info info;
  3172. + carry_node *scan;
  3173. + carry_node *tmp_scan;
  3174. +
  3175. + info.doing = doing;
  3176. + info.todo = todo;
  3177. +
  3178. + assert("nikita-3002",
  3179. + carry_level_invariant(doing, CARRY_DOING));
  3180. + for_all_nodes(doing, scan, tmp_scan) {
  3181. + znode *node;
  3182. +
  3183. + node = reiser4_carry_real(scan);
  3184. + assert("nikita-2547", node != NULL);
  3185. + if (node_is_empty(node)) {
  3186. + result =
  3187. + node_plugin_by_node(node)->
  3188. + prepare_removal(node, &info);
  3189. + if (result != 0)
  3190. + break;
  3191. + }
  3192. + }
  3193. + }
  3194. + return result;
  3195. +}
  3196. +
  3197. +/* post carry operation
  3198. +
  3199. + This is main function used by external carry clients: node layout plugins
  3200. + and tree operations to create new carry operation to be performed on some
  3201. + level.
  3202. +
  3203. + New operation will be included in the @level queue. To actually perform it,
  3204. + call carry( level, ... ). This function takes write lock on @node. Carry
  3205. + manages all its locks by itself, don't worry about this.
  3206. +
  3207. + This function adds operation and node at the end of the queue. It is up to
  3208. + caller to guarantee proper ordering of node queue.
  3209. +
  3210. +*/
  3211. +carry_op * reiser4_post_carry(carry_level * level /* queue where new operation
  3212. + * is to be posted at */ ,
  3213. + carry_opcode op /* opcode of operation */ ,
  3214. + znode * node /* node on which this operation
  3215. + * will operate */ ,
  3216. + int apply_to_parent_p /* whether operation will
  3217. + * operate directly on @node
  3218. + * or on it parent. */)
  3219. +{
  3220. + carry_op *result;
  3221. + carry_node *child;
  3222. +
  3223. + assert("nikita-1046", level != NULL);
  3224. + assert("nikita-1788", znode_is_write_locked(node));
  3225. +
  3226. + result = add_op(level, POOLO_LAST, NULL);
  3227. + if (IS_ERR(result))
  3228. + return result;
  3229. + child = reiser4_add_carry(level, POOLO_LAST, NULL);
  3230. + if (IS_ERR(child)) {
  3231. + reiser4_pool_free(&level->pool->op_pool, &result->header);
  3232. + return (carry_op *) child;
  3233. + }
  3234. + result->node = child;
  3235. + result->op = op;
  3236. + child->parent = apply_to_parent_p;
  3237. + if (ZF_ISSET(node, JNODE_ORPHAN))
  3238. + child->left_before = 1;
  3239. + child->node = node;
  3240. + return result;
  3241. +}
  3242. +
  3243. +/* initialize carry queue */
  3244. +void init_carry_level(carry_level * level /* level to initialize */ ,
  3245. + carry_pool * pool /* pool @level will allocate objects
  3246. + * from */ )
  3247. +{
  3248. + assert("nikita-1045", level != NULL);
  3249. + assert("nikita-967", pool != NULL);
  3250. +
  3251. + memset(level, 0, sizeof *level);
  3252. + level->pool = pool;
  3253. +
  3254. + INIT_LIST_HEAD(&level->nodes);
  3255. + INIT_LIST_HEAD(&level->ops);
  3256. +}
  3257. +
  3258. +/* allocate carry pool and initialize pools within queue */
  3259. +carry_pool *init_carry_pool(int size)
  3260. +{
  3261. + carry_pool *pool;
  3262. +
  3263. + assert("", size >= sizeof(carry_pool) + 3 * sizeof(carry_level));
  3264. + pool = kmalloc(size, reiser4_ctx_gfp_mask_get());
  3265. + if (pool == NULL)
  3266. + return ERR_PTR(RETERR(-ENOMEM));
  3267. +
  3268. + reiser4_init_pool(&pool->op_pool, sizeof(carry_op), CARRIES_POOL_SIZE,
  3269. + (char *)pool->op);
  3270. + reiser4_init_pool(&pool->node_pool, sizeof(carry_node),
  3271. + NODES_LOCKED_POOL_SIZE, (char *)pool->node);
  3272. + return pool;
  3273. +}
  3274. +
  3275. +/* finish with queue pools */
  3276. +void done_carry_pool(carry_pool * pool/* pool to destroy */)
  3277. +{
  3278. + reiser4_done_pool(&pool->op_pool);
  3279. + reiser4_done_pool(&pool->node_pool);
  3280. + kfree(pool);
  3281. +}
  3282. +
  3283. +/* add new carry node to the @level.
  3284. +
  3285. + Returns pointer to the new carry node allocated from pool. It's up to
  3286. + callers to maintain proper order in the @level. Assumption is that if carry
  3287. + nodes on one level are already sorted and modifications are performed from
  3288. + left to right, carry nodes added on the parent level will be ordered
  3289. + automatically. To control ordering use @order and @reference parameters.
  3290. +
  3291. +*/
  3292. +carry_node *reiser4_add_carry_skip(carry_level * level /* &carry_level to add
  3293. + * node to */ ,
  3294. + pool_ordering order /* where to insert:
  3295. + * at the beginning of
  3296. + * @level,
  3297. + * before @reference,
  3298. + * after @reference,
  3299. + * at the end of @level
  3300. + */ ,
  3301. + carry_node * reference/* reference node for
  3302. + * insertion */)
  3303. +{
  3304. + ON_DEBUG(carry_node * orig_ref = reference);
  3305. +
  3306. + if (order == POOLO_BEFORE) {
  3307. + reference = find_left_carry(reference, level);
  3308. + if (reference == NULL)
  3309. + reference = list_entry(level->nodes.next, carry_node,
  3310. + header.level_linkage);
  3311. + else
  3312. + reference = list_entry(reference->header.level_linkage.next,
  3313. + carry_node, header.level_linkage);
  3314. + } else if (order == POOLO_AFTER) {
  3315. + reference = find_right_carry(reference, level);
  3316. + if (reference == NULL)
  3317. + reference = list_entry(level->nodes.prev, carry_node,
  3318. + header.level_linkage);
  3319. + else
  3320. + reference = list_entry(reference->header.level_linkage.prev,
  3321. + carry_node, header.level_linkage);
  3322. + }
  3323. + assert("nikita-2209",
  3324. + ergo(orig_ref != NULL,
  3325. + reiser4_carry_real(reference) ==
  3326. + reiser4_carry_real(orig_ref)));
  3327. + return reiser4_add_carry(level, order, reference);
  3328. +}
  3329. +
  3330. +carry_node *reiser4_add_carry(carry_level * level, /* carry_level to add
  3331. + node to */
  3332. + pool_ordering order, /* where to insert:
  3333. + * at the beginning of
  3334. + * @level;
  3335. + * before @reference;
  3336. + * after @reference;
  3337. + * at the end of @level
  3338. + */
  3339. + carry_node * reference /* reference node for
  3340. + * insertion */)
  3341. +{
  3342. + carry_node *result;
  3343. +
  3344. + result =
  3345. + (carry_node *) reiser4_add_obj(&level->pool->node_pool,
  3346. + &level->nodes,
  3347. + order, &reference->header);
  3348. + if (!IS_ERR(result) && (result != NULL))
  3349. + ++level->nodes_num;
  3350. + return result;
  3351. +}
  3352. +
  3353. +/**
  3354. + * add new carry operation to the @level.
  3355. + *
  3356. + * Returns pointer to the new carry operations allocated from pool. It's up to
  3357. + * callers to maintain proper order in the @level. To control ordering use
  3358. + * @order and @reference parameters.
  3359. + */
  3360. +static carry_op *add_op(carry_level * level, /* &carry_level to add node to */
  3361. + pool_ordering order, /* where to insert:
  3362. + * at the beginning of @level;
  3363. + * before @reference;
  3364. + * after @reference;
  3365. + * at the end of @level */
  3366. + carry_op * reference /* reference node for insertion */)
  3367. +{
  3368. + carry_op *result;
  3369. +
  3370. + result =
  3371. + (carry_op *) reiser4_add_obj(&level->pool->op_pool, &level->ops,
  3372. + order, &reference->header);
  3373. + if (!IS_ERR(result) && (result != NULL))
  3374. + ++level->ops_num;
  3375. + return result;
  3376. +}
  3377. +
  3378. +/**
  3379. + * Return node on the right of which @node was created.
  3380. + *
  3381. + * Each node is created on the right of some existing node (or it is new root,
  3382. + * which is special case not handled here).
  3383. + *
  3384. + * @node is new node created on some level, but not yet inserted into its
  3385. + * parent, it has corresponding bit (JNODE_ORPHAN) set in zstate.
  3386. + */
  3387. +static carry_node *find_begetting_brother(carry_node * node,/* node to start
  3388. + search from */
  3389. + carry_level * kin UNUSED_ARG
  3390. + /* level to scan */)
  3391. +{
  3392. + carry_node *scan;
  3393. +
  3394. + assert("nikita-1614", node != NULL);
  3395. + assert("nikita-1615", kin != NULL);
  3396. + assert("nikita-1616", LOCK_CNT_GTZ(rw_locked_tree));
  3397. + assert("nikita-1619", ergo(reiser4_carry_real(node) != NULL,
  3398. + ZF_ISSET(reiser4_carry_real(node),
  3399. + JNODE_ORPHAN)));
  3400. + for (scan = node;;
  3401. + scan = list_entry(scan->header.level_linkage.prev, carry_node,
  3402. + header.level_linkage)) {
  3403. + assert("nikita-1617", &kin->nodes != &scan->header.level_linkage);
  3404. + if ((scan->node != node->node) &&
  3405. + !ZF_ISSET(scan->node, JNODE_ORPHAN)) {
  3406. + assert("nikita-1618", reiser4_carry_real(scan) != NULL);
  3407. + break;
  3408. + }
  3409. + }
  3410. + return scan;
  3411. +}
  3412. +
  3413. +static cmp_t
  3414. +carry_node_cmp(carry_level * level, carry_node * n1, carry_node * n2)
  3415. +{
  3416. + assert("nikita-2199", n1 != NULL);
  3417. + assert("nikita-2200", n2 != NULL);
  3418. +
  3419. + if (n1 == n2)
  3420. + return EQUAL_TO;
  3421. + while (1) {
  3422. + n1 = carry_node_next(n1);
  3423. + if (carry_node_end(level, n1))
  3424. + return GREATER_THAN;
  3425. + if (n1 == n2)
  3426. + return LESS_THAN;
  3427. + }
  3428. + impossible("nikita-2201", "End of level reached");
  3429. +}
  3430. +
  3431. +carry_node *find_carry_node(carry_level * level, const znode * node)
  3432. +{
  3433. + carry_node *scan;
  3434. + carry_node *tmp_scan;
  3435. +
  3436. + assert("nikita-2202", level != NULL);
  3437. + assert("nikita-2203", node != NULL);
  3438. +
  3439. + for_all_nodes(level, scan, tmp_scan) {
  3440. + if (reiser4_carry_real(scan) == node)
  3441. + return scan;
  3442. + }
  3443. + return NULL;
  3444. +}
  3445. +
  3446. +znode *reiser4_carry_real(const carry_node * node)
  3447. +{
  3448. + assert("nikita-3061", node != NULL);
  3449. +
  3450. + return node->lock_handle.node;
  3451. +}
  3452. +
  3453. +carry_node *insert_carry_node(carry_level * doing, carry_level * todo,
  3454. + const znode * node)
  3455. +{
  3456. + carry_node *base;
  3457. + carry_node *scan;
  3458. + carry_node *tmp_scan;
  3459. + carry_node *proj;
  3460. +
  3461. + base = find_carry_node(doing, node);
  3462. + assert("nikita-2204", base != NULL);
  3463. +
  3464. + for_all_nodes(todo, scan, tmp_scan) {
  3465. + proj = find_carry_node(doing, scan->node);
  3466. + assert("nikita-2205", proj != NULL);
  3467. + if (carry_node_cmp(doing, proj, base) != LESS_THAN)
  3468. + break;
  3469. + }
  3470. + return scan;
  3471. +}
  3472. +
  3473. +static carry_node *add_carry_atplace(carry_level * doing, carry_level * todo,
  3474. + znode * node)
  3475. +{
  3476. + carry_node *reference;
  3477. +
  3478. + assert("nikita-2994", doing != NULL);
  3479. + assert("nikita-2995", todo != NULL);
  3480. + assert("nikita-2996", node != NULL);
  3481. +
  3482. + reference = insert_carry_node(doing, todo, node);
  3483. + assert("nikita-2997", reference != NULL);
  3484. +
  3485. + return reiser4_add_carry(todo, POOLO_BEFORE, reference);
  3486. +}
  3487. +
  3488. +/* like reiser4_post_carry(), but designed to be called from node plugin
  3489. + methods. This function is different from reiser4_post_carry() in that it
  3490. + finds proper place to insert node in the queue. */
  3491. +carry_op *node_post_carry(carry_plugin_info * info /* carry parameters
  3492. + * passed down to node
  3493. + * plugin */ ,
  3494. + carry_opcode op /* opcode of operation */ ,
  3495. + znode * node /* node on which this
  3496. + * operation will operate */ ,
  3497. + int apply_to_parent_p /* whether operation will
  3498. + * operate directly on @node
  3499. + * or on it parent. */ )
  3500. +{
  3501. + carry_op *result;
  3502. + carry_node *child;
  3503. +
  3504. + assert("nikita-2207", info != NULL);
  3505. + assert("nikita-2208", info->todo != NULL);
  3506. +
  3507. + if (info->doing == NULL)
  3508. + return reiser4_post_carry(info->todo, op, node,
  3509. + apply_to_parent_p);
  3510. +
  3511. + result = add_op(info->todo, POOLO_LAST, NULL);
  3512. + if (IS_ERR(result))
  3513. + return result;
  3514. + child = add_carry_atplace(info->doing, info->todo, node);
  3515. + if (IS_ERR(child)) {
  3516. + reiser4_pool_free(&info->todo->pool->op_pool, &result->header);
  3517. + return (carry_op *) child;
  3518. + }
  3519. + result->node = child;
  3520. + result->op = op;
  3521. + child->parent = apply_to_parent_p;
  3522. + if (ZF_ISSET(node, JNODE_ORPHAN))
  3523. + child->left_before = 1;
  3524. + child->node = node;
  3525. + return result;
  3526. +}
  3527. +
  3528. +/* lock all carry nodes in @level */
  3529. +static int lock_carry_level(carry_level * level/* level to lock */)
  3530. +{
  3531. + int result;
  3532. + carry_node *node;
  3533. + carry_node *tmp_node;
  3534. +
  3535. + assert("nikita-881", level != NULL);
  3536. + assert("nikita-2229", carry_level_invariant(level, CARRY_TODO));
  3537. +
  3538. + /* lock nodes from left to right */
  3539. + result = 0;
  3540. + for_all_nodes(level, node, tmp_node) {
  3541. + result = lock_carry_node(level, node);
  3542. + if (result != 0)
  3543. + break;
  3544. + }
  3545. + return result;
  3546. +}
  3547. +
  3548. +/* Synchronize delimiting keys between @node and its left neighbor.
  3549. +
  3550. + To reduce contention on dk key and simplify carry code, we synchronize
  3551. + delimiting keys only when carry ultimately leaves tree level (carrying
  3552. + changes upward) and unlocks nodes at this level.
  3553. +
  3554. + This function first finds left neighbor of @node and then updates left
  3555. + neighbor's right delimiting key to coincide with least key in @node.
  3556. +
  3557. +*/
  3558. +
  3559. +ON_DEBUG(extern atomic_t delim_key_version;
  3560. + )
  3561. +
  3562. +static void sync_dkeys(znode * spot/* node to update */)
  3563. +{
  3564. + reiser4_key pivot;
  3565. + reiser4_tree *tree;
  3566. +
  3567. + assert("nikita-1610", spot != NULL);
  3568. + assert("nikita-1612", LOCK_CNT_NIL(rw_locked_dk));
  3569. +
  3570. + tree = znode_get_tree(spot);
  3571. + read_lock_tree(tree);
  3572. + write_lock_dk(tree);
  3573. +
  3574. + assert("nikita-2192", znode_is_loaded(spot));
  3575. +
  3576. + /* sync left delimiting key of @spot with key in its leftmost item */
  3577. + if (node_is_empty(spot))
  3578. + pivot = *znode_get_rd_key(spot);
  3579. + else
  3580. + leftmost_key_in_node(spot, &pivot);
  3581. +
  3582. + znode_set_ld_key(spot, &pivot);
  3583. +
  3584. + /* there can be sequence of empty nodes pending removal on the left of
  3585. + @spot. Scan them and update their left and right delimiting keys to
  3586. + match left delimiting key of @spot. Also, update right delimiting
  3587. + key of first non-empty left neighbor.
  3588. + */
  3589. + while (1) {
  3590. + if (!ZF_ISSET(spot, JNODE_LEFT_CONNECTED))
  3591. + break;
  3592. +
  3593. + spot = spot->left;
  3594. + if (spot == NULL)
  3595. + break;
  3596. +
  3597. + znode_set_rd_key(spot, &pivot);
  3598. + /* don't sink into the domain of another balancing */
  3599. + if (!znode_is_write_locked(spot))
  3600. + break;
  3601. + if (ZF_ISSET(spot, JNODE_HEARD_BANSHEE))
  3602. + znode_set_ld_key(spot, &pivot);
  3603. + else
  3604. + break;
  3605. + }
  3606. +
  3607. + write_unlock_dk(tree);
  3608. + read_unlock_tree(tree);
  3609. +}
  3610. +
  3611. +/* unlock all carry nodes in @level */
  3612. +static void unlock_carry_level(carry_level * level /* level to unlock */ ,
  3613. + int failure /* true if unlocking owing to
  3614. + * failure */ )
  3615. +{
  3616. + carry_node *node;
  3617. + carry_node *tmp_node;
  3618. +
  3619. + assert("nikita-889", level != NULL);
  3620. +
  3621. + if (!failure) {
  3622. + znode *spot;
  3623. +
  3624. + spot = NULL;
  3625. + /* update delimiting keys */
  3626. + for_all_nodes(level, node, tmp_node) {
  3627. + if (reiser4_carry_real(node) != spot) {
  3628. + spot = reiser4_carry_real(node);
  3629. + sync_dkeys(spot);
  3630. + }
  3631. + }
  3632. + }
  3633. +
  3634. + /* nodes can be unlocked in arbitrary order. In preemptible
  3635. + environment it's better to unlock in reverse order of locking,
  3636. + though.
  3637. + */
  3638. + for_all_nodes_back(level, node, tmp_node) {
  3639. + /* all allocated nodes should be already linked to their
  3640. + parents at this moment. */
  3641. + assert("nikita-1631",
  3642. + ergo(!failure, !ZF_ISSET(reiser4_carry_real(node),
  3643. + JNODE_ORPHAN)));
  3644. + ON_DEBUG(check_dkeys(reiser4_carry_real(node)));
  3645. + unlock_carry_node(level, node, failure);
  3646. + }
  3647. + level->new_root = NULL;
  3648. +}
  3649. +
  3650. +/* finish with @level
  3651. +
  3652. + Unlock nodes and release all allocated resources */
  3653. +static void done_carry_level(carry_level * level/* level to finish */)
  3654. +{
  3655. + carry_node *node;
  3656. + carry_node *tmp_node;
  3657. + carry_op *op;
  3658. + carry_op *tmp_op;
  3659. +
  3660. + assert("nikita-1076", level != NULL);
  3661. +
  3662. + unlock_carry_level(level, 0);
  3663. + for_all_nodes(level, node, tmp_node) {
  3664. + assert("nikita-2113", list_empty_careful(&node->lock_handle.locks_link));
  3665. + assert("nikita-2114", list_empty_careful(&node->lock_handle.owners_link));
  3666. + reiser4_pool_free(&level->pool->node_pool, &node->header);
  3667. + }
  3668. + for_all_ops(level, op, tmp_op)
  3669. + reiser4_pool_free(&level->pool->op_pool, &op->header);
  3670. +}
  3671. +
  3672. +/* helper function to complete locking of carry node
  3673. +
  3674. + Finish locking of carry node. There are several ways in which new carry
  3675. + node can be added into carry level and locked. Normal is through
  3676. + lock_carry_node(), but also from find_{left|right}_neighbor(). This
  3677. + function factors out common final part of all locking scenarios. It
  3678. + supposes that @node -> lock_handle is lock handle for lock just taken and
  3679. + fills ->real_node from this lock handle.
  3680. +
  3681. +*/
  3682. +int lock_carry_node_tail(carry_node * node/* node to complete locking of */)
  3683. +{
  3684. + assert("nikita-1052", node != NULL);
  3685. + assert("nikita-1187", reiser4_carry_real(node) != NULL);
  3686. + assert("nikita-1188", !node->unlock);
  3687. +
  3688. + node->unlock = 1;
  3689. + /* Load node content into memory and install node plugin by
  3690. + looking at the node header.
  3691. +
  3692. + Most of the time this call is cheap because the node is
  3693. + already in memory.
  3694. +
  3695. + Corresponding zrelse() is in unlock_carry_node()
  3696. + */
  3697. + return zload(reiser4_carry_real(node));
  3698. +}
  3699. +
  3700. +/* lock carry node
  3701. +
  3702. + "Resolve" node to real znode, lock it and mark as locked.
  3703. + This requires recursive locking of znodes.
  3704. +
  3705. + When operation is posted to the parent level, node it will be applied to is
  3706. + not yet known. For example, when shifting data between two nodes,
  3707. + delimiting has to be updated in parent or parents of nodes involved. But
  3708. + their parents are not yet locked and, moreover said nodes can be reparented
  3709. + by concurrent balancing.
  3710. +
  3711. + To work around this, carry operation is applied to special "carry node"
  3712. + rather than to the znode itself. Carry node consists of some "base" or
  3713. + "reference" znode and flags indicating how to get to the target of carry
  3714. + operation (->real_node field of carry_node) from base.
  3715. +
  3716. +*/
  3717. +int lock_carry_node(carry_level * level /* level @node is in */ ,
  3718. + carry_node * node/* node to lock */)
  3719. +{
  3720. + int result;
  3721. + znode *reference_point;
  3722. + lock_handle lh;
  3723. + lock_handle tmp_lh;
  3724. + reiser4_tree *tree;
  3725. +
  3726. + assert("nikita-887", level != NULL);
  3727. + assert("nikita-882", node != NULL);
  3728. +
  3729. + result = 0;
  3730. + reference_point = node->node;
  3731. + init_lh(&lh);
  3732. + init_lh(&tmp_lh);
  3733. + if (node->left_before) {
  3734. + /* handling of new nodes, allocated on the previous level:
  3735. +
  3736. + some carry ops were probably posted from the new node, but
  3737. + this node neither has parent pointer set, nor is
  3738. + connected. This will be done in ->create_hook() for
  3739. + internal item.
  3740. +
  3741. + Nonetheless, parent of new node has to be locked. To do
  3742. + this, first go to the "left" in the carry order. This
  3743. + depends on the decision to always allocate new node on the
  3744. + right of existing one.
  3745. +
  3746. + Loop handles case when multiple nodes, all orphans, were
  3747. + inserted.
  3748. +
  3749. + Strictly speaking, taking tree lock is not necessary here,
  3750. + because all nodes scanned by loop in
  3751. + find_begetting_brother() are write-locked by this thread,
  3752. + and thus, their sibling linkage cannot change.
  3753. +
  3754. + */
  3755. + tree = znode_get_tree(reference_point);
  3756. + read_lock_tree(tree);
  3757. + reference_point = find_begetting_brother(node, level)->node;
  3758. + read_unlock_tree(tree);
  3759. + assert("nikita-1186", reference_point != NULL);
  3760. + }
  3761. + if (node->parent && (result == 0)) {
  3762. + result =
  3763. + reiser4_get_parent(&tmp_lh, reference_point,
  3764. + ZNODE_WRITE_LOCK);
  3765. + if (result != 0) {
  3766. + ; /* nothing */
  3767. + } else if (znode_get_level(tmp_lh.node) == 0) {
  3768. + assert("nikita-1347", znode_above_root(tmp_lh.node));
  3769. + result = add_new_root(level, node, tmp_lh.node);
  3770. + if (result == 0) {
  3771. + reference_point = level->new_root;
  3772. + move_lh(&lh, &node->lock_handle);
  3773. + }
  3774. + } else if ((level->new_root != NULL)
  3775. + && (level->new_root !=
  3776. + znode_parent_nolock(reference_point))) {
  3777. + /* parent of node exists, but this level already
  3778. + created different new root, so */
  3779. + warning("nikita-1109",
  3780. + /* it should be "radicis", but tradition is
  3781. + tradition. do banshees read latin? */
  3782. + "hodie natus est radici frater");
  3783. + result = -EIO;
  3784. + } else {
  3785. + move_lh(&lh, &tmp_lh);
  3786. + reference_point = lh.node;
  3787. + }
  3788. + }
  3789. + if (node->left && (result == 0)) {
  3790. + assert("nikita-1183", node->parent);
  3791. + assert("nikita-883", reference_point != NULL);
  3792. + result =
  3793. + reiser4_get_left_neighbor(&tmp_lh, reference_point,
  3794. + ZNODE_WRITE_LOCK,
  3795. + GN_CAN_USE_UPPER_LEVELS);
  3796. + if (result == 0) {
  3797. + done_lh(&lh);
  3798. + move_lh(&lh, &tmp_lh);
  3799. + reference_point = lh.node;
  3800. + }
  3801. + }
  3802. + if (!node->parent && !node->left && !node->left_before) {
  3803. + result =
  3804. + longterm_lock_znode(&lh, reference_point, ZNODE_WRITE_LOCK,
  3805. + ZNODE_LOCK_HIPRI);
  3806. + }
  3807. + if (result == 0) {
  3808. + move_lh(&node->lock_handle, &lh);
  3809. + result = lock_carry_node_tail(node);
  3810. + }
  3811. + done_lh(&tmp_lh);
  3812. + done_lh(&lh);
  3813. + return result;
  3814. +}
  3815. +
  3816. +/* release a lock on &carry_node.
  3817. +
  3818. + Release if necessary lock on @node. This operation is pair of
  3819. + lock_carry_node() and is idempotent: you can call it more than once on the
  3820. + same node.
  3821. +
  3822. +*/
  3823. +static void
  3824. +unlock_carry_node(carry_level * level,
  3825. + carry_node * node /* node to be released */ ,
  3826. + int failure /* 0 if node is unlocked due
  3827. + * to some error */ )
  3828. +{
  3829. + znode *real_node;
  3830. +
  3831. + assert("nikita-884", node != NULL);
  3832. +
  3833. + real_node = reiser4_carry_real(node);
  3834. + /* pair to zload() in lock_carry_node_tail() */
  3835. + zrelse(real_node);
  3836. + if (node->unlock && (real_node != NULL)) {
  3837. + assert("nikita-899", real_node == node->lock_handle.node);
  3838. + longterm_unlock_znode(&node->lock_handle);
  3839. + }
  3840. + if (failure) {
  3841. + if (node->deallocate && (real_node != NULL)) {
  3842. + /* free node in bitmap
  3843. +
  3844. + Prepare node for removal. Last zput() will finish
  3845. + with it.
  3846. + */
  3847. + ZF_SET(real_node, JNODE_HEARD_BANSHEE);
  3848. + }
  3849. + if (node->free) {
  3850. + assert("nikita-2177",
  3851. + list_empty_careful(&node->lock_handle.locks_link));
  3852. + assert("nikita-2112",
  3853. + list_empty_careful(&node->lock_handle.owners_link));
  3854. + reiser4_pool_free(&level->pool->node_pool,
  3855. + &node->header);
  3856. + }
  3857. + }
  3858. +}
  3859. +
  3860. +/* fatal_carry_error() - all-catching error handling function
  3861. +
  3862. + It is possible that carry faces unrecoverable error, like inability to
  3863. + insert pointer at the internal level. Our simple solution is just panic in
  3864. + this situation. More sophisticated things like attempt to remount
  3865. + file-system as read-only can be implemented without much difficulties.
  3866. +
  3867. + It is believed, that:
  3868. +
  3869. + 1. instead of panicking, all current transactions can be aborted rolling
  3870. + system back to the consistent state.
  3871. +
  3872. +Umm, if you simply panic without doing anything more at all, then all current
  3873. +transactions are aborted and the system is rolled back to a consistent state,
  3874. +by virtue of the design of the transactional mechanism. Well, wait, let's be
  3875. +precise. If an internal node is corrupted on disk due to hardware failure,
  3876. +then there may be no consistent state that can be rolled back to, so instead
  3877. +we should say that it will rollback the transactions, which barring other
  3878. +factors means rolling back to a consistent state.
  3879. +
  3880. +# Nikita: there is a subtle difference between panic and aborting
  3881. +# transactions: machine doesn't reboot. Processes aren't killed. Processes
  3882. +# not using reiser4 (not that we care about such processes), or using other
  3883. +# reiser4 mounts (about them we do care) will simply continue to run. With
  3884. +# some luck, even application using aborted file system can survive: it will
  3885. +# get some error, like EBADF, from each file descriptor on failed file system,
  3886. +# but applications that do care about tolerance will cope with this (squid
  3887. +# will).
  3888. +
  3889. +It would be a nice feature though to support rollback without rebooting
  3890. +followed by remount, but this can wait for later versions.
  3891. +
  3892. + 2. once isolated transactions will be implemented it will be possible to
  3893. + roll back offending transaction.
  3894. +
  3895. +2. is additional code complexity of inconsistent value (it implies that a
  3896. +broken tree should be kept in operation), so we must think about it more
  3897. +before deciding if it should be done. -Hans
  3898. +
  3899. +*/
  3900. +static void fatal_carry_error(carry_level * doing UNUSED_ARG /* carry level
  3901. + * where
  3902. + * unrecoverable
  3903. + * error
  3904. + * occurred */ ,
  3905. + int ecode/* error code */)
  3906. +{
  3907. + assert("nikita-1230", doing != NULL);
  3908. + assert("nikita-1231", ecode < 0);
  3909. +
  3910. + reiser4_panic("nikita-1232", "Carry failed: %i", ecode);
  3911. +}
  3912. +
  3913. +/**
  3914. + * Add new root to the tree
  3915. + *
  3916. + * This function itself only manages changes in carry structures and delegates
  3917. + * all hard work (allocation of znode for new root, changes of parent and
  3918. + * sibling pointers) to the reiser4_add_tree_root().
  3919. + *
  3920. + * Locking: old tree root is locked by carry at this point. Fake znode is also
  3921. + * locked.
  3922. + */
  3923. +static int add_new_root(carry_level * level,/* carry level in context of which
  3924. + * operation is performed */
  3925. + carry_node * node, /* carry node for existing root */
  3926. + znode * fake /* "fake" znode already locked by
  3927. + * us */)
  3928. +{
  3929. + int result;
  3930. +
  3931. + assert("nikita-1104", level != NULL);
  3932. + assert("nikita-1105", node != NULL);
  3933. +
  3934. + assert("nikita-1403", znode_is_write_locked(node->node));
  3935. + assert("nikita-1404", znode_is_write_locked(fake));
  3936. +
  3937. + /* trying to create new root. */
  3938. + /* @node is root and it's already locked by us. This
  3939. + means that nobody else can be trying to add/remove
  3940. + tree root right now.
  3941. + */
  3942. + if (level->new_root == NULL)
  3943. + level->new_root = reiser4_add_tree_root(node->node, fake);
  3944. + if (!IS_ERR(level->new_root)) {
  3945. + assert("nikita-1210", znode_is_root(level->new_root));
  3946. + node->deallocate = 1;
  3947. + result =
  3948. + longterm_lock_znode(&node->lock_handle, level->new_root,
  3949. + ZNODE_WRITE_LOCK, ZNODE_LOCK_LOPRI);
  3950. + if (result == 0)
  3951. + zput(level->new_root);
  3952. + } else {
  3953. + result = PTR_ERR(level->new_root);
  3954. + level->new_root = NULL;
  3955. + }
  3956. + return result;
  3957. +}
  3958. +
  3959. +/* allocate new znode and add the operation that inserts the
  3960. + pointer to it into the parent node into the todo level
  3961. +
  3962. + Allocate new znode, add it into carry queue and post into @todo queue
  3963. + request to add pointer to new node into its parent.
  3964. +
  3965. + This is carry related routine that calls reiser4_new_node() to allocate new
  3966. + node.
  3967. +*/
  3968. +carry_node *add_new_znode(znode * brother /* existing left neighbor of new
  3969. + * node */ ,
  3970. + carry_node * ref /* carry node after which new
  3971. + * carry node is to be inserted
  3972. + * into queue. This affects
  3973. + * locking. */ ,
  3974. + carry_level * doing /* carry queue where new node is
  3975. + * to be added */ ,
  3976. + carry_level * todo /* carry queue where COP_INSERT
  3977. + * operation to add pointer to
  3978. + * new node will be added */ )
  3979. +{
  3980. + carry_node *fresh;
  3981. + znode *new_znode;
  3982. + carry_op *add_pointer;
  3983. + carry_plugin_info info;
  3984. +
  3985. + assert("nikita-1048", brother != NULL);
  3986. + assert("nikita-1049", todo != NULL);
  3987. +
  3988. + /* There is a lot of possible variations here: to what parent
  3989. + new node will be attached and where. For simplicity, always
  3990. + do the following:
  3991. +
  3992. + (1) new node and @brother will have the same parent.
  3993. +
  3994. + (2) new node is added on the right of @brother
  3995. +
  3996. + */
  3997. +
  3998. + fresh = reiser4_add_carry_skip(doing,
  3999. + ref ? POOLO_AFTER : POOLO_LAST, ref);
  4000. + if (IS_ERR(fresh))
  4001. + return fresh;
  4002. +
  4003. + fresh->deallocate = 1;
  4004. + fresh->free = 1;
  4005. +
  4006. + new_znode = reiser4_new_node(brother, znode_get_level(brother));
  4007. + if (IS_ERR(new_znode))
  4008. + /* @fresh will be deallocated automatically by error
  4009. + handling code in the caller. */
  4010. + return (carry_node *) new_znode;
  4011. +
  4012. + /* new_znode returned znode with x_count 1. Caller has to decrease
  4013. + it. make_space() does. */
  4014. +
  4015. + ZF_SET(new_znode, JNODE_ORPHAN);
  4016. + fresh->node = new_znode;
  4017. +
  4018. + while (ZF_ISSET(reiser4_carry_real(ref), JNODE_ORPHAN)) {
  4019. + ref = carry_node_prev(ref);
  4020. + assert("nikita-1606", !carry_node_end(doing, ref));
  4021. + }
  4022. +
  4023. + info.todo = todo;
  4024. + info.doing = doing;
  4025. + add_pointer = node_post_carry(&info, COP_INSERT,
  4026. + reiser4_carry_real(ref), 1);
  4027. + if (IS_ERR(add_pointer)) {
  4028. + /* no need to deallocate @new_znode here: it will be
  4029. + deallocated during carry error handling. */
  4030. + return (carry_node *) add_pointer;
  4031. + }
  4032. +
  4033. + add_pointer->u.insert.type = COPT_CHILD;
  4034. + add_pointer->u.insert.child = fresh;
  4035. + add_pointer->u.insert.brother = brother;
  4036. + /* initially new node spans an empty key range */
  4037. + write_lock_dk(znode_get_tree(brother));
  4038. + znode_set_ld_key(new_znode,
  4039. + znode_set_rd_key(new_znode,
  4040. + znode_get_rd_key(brother)));
  4041. + write_unlock_dk(znode_get_tree(brother));
  4042. + return fresh;
  4043. +}
  4044. +
  4045. +/* DEBUGGING FUNCTIONS.
  4046. +
  4047. + Probably we also should leave them on even when
  4048. + debugging is turned off to print dumps at errors.
  4049. +*/
  4050. +#if REISER4_DEBUG
  4051. +static int carry_level_invariant(carry_level * level, carry_queue_state state)
  4052. +{
  4053. + carry_node *node;
  4054. + carry_node *tmp_node;
  4055. +
  4056. + if (level == NULL)
  4057. + return 0;
  4058. +
  4059. + if (level->track_type != 0 &&
  4060. + level->track_type != CARRY_TRACK_NODE &&
  4061. + level->track_type != CARRY_TRACK_CHANGE)
  4062. + return 0;
  4063. +
  4064. + /* check that nodes are in ascending order */
  4065. + for_all_nodes(level, node, tmp_node) {
  4066. + znode *left;
  4067. + znode *right;
  4068. +
  4069. + reiser4_key lkey;
  4070. + reiser4_key rkey;
  4071. +
  4072. + if (node != carry_node_front(level)) {
  4073. + if (state == CARRY_TODO) {
  4074. + right = node->node;
  4075. + left = carry_node_prev(node)->node;
  4076. + } else {
  4077. + right = reiser4_carry_real(node);
  4078. + left = reiser4_carry_real(carry_node_prev(node));
  4079. + }
  4080. + if (right == NULL || left == NULL)
  4081. + continue;
  4082. + if (node_is_empty(right) || node_is_empty(left))
  4083. + continue;
  4084. + if (!keyle(leftmost_key_in_node(left, &lkey),
  4085. + leftmost_key_in_node(right, &rkey))) {
  4086. + warning("", "wrong key order");
  4087. + return 0;
  4088. + }
  4089. + }
  4090. + }
  4091. + return 1;
  4092. +}
  4093. +#endif
  4094. +
  4095. +/* get symbolic name for boolean */
  4096. +static const char *tf(int boolean/* truth value */)
  4097. +{
  4098. + return boolean ? "t" : "f";
  4099. +}
  4100. +
  4101. +/* symbolic name for carry operation */
  4102. +static const char *carry_op_name(carry_opcode op/* carry opcode */)
  4103. +{
  4104. + switch (op) {
  4105. + case COP_INSERT:
  4106. + return "COP_INSERT";
  4107. + case COP_DELETE:
  4108. + return "COP_DELETE";
  4109. + case COP_CUT:
  4110. + return "COP_CUT";
  4111. + case COP_PASTE:
  4112. + return "COP_PASTE";
  4113. + case COP_UPDATE:
  4114. + return "COP_UPDATE";
  4115. + case COP_EXTENT:
  4116. + return "COP_EXTENT";
  4117. + case COP_INSERT_FLOW:
  4118. + return "COP_INSERT_FLOW";
  4119. + default:{
  4120. + /* not mt safe, but who cares? */
  4121. + static char buf[20];
  4122. +
  4123. + sprintf(buf, "unknown op: %x", op);
  4124. + return buf;
  4125. + }
  4126. + }
  4127. +}
  4128. +
  4129. +/* dump information about carry node */
  4130. +static void print_carry(const char *prefix /* prefix to print */ ,
  4131. + carry_node * node/* node to print */)
  4132. +{
  4133. + if (node == NULL) {
  4134. + printk("%s: null\n", prefix);
  4135. + return;
  4136. + }
  4137. + printk
  4138. + ("%s: %p parent: %s, left: %s, unlock: %s, free: %s, dealloc: %s\n",
  4139. + prefix, node, tf(node->parent), tf(node->left), tf(node->unlock),
  4140. + tf(node->free), tf(node->deallocate));
  4141. +}
  4142. +
  4143. +/* dump information about carry operation */
  4144. +static void print_op(const char *prefix /* prefix to print */ ,
  4145. + carry_op * op/* operation to print */)
  4146. +{
  4147. + if (op == NULL) {
  4148. + printk("%s: null\n", prefix);
  4149. + return;
  4150. + }
  4151. + printk("%s: %p carry_opcode: %s\n", prefix, op, carry_op_name(op->op));
  4152. + print_carry("\tnode", op->node);
  4153. + switch (op->op) {
  4154. + case COP_INSERT:
  4155. + case COP_PASTE:
  4156. + print_coord("\tcoord",
  4157. + op->u.insert.d ? op->u.insert.d->coord : NULL, 0);
  4158. + reiser4_print_key("\tkey",
  4159. + op->u.insert.d ? op->u.insert.d->key : NULL);
  4160. + print_carry("\tchild", op->u.insert.child);
  4161. + break;
  4162. + case COP_DELETE:
  4163. + print_carry("\tchild", op->u.delete.child);
  4164. + break;
  4165. + case COP_CUT:
  4166. + if (op->u.cut_or_kill.is_cut) {
  4167. + print_coord("\tfrom",
  4168. + op->u.cut_or_kill.u.kill->params.from, 0);
  4169. + print_coord("\tto", op->u.cut_or_kill.u.kill->params.to,
  4170. + 0);
  4171. + } else {
  4172. + print_coord("\tfrom",
  4173. + op->u.cut_or_kill.u.cut->params.from, 0);
  4174. + print_coord("\tto", op->u.cut_or_kill.u.cut->params.to,
  4175. + 0);
  4176. + }
  4177. + break;
  4178. + case COP_UPDATE:
  4179. + print_carry("\tleft", op->u.update.left);
  4180. + break;
  4181. + default:
  4182. + /* do nothing */
  4183. + break;
  4184. + }
  4185. +}
  4186. +
  4187. +/* dump information about all nodes and operations in a @level */
  4188. +static void print_level(const char *prefix /* prefix to print */ ,
  4189. + carry_level * level/* level to print */)
  4190. +{
  4191. + carry_node *node;
  4192. + carry_node *tmp_node;
  4193. + carry_op *op;
  4194. + carry_op *tmp_op;
  4195. +
  4196. + if (level == NULL) {
  4197. + printk("%s: null\n", prefix);
  4198. + return;
  4199. + }
  4200. + printk("%s: %p, restartable: %s\n",
  4201. + prefix, level, tf(level->restartable));
  4202. +
  4203. + for_all_nodes(level, node, tmp_node)
  4204. + print_carry("\tcarry node", node);
  4205. + for_all_ops(level, op, tmp_op)
  4206. + print_op("\tcarry op", op);
  4207. +}
  4208. +
  4209. +/* Make Linus happy.
  4210. + Local variables:
  4211. + c-indentation-style: "K&R"
  4212. + mode-name: "LC"
  4213. + c-basic-offset: 8
  4214. + tab-width: 8
  4215. + fill-column: 120
  4216. + scroll-step: 1
  4217. + End:
  4218. +*/
  4219. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/carry.h linux-4.14.2/fs/reiser4/carry.h
  4220. --- linux-4.14.2.orig/fs/reiser4/carry.h 1970-01-01 01:00:00.000000000 +0100
  4221. +++ linux-4.14.2/fs/reiser4/carry.h 2017-11-26 22:13:09.000000000 +0100
  4222. @@ -0,0 +1,445 @@
  4223. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  4224. + reiser4/README */
  4225. +
  4226. +/* Functions and data types to "carry" tree modification(s) upward.
  4227. + See fs/reiser4/carry.c for details. */
  4228. +
  4229. +#if !defined(__FS_REISER4_CARRY_H__)
  4230. +#define __FS_REISER4_CARRY_H__
  4231. +
  4232. +#include "forward.h"
  4233. +#include "debug.h"
  4234. +#include "pool.h"
  4235. +#include "znode.h"
  4236. +
  4237. +#include <linux/types.h>
  4238. +
  4239. +/* &carry_node - "location" of carry node.
  4240. +
  4241. + "location" of node that is involved or going to be involved into
  4242. + carry process. Node where operation will be carried to on the
  4243. + parent level cannot be recorded explicitly. Operation will be carried
  4244. + usually to the parent of some node (where changes are performed at
  4245. + the current level) or, to the left neighbor of its parent. But while
  4246. + modifications are performed at the current level, parent may
  4247. + change. So, we have to allow some indirection (or, positively,
  4248. + flexibility) in locating carry nodes.
  4249. +
  4250. +*/
  4251. +typedef struct carry_node {
  4252. + /* pool linkage */
  4253. + struct reiser4_pool_header header;
  4254. +
  4255. + /* base node from which real_node is calculated. See
  4256. + fs/reiser4/carry.c:lock_carry_node(). */
  4257. + znode *node;
  4258. +
  4259. + /* how to get ->real_node */
  4260. + /* to get ->real_node obtain parent of ->node */
  4261. + __u32 parent:1;
  4262. + /* to get ->real_node obtain left neighbor of parent of
  4263. + ->node */
  4264. + __u32 left:1;
  4265. + __u32 left_before:1;
  4266. +
  4267. + /* locking */
  4268. +
  4269. + /* this node was locked by carry process and should be
  4270. + unlocked when carry leaves a level */
  4271. + __u32 unlock:1;
  4272. +
  4273. + /* disk block for this node was allocated by carry process and
  4274. + should be deallocated when carry leaves a level */
  4275. + __u32 deallocate:1;
  4276. + /* this carry node was allocated by carry process and should be
  4277. + freed when carry leaves a level */
  4278. + __u32 free:1;
  4279. +
  4280. + /* type of lock we want to take on this node */
  4281. + lock_handle lock_handle;
  4282. +} carry_node;
  4283. +
  4284. +/* &carry_opcode - elementary operations that can be carried upward
  4285. +
  4286. + Operations that carry() can handle. This list is supposed to be
  4287. + expanded.
  4288. +
  4289. + Each carry operation (cop) is handled by appropriate function defined
  4290. + in fs/reiser4/carry.c. For example COP_INSERT is handled by
  4291. + fs/reiser4/carry.c:carry_insert() etc. These functions in turn
  4292. + call plugins of nodes affected by operation to modify nodes' content
  4293. + and to gather operations to be performed on the next level.
  4294. +
  4295. +*/
  4296. +typedef enum {
  4297. + /* insert new item into node. */
  4298. + COP_INSERT,
  4299. + /* delete pointer from parent node */
  4300. + COP_DELETE,
  4301. + /* remove part of or whole node. */
  4302. + COP_CUT,
  4303. + /* increase size of item. */
  4304. + COP_PASTE,
  4305. + /* insert extent (that is sequence of unformatted nodes). */
  4306. + COP_EXTENT,
  4307. + /* update delimiting key in least common ancestor of two
  4308. + nodes. This is performed when items are moved between two
  4309. + nodes.
  4310. + */
  4311. + COP_UPDATE,
  4312. + /* insert flow */
  4313. + COP_INSERT_FLOW,
  4314. + COP_LAST_OP,
  4315. +} carry_opcode;
  4316. +
  4317. +#define CARRY_FLOW_NEW_NODES_LIMIT 20
  4318. +
  4319. +/* mode (or subtype) of COP_{INSERT|PASTE} operation. Specifies how target
  4320. + item is determined. */
  4321. +typedef enum {
  4322. + /* target item is one containing pointer to the ->child node */
  4323. + COPT_CHILD,
  4324. + /* target item is given explicitly by @coord */
  4325. + COPT_ITEM_DATA,
  4326. + /* target item is given by key */
  4327. + COPT_KEY,
  4328. + /* see insert_paste_common() for more comments on this. */
  4329. + COPT_PASTE_RESTARTED,
  4330. +} cop_insert_pos_type;
  4331. +
  4332. +/* flags to cut and delete */
  4333. +typedef enum {
  4334. + /* don't kill node even if it became completely empty as results of
  4335. + * cut. This is needed for eottl handling. See carry_extent() for
  4336. + * details. */
  4337. + DELETE_RETAIN_EMPTY = (1 << 0)
  4338. +} cop_delete_flag;
  4339. +
  4340. +/*
  4341. + * carry() implements "lock handle tracking" feature.
  4342. + *
  4343. + * Callers supply carry with node where to perform initial operation and lock
  4344. + * handle on this node. Trying to optimize node utilization carry may actually
  4345. + * move insertion point to different node. Callers expect that lock handle
  4346. + * will be transferred to the new node also.
  4347. + *
  4348. + */
  4349. +typedef enum {
  4350. + /* transfer lock handle along with insertion point */
  4351. + CARRY_TRACK_CHANGE = 1,
  4352. + /* acquire new lock handle to the node where insertion point is. This
  4353. + * is used when carry() client doesn't initially possess lock handle
  4354. + * on the insertion point node, for example, by extent insertion
  4355. + * code. See carry_extent(). */
  4356. + CARRY_TRACK_NODE = 2
  4357. +} carry_track_type;
  4358. +
  4359. +/* data supplied to COP_{INSERT|PASTE} by callers */
  4360. +typedef struct carry_insert_data {
  4361. + /* position where new item is to be inserted */
  4362. + coord_t *coord;
  4363. + /* new item description */
  4364. + reiser4_item_data * data;
  4365. + /* key of new item */
  4366. + const reiser4_key * key;
  4367. +} carry_insert_data;
  4368. +
  4369. +/* cut and kill are similar, so carry_cut_data and carry_kill_data share the
  4370. + below structure of parameters */
  4371. +struct cut_kill_params {
  4372. + /* coord where cut starts (inclusive) */
  4373. + coord_t *from;
  4374. + /* coord where cut stops (inclusive, this item/unit will also be
  4375. + * cut) */
  4376. + coord_t *to;
  4377. + /* starting key. This is necessary when item and unit pos don't
  4378. + * uniquely identify what portion or tree to remove. For example, this
  4379. + * indicates what portion of extent unit will be affected. */
  4380. + const reiser4_key * from_key;
  4381. + /* exclusive stop key */
  4382. + const reiser4_key * to_key;
  4383. + /* if this is not NULL, smallest actually removed key is stored
  4384. + * here. */
  4385. + reiser4_key *smallest_removed;
  4386. + /* kill_node_content() is called for file truncate */
  4387. + int truncate;
  4388. +};
  4389. +
  4390. +struct carry_cut_data {
  4391. + struct cut_kill_params params;
  4392. +};
  4393. +
  4394. +struct carry_kill_data {
  4395. + struct cut_kill_params params;
  4396. + /* parameter to be passed to the ->kill_hook() method of item
  4397. + * plugin */
  4398. + /*void *iplug_params; *//* FIXME: unused currently */
  4399. + /* if not NULL---inode whose items are being removed. This is needed
  4400. + * for ->kill_hook() of extent item to update VM structures when
  4401. + * removing pages. */
  4402. + struct inode *inode;
  4403. + /* sibling list maintenance is complicated by existence of eottl. When
  4404. + * eottl whose left and right neighbors are formatted leaves is
  4405. + * removed, one has to connect said leaves in the sibling list. This
  4406. + * cannot be done when extent removal is just started as locking rules
  4407. + * require sibling list update to happen atomically with removal of
  4408. + * extent item. Therefore: 1. pointers to left and right neighbors
  4409. + * have to be passed down to the ->kill_hook() of extent item, and
  4410. + * 2. said neighbors have to be locked. */
  4411. + lock_handle *left;
  4412. + lock_handle *right;
  4413. + /* flags modifying behavior of kill. Currently, it may have
  4414. + DELETE_RETAIN_EMPTY set. */
  4415. + unsigned flags;
  4416. + char *buf;
  4417. +};
  4418. +
  4419. +/* &carry_tree_op - operation to "carry" upward.
  4420. +
  4421. + Description of an operation we want to "carry" to the upper level of
  4422. + a tree: e.g, when we insert something and there is not enough space
  4423. + we allocate a new node and "carry" the operation of inserting a
  4424. + pointer to the new node to the upper level, on removal of empty node,
  4425. + we carry up operation of removing appropriate entry from parent.
  4426. +
  4427. + There are two types of carry ops: when adding or deleting a node, the
  4428. + node at the parent level where appropriate modification has to be
  4429. + performed is known in advance. When shifting items between nodes
  4430. + (split, merge), delimiting key should be changed in the least common
  4431. + parent of the nodes involved that is not known in advance.
  4432. +
  4433. + For the operations of the first type we store in &carry_op pointer to
  4434. + the &carry_node at the parent level. For the operation of the second
  4435. + type we store &carry_node of parents of the left and right nodes
  4436. + modified and keep track of them upward until they coincide.
  4437. +
  4438. +*/
  4439. +typedef struct carry_op {
  4440. + /* pool linkage */
  4441. + struct reiser4_pool_header header;
  4442. + carry_opcode op;
  4443. + /* node on which operation is to be performed:
  4444. +
  4445. + for insert, paste: node where new item is to be inserted
  4446. +
  4447. + for delete: node where pointer is to be deleted
  4448. +
  4449. + for cut: node to cut from
  4450. +
  4451. + for update: node where delimiting key is to be modified
  4452. +
  4453. + for modify: parent of modified node
  4454. +
  4455. + */
  4456. + carry_node *node;
  4457. + union {
  4458. + struct {
  4459. + /* (sub-)type of insertion/paste. Taken from
  4460. + cop_insert_pos_type. */
  4461. + __u8 type;
  4462. + /* various operation flags. Taken from
  4463. + cop_insert_flag. */
  4464. + __u8 flags;
  4465. + carry_insert_data *d;
  4466. + carry_node *child;
  4467. + znode *brother;
  4468. + } insert, paste, extent;
  4469. +
  4470. + struct {
  4471. + int is_cut;
  4472. + union {
  4473. + carry_kill_data *kill;
  4474. + carry_cut_data *cut;
  4475. + } u;
  4476. + } cut_or_kill;
  4477. +
  4478. + struct {
  4479. + carry_node *left;
  4480. + } update;
  4481. + struct {
  4482. + /* changed child */
  4483. + carry_node *child;
  4484. + /* bitmask of changes. See &cop_modify_flag */
  4485. + __u32 flag;
  4486. + } modify;
  4487. + struct {
  4488. + /* flags to deletion operation. Are taken from
  4489. + cop_delete_flag */
  4490. + __u32 flags;
  4491. + /* child to delete from parent. If this is
  4492. + NULL, delete op->node. */
  4493. + carry_node *child;
  4494. + } delete;
  4495. + struct {
  4496. + /* various operation flags. Taken from
  4497. + cop_insert_flag. */
  4498. + __u32 flags;
  4499. + flow_t *flow;
  4500. + coord_t *insert_point;
  4501. + reiser4_item_data *data;
  4502. + /* flow insertion is limited by number of new blocks
  4503. + added in that operation which do not get any data
  4504. + but part of flow. This limit is set by macro
  4505. + CARRY_FLOW_NEW_NODES_LIMIT. This field stores number
  4506. + of nodes added already during one carry_flow */
  4507. + int new_nodes;
  4508. + } insert_flow;
  4509. + } u;
  4510. +} carry_op;
  4511. +
  4512. +/* &carry_op_pool - preallocated pool of carry operations, and nodes */
  4513. +typedef struct carry_pool {
  4514. + carry_op op[CARRIES_POOL_SIZE];
  4515. + struct reiser4_pool op_pool;
  4516. + carry_node node[NODES_LOCKED_POOL_SIZE];
  4517. + struct reiser4_pool node_pool;
  4518. +} carry_pool;
  4519. +
  4520. +/* &carry_tree_level - carry process on given level
  4521. +
  4522. + Description of balancing process on the given level.
  4523. +
  4524. + No need for locking here, as carry_tree_level is essentially per
  4525. + thread thing (for now).
  4526. +
  4527. +*/
  4528. +struct carry_level {
  4529. + /* this level may be restarted */
  4530. + __u32 restartable:1;
  4531. + /* list of carry nodes on this level, ordered by key order */
  4532. + struct list_head nodes;
  4533. + struct list_head ops;
  4534. + /* pool where new objects are allocated from */
  4535. + carry_pool *pool;
  4536. + int ops_num;
  4537. + int nodes_num;
  4538. + /* new root created on this level, if any */
  4539. + znode *new_root;
  4540. + /* This is set by caller (insert_by_key(), reiser4_resize_item(), etc.)
  4541. + when they want ->tracked to automagically wander to the node where
  4542. + insertion point moved after insert or paste.
  4543. + */
  4544. + carry_track_type track_type;
  4545. + /* lock handle supplied by user that we are tracking. See
  4546. + above. */
  4547. + lock_handle *tracked;
  4548. +};
  4549. +
  4550. +/* information carry passes to plugin methods that may add new operations to
  4551. + the @todo queue */
  4552. +struct carry_plugin_info {
  4553. + carry_level *doing;
  4554. + carry_level *todo;
  4555. +};
  4556. +
  4557. +int reiser4_carry(carry_level * doing, carry_level * done);
  4558. +
  4559. +carry_node *reiser4_add_carry(carry_level * level, pool_ordering order,
  4560. + carry_node * reference);
  4561. +carry_node *reiser4_add_carry_skip(carry_level * level, pool_ordering order,
  4562. + carry_node * reference);
  4563. +
  4564. +extern carry_node *insert_carry_node(carry_level * doing,
  4565. + carry_level * todo, const znode * node);
  4566. +
  4567. +extern carry_pool *init_carry_pool(int);
  4568. +extern void done_carry_pool(carry_pool * pool);
  4569. +
  4570. +extern void init_carry_level(carry_level * level, carry_pool * pool);
  4571. +
  4572. +extern carry_op *reiser4_post_carry(carry_level * level, carry_opcode op,
  4573. + znode * node, int apply_to_parent);
  4574. +extern carry_op *node_post_carry(carry_plugin_info * info, carry_opcode op,
  4575. + znode * node, int apply_to_parent_p);
  4576. +
  4577. +carry_node *add_new_znode(znode * brother, carry_node * reference,
  4578. + carry_level * doing, carry_level * todo);
  4579. +
  4580. +carry_node *find_carry_node(carry_level * level, const znode * node);
  4581. +
  4582. +extern znode *reiser4_carry_real(const carry_node * node);
  4583. +
  4584. +/* helper macros to iterate over carry queues */
  4585. +
  4586. +#define carry_node_next(node) \
  4587. + list_entry((node)->header.level_linkage.next, carry_node, \
  4588. + header.level_linkage)
  4589. +
  4590. +#define carry_node_prev(node) \
  4591. + list_entry((node)->header.level_linkage.prev, carry_node, \
  4592. + header.level_linkage)
  4593. +
  4594. +#define carry_node_front(level) \
  4595. + list_entry((level)->nodes.next, carry_node, header.level_linkage)
  4596. +
  4597. +#define carry_node_back(level) \
  4598. + list_entry((level)->nodes.prev, carry_node, header.level_linkage)
  4599. +
  4600. +#define carry_node_end(level, node) \
  4601. + (&(level)->nodes == &(node)->header.level_linkage)
  4602. +
  4603. +/* macro to iterate over all operations in a @level */
  4604. +#define for_all_ops(level /* carry level (of type carry_level *) */, \
  4605. + op /* pointer to carry operation, modified by loop (of \
  4606. + * type carry_op *) */, \
  4607. + tmp /* pointer to carry operation (of type carry_op *), \
  4608. + * used to make iterator stable in the face of \
  4609. + * deletions from the level */ ) \
  4610. +for (op = list_entry(level->ops.next, carry_op, header.level_linkage), \
  4611. + tmp = list_entry(op->header.level_linkage.next, carry_op, header.level_linkage); \
  4612. + &op->header.level_linkage != &level->ops; \
  4613. + op = tmp, \
  4614. + tmp = list_entry(op->header.level_linkage.next, carry_op, header.level_linkage))
  4615. +
  4616. +#if 0
  4617. +for (op = (carry_op *) pool_level_list_front(&level->ops), \
  4618. + tmp = (carry_op *) pool_level_list_next(&op->header) ; \
  4619. + !pool_level_list_end(&level->ops, &op->header) ; \
  4620. + op = tmp, tmp = (carry_op *) pool_level_list_next(&op->header))
  4621. +#endif
  4622. +
  4623. +/* macro to iterate over all nodes in a @level */ \
  4624. +#define for_all_nodes(level /* carry level (of type carry_level *) */, \
  4625. + node /* pointer to carry node, modified by loop (of \
  4626. + * type carry_node *) */, \
  4627. + tmp /* pointer to carry node (of type carry_node *), \
  4628. + * used to make iterator stable in the face of * \
  4629. + * deletions from the level */ ) \
  4630. +for (node = list_entry(level->nodes.next, carry_node, header.level_linkage), \
  4631. + tmp = list_entry(node->header.level_linkage.next, carry_node, header.level_linkage); \
  4632. + &node->header.level_linkage != &level->nodes; \
  4633. + node = tmp, \
  4634. + tmp = list_entry(node->header.level_linkage.next, carry_node, header.level_linkage))
  4635. +
  4636. +#if 0
  4637. +for (node = carry_node_front(level), \
  4638. + tmp = carry_node_next(node) ; !carry_node_end(level, node) ; \
  4639. + node = tmp, tmp = carry_node_next(node))
  4640. +#endif
  4641. +
  4642. +/* macro to iterate over all nodes in a @level in reverse order
  4643. +
  4644. + This is used, because nodes are unlocked in reversed order of locking */
  4645. +#define for_all_nodes_back(level /* carry level (of type carry_level *) */, \
  4646. + node /* pointer to carry node, modified by loop \
  4647. + * (of type carry_node *) */, \
  4648. + tmp /* pointer to carry node (of type carry_node \
  4649. + * *), used to make iterator stable in the \
  4650. + * face of deletions from the level */ ) \
  4651. +for (node = carry_node_back(level), \
  4652. + tmp = carry_node_prev(node) ; !carry_node_end(level, node) ; \
  4653. + node = tmp, tmp = carry_node_prev(node))
  4654. +
  4655. +/* __FS_REISER4_CARRY_H__ */
  4656. +#endif
  4657. +
  4658. +/* Make Linus happy.
  4659. + Local variables:
  4660. + c-indentation-style: "K&R"
  4661. + mode-name: "LC"
  4662. + c-basic-offset: 8
  4663. + tab-width: 8
  4664. + fill-column: 120
  4665. + scroll-step: 1
  4666. + End:
  4667. +*/
  4668. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/carry_ops.c linux-4.14.2/fs/reiser4/carry_ops.c
  4669. --- linux-4.14.2.orig/fs/reiser4/carry_ops.c 1970-01-01 01:00:00.000000000 +0100
  4670. +++ linux-4.14.2/fs/reiser4/carry_ops.c 2017-11-26 22:13:09.000000000 +0100
  4671. @@ -0,0 +1,2136 @@
  4672. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  4673. + reiser4/README */
  4674. +
  4675. +/* implementation of carry operations */
  4676. +
  4677. +#include "forward.h"
  4678. +#include "debug.h"
  4679. +#include "key.h"
  4680. +#include "coord.h"
  4681. +#include "plugin/item/item.h"
  4682. +#include "plugin/node/node.h"
  4683. +#include "jnode.h"
  4684. +#include "znode.h"
  4685. +#include "block_alloc.h"
  4686. +#include "tree_walk.h"
  4687. +#include "pool.h"
  4688. +#include "tree_mod.h"
  4689. +#include "carry.h"
  4690. +#include "carry_ops.h"
  4691. +#include "tree.h"
  4692. +#include "super.h"
  4693. +#include "reiser4.h"
  4694. +
  4695. +#include <linux/types.h>
  4696. +#include <linux/err.h>
  4697. +
  4698. +static int carry_shift_data(sideof side, coord_t *insert_coord, znode * node,
  4699. + carry_level * doing, carry_level * todo,
  4700. + unsigned int including_insert_coord_p);
  4701. +
  4702. +extern int lock_carry_node(carry_level * level, carry_node * node);
  4703. +extern int lock_carry_node_tail(carry_node * node);
  4704. +
  4705. +/* find left neighbor of a carry node
  4706. +
  4707. + Look for left neighbor of @node and add it to the @doing queue. See
  4708. + comments in the body.
  4709. +
  4710. +*/
  4711. +static carry_node *find_left_neighbor(carry_op * op /* node to find left
  4712. + * neighbor of */ ,
  4713. + carry_level * doing/* level to scan */)
  4714. +{
  4715. + int result;
  4716. + carry_node *node;
  4717. + carry_node *left;
  4718. + int flags;
  4719. + reiser4_tree *tree;
  4720. +
  4721. + node = op->node;
  4722. +
  4723. + tree = current_tree;
  4724. + read_lock_tree(tree);
  4725. + /* first, check whether left neighbor is already in a @doing queue */
  4726. + if (reiser4_carry_real(node)->left != NULL) {
  4727. + /* NOTE: there is locking subtlety here. Look into
  4728. + * find_right_neighbor() for more info */
  4729. + if (find_carry_node(doing,
  4730. + reiser4_carry_real(node)->left) != NULL) {
  4731. + read_unlock_tree(tree);
  4732. + left = node;
  4733. + do {
  4734. + left = list_entry(left->header.level_linkage.prev,
  4735. + carry_node, header.level_linkage);
  4736. + assert("nikita-3408", !carry_node_end(doing,
  4737. + left));
  4738. + } while (reiser4_carry_real(left) ==
  4739. + reiser4_carry_real(node));
  4740. + return left;
  4741. + }
  4742. + }
  4743. + read_unlock_tree(tree);
  4744. +
  4745. + left = reiser4_add_carry_skip(doing, POOLO_BEFORE, node);
  4746. + if (IS_ERR(left))
  4747. + return left;
  4748. +
  4749. + left->node = node->node;
  4750. + left->free = 1;
  4751. +
  4752. + flags = GN_TRY_LOCK;
  4753. + if (!(op->u.insert.flags & COPI_LOAD_LEFT))
  4754. + flags |= GN_NO_ALLOC;
  4755. +
  4756. + /* then, feeling lucky, peek left neighbor in the cache. */
  4757. + result = reiser4_get_left_neighbor(&left->lock_handle,
  4758. + reiser4_carry_real(node),
  4759. + ZNODE_WRITE_LOCK, flags);
  4760. + if (result == 0) {
  4761. + /* ok, node found and locked. */
  4762. + result = lock_carry_node_tail(left);
  4763. + if (result != 0)
  4764. + left = ERR_PTR(result);
  4765. + } else if (result == -E_NO_NEIGHBOR || result == -ENOENT) {
  4766. + /* node is leftmost node in a tree, or neighbor wasn't in
  4767. + cache, or there is an extent on the left. */
  4768. + reiser4_pool_free(&doing->pool->node_pool, &left->header);
  4769. + left = NULL;
  4770. + } else if (doing->restartable) {
  4771. + /* if left neighbor is locked, and level is restartable, add
  4772. + new node to @doing and restart. */
  4773. + assert("nikita-913", node->parent != 0);
  4774. + assert("nikita-914", node->node != NULL);
  4775. + left->left = 1;
  4776. + left->free = 0;
  4777. + left = ERR_PTR(-E_REPEAT);
  4778. + } else {
  4779. + /* left neighbor is locked, level cannot be restarted. Just
  4780. + ignore left neighbor. */
  4781. + reiser4_pool_free(&doing->pool->node_pool, &left->header);
  4782. + left = NULL;
  4783. + }
  4784. + return left;
  4785. +}
  4786. +
  4787. +/* find right neighbor of a carry node
  4788. +
  4789. + Look for right neighbor of @node and add it to the @doing queue. See
  4790. + comments in the body.
  4791. +
  4792. +*/
  4793. +static carry_node *find_right_neighbor(carry_op * op /* node to find right
  4794. + * neighbor of */ ,
  4795. + carry_level * doing/* level to scan */)
  4796. +{
  4797. + int result;
  4798. + carry_node *node;
  4799. + carry_node *right;
  4800. + lock_handle lh;
  4801. + int flags;
  4802. + reiser4_tree *tree;
  4803. +
  4804. + init_lh(&lh);
  4805. +
  4806. + node = op->node;
  4807. +
  4808. + tree = current_tree;
  4809. + read_lock_tree(tree);
  4810. + /* first, check whether right neighbor is already in a @doing queue */
  4811. + if (reiser4_carry_real(node)->right != NULL) {
  4812. + /*
  4813. + * Tree lock is taken here anyway, because, even if _outcome_
  4814. + * of (find_carry_node() != NULL) doesn't depend on
  4815. + * concurrent updates to ->right, find_carry_node() cannot
  4816. + * work with second argument NULL. Hence, following comment is
  4817. + * of historic importance only.
  4818. + *
  4819. + * Subtle:
  4820. + *
  4821. + * Q: why don't we need tree lock here, looking for the right
  4822. + * neighbor?
  4823. + *
  4824. + * A: even if value of node->real_node->right were changed
  4825. + * during find_carry_node() execution, outcome of execution
  4826. + * wouldn't change, because (in short) other thread cannot add
  4827. + * elements to the @doing, and if node->real_node->right
  4828. + * already was in @doing, value of node->real_node->right
  4829. + * couldn't change, because node cannot be inserted between
  4830. + * locked neighbors.
  4831. + */
  4832. + if (find_carry_node(doing,
  4833. + reiser4_carry_real(node)->right) != NULL) {
  4834. + read_unlock_tree(tree);
  4835. + /*
  4836. + * What we are doing here (this is also applicable to
  4837. + * the find_left_neighbor()).
  4838. + *
  4839. + * tree_walk.c code requires that insertion of a
  4840. + * pointer to a child, modification of parent pointer
  4841. + * in the child, and insertion of the child into
  4842. + * sibling list are atomic (see
  4843. + * plugin/item/internal.c:create_hook_internal()).
  4844. + *
  4845. + * carry allocates new node long before pointer to it
  4846. + * is inserted into parent and, actually, long before
  4847. + * parent is even known. Such allocated-but-orphaned
  4848. + * nodes are only trackable through carry level lists.
  4849. + *
  4850. + * Situation that is handled here is following: @node
  4851. + * has valid ->right pointer, but there is
  4852. + * allocated-but-orphaned node in the carry queue that
  4853. + * is logically between @node and @node->right. Here
  4854. + * we are searching for it. Critical point is that
  4855. + * this is only possible if @node->right is also in
  4856. + * the carry queue (this is checked above), because
  4857. + * this is the only way new orphaned node could be
  4858. + * inserted between them (before inserting new node,
  4859. + * make_space() first tries to shift to the right, so,
  4860. + * right neighbor will be locked and queued).
  4861. + *
  4862. + */
  4863. + right = node;
  4864. + do {
  4865. + right = list_entry(right->header.level_linkage.next,
  4866. + carry_node, header.level_linkage);
  4867. + assert("nikita-3408", !carry_node_end(doing,
  4868. + right));
  4869. + } while (reiser4_carry_real(right) ==
  4870. + reiser4_carry_real(node));
  4871. + return right;
  4872. + }
  4873. + }
  4874. + read_unlock_tree(tree);
  4875. +
  4876. + flags = GN_CAN_USE_UPPER_LEVELS;
  4877. + if (!(op->u.insert.flags & COPI_LOAD_RIGHT))
  4878. + flags = GN_NO_ALLOC;
  4879. +
  4880. + /* then, try to lock right neighbor */
  4881. + init_lh(&lh);
  4882. + result = reiser4_get_right_neighbor(&lh,
  4883. + reiser4_carry_real(node),
  4884. + ZNODE_WRITE_LOCK, flags);
  4885. + if (result == 0) {
  4886. + /* ok, node found and locked. */
  4887. + right = reiser4_add_carry_skip(doing, POOLO_AFTER, node);
  4888. + if (!IS_ERR(right)) {
  4889. + right->node = lh.node;
  4890. + move_lh(&right->lock_handle, &lh);
  4891. + right->free = 1;
  4892. + result = lock_carry_node_tail(right);
  4893. + if (result != 0)
  4894. + right = ERR_PTR(result);
  4895. + }
  4896. + } else if ((result == -E_NO_NEIGHBOR) || (result == -ENOENT)) {
  4897. + /* node is rightmost node in a tree, or neighbor wasn't in
  4898. + cache, or there is an extent on the right. */
  4899. + right = NULL;
  4900. + } else
  4901. + right = ERR_PTR(result);
  4902. + done_lh(&lh);
  4903. + return right;
  4904. +}
  4905. +
  4906. +/* how much free space in a @node is needed for @op
  4907. +
  4908. + How much space in @node is required for completion of @op, where @op is
  4909. + insert or paste operation.
  4910. +*/
  4911. +static unsigned int space_needed_for_op(znode * node /* znode data are
  4912. + * inserted or
  4913. + * pasted in */ ,
  4914. + carry_op * op /* carry
  4915. + operation */ )
  4916. +{
  4917. + assert("nikita-919", op != NULL);
  4918. +
  4919. + switch (op->op) {
  4920. + default:
  4921. + impossible("nikita-1701", "Wrong opcode");
  4922. + case COP_INSERT:
  4923. + return space_needed(node, NULL, op->u.insert.d->data, 1);
  4924. + case COP_PASTE:
  4925. + return space_needed(node, op->u.insert.d->coord,
  4926. + op->u.insert.d->data, 0);
  4927. + }
  4928. +}
  4929. +
  4930. +/* how much space in @node is required to insert or paste @data at
  4931. + @coord. */
  4932. +unsigned int space_needed(const znode * node /* node data are inserted or
  4933. + * pasted in */ ,
  4934. + const coord_t *coord /* coord where data are
  4935. + * inserted or pasted
  4936. + * at */ ,
  4937. + const reiser4_item_data * data /* data to insert or
  4938. + * paste */ ,
  4939. + int insertion/* non-0 is inserting, 0---paste */)
  4940. +{
  4941. + int result;
  4942. + item_plugin *iplug;
  4943. +
  4944. + assert("nikita-917", node != NULL);
  4945. + assert("nikita-918", node_plugin_by_node(node) != NULL);
  4946. + assert("vs-230", !insertion || (coord == NULL));
  4947. +
  4948. + result = 0;
  4949. + iplug = data->iplug;
  4950. + if (iplug->b.estimate != NULL) {
  4951. + /* ask item plugin how much space is needed to insert this
  4952. + item */
  4953. + result += iplug->b.estimate(insertion ? NULL : coord, data);
  4954. + } else {
  4955. + /* reasonable default */
  4956. + result += data->length;
  4957. + }
  4958. + if (insertion) {
  4959. + node_plugin *nplug;
  4960. +
  4961. + nplug = node->nplug;
  4962. + /* and add node overhead */
  4963. + if (nplug->item_overhead != NULL)
  4964. + result += nplug->item_overhead(node, NULL);
  4965. + }
  4966. + return result;
  4967. +}
  4968. +
  4969. +/* find &coord in parent where pointer to new child is to be stored. */
  4970. +static int find_new_child_coord(carry_op * op /* COP_INSERT carry operation to
  4971. + * insert pointer to new
  4972. + * child */ )
  4973. +{
  4974. + int result;
  4975. + znode *node;
  4976. + znode *child;
  4977. +
  4978. + assert("nikita-941", op != NULL);
  4979. + assert("nikita-942", op->op == COP_INSERT);
  4980. +
  4981. + node = reiser4_carry_real(op->node);
  4982. + assert("nikita-943", node != NULL);
  4983. + assert("nikita-944", node_plugin_by_node(node) != NULL);
  4984. +
  4985. + child = reiser4_carry_real(op->u.insert.child);
  4986. + result =
  4987. + find_new_child_ptr(node, child, op->u.insert.brother,
  4988. + op->u.insert.d->coord);
  4989. +
  4990. + build_child_ptr_data(child, op->u.insert.d->data);
  4991. + return result;
  4992. +}
  4993. +
  4994. +/* additional amount of free space in @node required to complete @op */
  4995. +static int free_space_shortage(znode * node /* node to check */ ,
  4996. + carry_op * op/* operation being performed */)
  4997. +{
  4998. + assert("nikita-1061", node != NULL);
  4999. + assert("nikita-1062", op != NULL);
  5000. +
  5001. + switch (op->op) {
  5002. + default:
  5003. + impossible("nikita-1702", "Wrong opcode");
  5004. + case COP_INSERT:
  5005. + case COP_PASTE:
  5006. + return space_needed_for_op(node, op) - znode_free_space(node);
  5007. + case COP_EXTENT:
  5008. + /* when inserting extent shift data around until insertion
  5009. + point is utmost in the node. */
  5010. + if (coord_wrt(op->u.insert.d->coord) == COORD_INSIDE)
  5011. + return +1;
  5012. + else
  5013. + return -1;
  5014. + }
  5015. +}
  5016. +
  5017. +/* helper function: update node pointer in operation after insertion
  5018. + point was probably shifted into @target. */
  5019. +static znode *sync_op(carry_op * op, carry_node * target)
  5020. +{
  5021. + znode *insertion_node;
  5022. +
  5023. + /* reget node from coord: shift might move insertion coord to
  5024. + the neighbor */
  5025. + insertion_node = op->u.insert.d->coord->node;
  5026. + /* if insertion point was actually moved into new node,
  5027. + update carry node pointer in operation. */
  5028. + if (insertion_node != reiser4_carry_real(op->node)) {
  5029. + op->node = target;
  5030. + assert("nikita-2540",
  5031. + reiser4_carry_real(target) == insertion_node);
  5032. + }
  5033. + assert("nikita-2541",
  5034. + reiser4_carry_real(op->node) == op->u.insert.d->coord->node);
  5035. + return insertion_node;
  5036. +}
  5037. +
  5038. +/*
  5039. + * complete make_space() call: update tracked lock handle if necessary. See
  5040. + * comments for fs/reiser4/carry.h:carry_track_type
  5041. + */
  5042. +static int
  5043. +make_space_tail(carry_op * op, carry_level * doing, znode * orig_node)
  5044. +{
  5045. + int result;
  5046. + carry_track_type tracking;
  5047. + znode *node;
  5048. +
  5049. + tracking = doing->track_type;
  5050. + node = op->u.insert.d->coord->node;
  5051. +
  5052. + if (tracking == CARRY_TRACK_NODE ||
  5053. + (tracking == CARRY_TRACK_CHANGE && node != orig_node)) {
  5054. + /* inserting or pasting into node different from
  5055. + original. Update lock handle supplied by caller. */
  5056. + assert("nikita-1417", doing->tracked != NULL);
  5057. + done_lh(doing->tracked);
  5058. + init_lh(doing->tracked);
  5059. + result = longterm_lock_znode(doing->tracked, node,
  5060. + ZNODE_WRITE_LOCK,
  5061. + ZNODE_LOCK_HIPRI);
  5062. + } else
  5063. + result = 0;
  5064. + return result;
  5065. +}
  5066. +
  5067. +/* This is insertion policy function. It shifts data to the left and right
  5068. + neighbors of insertion coord and allocates new nodes until there is enough
  5069. + free space to complete @op.
  5070. +
  5071. + See comments in the body.
  5072. +
  5073. + Assumes that the node format favors insertions at the right end of the node
  5074. + as node40 does.
  5075. +
  5076. + See carry_flow() on detail about flow insertion
  5077. +*/
  5078. +static int make_space(carry_op * op /* carry operation, insert or paste */ ,
  5079. + carry_level * doing /* current carry queue */ ,
  5080. + carry_level * todo/* carry queue on the parent level */)
  5081. +{
  5082. + znode *node;
  5083. + int result;
  5084. + int not_enough_space;
  5085. + int blk_alloc;
  5086. + znode *orig_node;
  5087. + __u32 flags;
  5088. +
  5089. + coord_t *coord;
  5090. +
  5091. + assert("nikita-890", op != NULL);
  5092. + assert("nikita-891", todo != NULL);
  5093. + assert("nikita-892",
  5094. + op->op == COP_INSERT ||
  5095. + op->op == COP_PASTE || op->op == COP_EXTENT);
  5096. + assert("nikita-1607",
  5097. + reiser4_carry_real(op->node) == op->u.insert.d->coord->node);
  5098. +
  5099. + flags = op->u.insert.flags;
  5100. +
  5101. + /* NOTE check that new node can only be allocated after checking left
  5102. + * and right neighbors. This is necessary for proper work of
  5103. + * find_{left,right}_neighbor(). */
  5104. + assert("nikita-3410", ergo(flags & COPI_DONT_ALLOCATE,
  5105. + flags & COPI_DONT_SHIFT_LEFT));
  5106. + assert("nikita-3411", ergo(flags & COPI_DONT_ALLOCATE,
  5107. + flags & COPI_DONT_SHIFT_RIGHT));
  5108. +
  5109. + coord = op->u.insert.d->coord;
  5110. + orig_node = node = coord->node;
  5111. +
  5112. + assert("nikita-908", node != NULL);
  5113. + assert("nikita-909", node_plugin_by_node(node) != NULL);
  5114. +
  5115. + result = 0;
  5116. + /* If there is not enough space in a node, try to shift something to
  5117. + the left neighbor. This is a bit tricky, as locking to the left is
  5118. + low priority. This is handled by restart logic in carry().
  5119. + */
  5120. + not_enough_space = free_space_shortage(node, op);
  5121. + if (not_enough_space <= 0)
  5122. + /* it is possible that carry was called when there actually
  5123. + was enough space in the node. For example, when inserting
  5124. + leftmost item so that delimiting keys have to be updated.
  5125. + */
  5126. + return make_space_tail(op, doing, orig_node);
  5127. + if (!(flags & COPI_DONT_SHIFT_LEFT)) {
  5128. + carry_node *left;
  5129. + /* make note in statistics of an attempt to move
  5130. + something into the left neighbor */
  5131. + left = find_left_neighbor(op, doing);
  5132. + if (unlikely(IS_ERR(left))) {
  5133. + if (PTR_ERR(left) == -E_REPEAT)
  5134. + return -E_REPEAT;
  5135. + else {
  5136. + /* some error other than restart request
  5137. + occurred. This shouldn't happen. Issue a
  5138. + warning and continue as if left neighbor
  5139. + weren't existing.
  5140. + */
  5141. + warning("nikita-924",
  5142. + "Error accessing left neighbor: %li",
  5143. + PTR_ERR(left));
  5144. + }
  5145. + } else if (left != NULL) {
  5146. +
  5147. + /* shift everything possible on the left of and
  5148. + including insertion coord into the left neighbor */
  5149. + result = carry_shift_data(LEFT_SIDE, coord,
  5150. + reiser4_carry_real(left),
  5151. + doing, todo,
  5152. + flags & COPI_GO_LEFT);
  5153. +
  5154. + /* reget node from coord: shift_left() might move
  5155. + insertion coord to the left neighbor */
  5156. + node = sync_op(op, left);
  5157. +
  5158. + not_enough_space = free_space_shortage(node, op);
  5159. + /* There is not enough free space in @node, but
  5160. + may be, there is enough free space in
  5161. + @left. Various balancing decisions are valid here.
  5162. + The same for the shifting to the right.
  5163. + */
  5164. + }
  5165. + }
  5166. + /* If there still is not enough space, shift to the right */
  5167. + if (not_enough_space > 0 && !(flags & COPI_DONT_SHIFT_RIGHT)) {
  5168. + carry_node *right;
  5169. +
  5170. + right = find_right_neighbor(op, doing);
  5171. + if (IS_ERR(right)) {
  5172. + warning("nikita-1065",
  5173. + "Error accessing right neighbor: %li",
  5174. + PTR_ERR(right));
  5175. + } else if (right != NULL) {
  5176. + /* node containing insertion point, and its right
  5177. + neighbor node are write locked by now.
  5178. +
  5179. + shift everything possible on the right of but
  5180. + excluding insertion coord into the right neighbor
  5181. + */
  5182. + result = carry_shift_data(RIGHT_SIDE, coord,
  5183. + reiser4_carry_real(right),
  5184. + doing, todo,
  5185. + flags & COPI_GO_RIGHT);
  5186. + /* reget node from coord: shift_right() might move
  5187. + insertion coord to the right neighbor */
  5188. + node = sync_op(op, right);
  5189. + not_enough_space = free_space_shortage(node, op);
  5190. + }
  5191. + }
  5192. + /* If there is still not enough space, allocate new node(s).
  5193. +
  5194. + We try to allocate new blocks if COPI_DONT_ALLOCATE is not set in
  5195. + the carry operation flags (currently this is needed during flush
  5196. + only).
  5197. + */
  5198. + for (blk_alloc = 0;
  5199. + not_enough_space > 0 && result == 0 && blk_alloc < 2 &&
  5200. + !(flags & COPI_DONT_ALLOCATE); ++blk_alloc) {
  5201. + carry_node *fresh; /* new node we are allocating */
  5202. + coord_t coord_shadow; /* remembered insertion point before
  5203. + * shifting data into new node */
  5204. + carry_node *node_shadow; /* remembered insertion node
  5205. + * before shifting */
  5206. + unsigned int gointo; /* whether insertion point should move
  5207. + * into newly allocated node */
  5208. +
  5209. + /* allocate new node on the right of @node. Znode and disk
  5210. + fake block number for new node are allocated.
  5211. +
  5212. + add_new_znode() posts carry operation COP_INSERT with
  5213. + COPT_CHILD option to the parent level to add
  5214. + pointer to newly created node to its parent.
  5215. +
  5216. + Subtle point: if several new nodes are required to complete
  5217. + insertion operation at this level, they will be inserted
  5218. + into their parents in the order of creation, which means
  5219. + that @node will be valid "cookie" at the time of insertion.
  5220. +
  5221. + */
  5222. + fresh = add_new_znode(node, op->node, doing, todo);
  5223. + if (IS_ERR(fresh))
  5224. + return PTR_ERR(fresh);
  5225. +
  5226. + /* Try to shift into new node. */
  5227. + result = lock_carry_node(doing, fresh);
  5228. + zput(reiser4_carry_real(fresh));
  5229. + if (result != 0) {
  5230. + warning("nikita-947",
  5231. + "Cannot lock new node: %i", result);
  5232. + return result;
  5233. + }
  5234. +
  5235. + /* both nodes are write locked by now.
  5236. +
  5237. + shift everything possible on the right of and
  5238. + including insertion coord into the right neighbor.
  5239. + */
  5240. + coord_dup(&coord_shadow, op->u.insert.d->coord);
  5241. + node_shadow = op->node;
  5242. + /* move insertion point into newly created node if:
  5243. +
  5244. + . insertion point is rightmost in the source node, or
  5245. + . this is not the first node we are allocating in a row.
  5246. + */
  5247. + gointo =
  5248. + (blk_alloc > 0) ||
  5249. + coord_is_after_rightmost(op->u.insert.d->coord);
  5250. +
  5251. + if (gointo &&
  5252. + op->op == COP_PASTE &&
  5253. + coord_is_existing_item(op->u.insert.d->coord) &&
  5254. + is_solid_item((item_plugin_by_coord(op->u.insert.d->coord)))) {
  5255. + /* paste into solid (atomic) item, which can contain
  5256. + only one unit, so we need to shift it right, where
  5257. + insertion point supposed to be */
  5258. +
  5259. + assert("edward-1444", op->u.insert.d->data->iplug ==
  5260. + item_plugin_by_id(STATIC_STAT_DATA_ID));
  5261. + assert("edward-1445",
  5262. + op->u.insert.d->data->length >
  5263. + node_plugin_by_node(coord->node)->free_space
  5264. + (coord->node));
  5265. +
  5266. + op->u.insert.d->coord->between = BEFORE_UNIT;
  5267. + }
  5268. +
  5269. + result = carry_shift_data(RIGHT_SIDE, coord,
  5270. + reiser4_carry_real(fresh),
  5271. + doing, todo, gointo);
  5272. + /* if insertion point was actually moved into new node,
  5273. + update carry node pointer in operation. */
  5274. + node = sync_op(op, fresh);
  5275. + not_enough_space = free_space_shortage(node, op);
  5276. + if ((not_enough_space > 0) && (node != coord_shadow.node)) {
  5277. + /* there is not enough free in new node. Shift
  5278. + insertion point back to the @shadow_node so that
  5279. + next new node would be inserted between
  5280. + @shadow_node and @fresh.
  5281. + */
  5282. + coord_normalize(&coord_shadow);
  5283. + coord_dup(coord, &coord_shadow);
  5284. + node = coord->node;
  5285. + op->node = node_shadow;
  5286. + if (1 || (flags & COPI_STEP_BACK)) {
  5287. + /* still not enough space?! Maybe there is
  5288. + enough space in the source node (i.e., node
  5289. + data are moved from) now.
  5290. + */
  5291. + not_enough_space =
  5292. + free_space_shortage(node, op);
  5293. + }
  5294. + }
  5295. + }
  5296. + if (not_enough_space > 0) {
  5297. + if (!(flags & COPI_DONT_ALLOCATE))
  5298. + warning("nikita-948", "Cannot insert new item");
  5299. + result = -E_NODE_FULL;
  5300. + }
  5301. + assert("nikita-1622", ergo(result == 0,
  5302. + reiser4_carry_real(op->node) == coord->node));
  5303. + assert("nikita-2616", coord == op->u.insert.d->coord);
  5304. + if (result == 0)
  5305. + result = make_space_tail(op, doing, orig_node);
  5306. + return result;
  5307. +}
  5308. +
  5309. +/* insert_paste_common() - common part of insert and paste operations
  5310. +
  5311. + This function performs common part of COP_INSERT and COP_PASTE.
  5312. +
  5313. + There are two ways in which insertion/paste can be requested:
  5314. +
  5315. + . by directly supplying reiser4_item_data. In this case, op ->
  5316. + u.insert.type is set to COPT_ITEM_DATA.
  5317. +
  5318. + . by supplying a child pointer which is to be inserted into the parent. In this
  5319. + case op -> u.insert.type == COPT_CHILD.
  5320. +
  5321. + . by supplying key of new item/unit. This is currently only used during
  5322. + extent insertion
  5323. +
  5324. + This is required, because when new node is allocated we don't know at what
  5325. + position pointer to it is to be stored in the parent. Actually, we don't
  5326. + even know what its parent will be, because parent can be re-balanced
  5327. + concurrently and new node re-parented, and because parent can be full and
  5328. + pointer to the new node will go into some other node.
  5329. +
  5330. + insert_paste_common() resolves pointer to child node into position in the
  5331. + parent by calling find_new_child_coord(), that fills
  5332. + reiser4_item_data. After this, insertion/paste proceeds uniformly.
  5333. +
  5334. + Another complication is with finding free space during pasting. It may
  5335. + happen that while shifting items to the neighbors and newly allocated
  5336. + nodes, insertion coord can no longer be in the item we wanted to paste
  5337. + into. At this point, paste becomes (morphs) into insert. Moreover free
  5338. + space analysis has to be repeated, because amount of space required for
  5339. + insertion is different from that of paste (item header overhead, etc).
  5340. +
  5341. + This function "unifies" different insertion modes (by resolving child
  5342. + pointer or key into insertion coord), and then calls make_space() to free
  5343. + enough space in the node by shifting data to the left and right and by
  5344. + allocating new nodes if necessary. Carry operation knows amount of space
  5345. + required for its completion. After enough free space is obtained, caller of
  5346. + this function (carry_{insert,paste,etc.}) performs actual insertion/paste
  5347. + by calling item plugin method.
  5348. +
  5349. +*/
  5350. +static int insert_paste_common(carry_op * op /* carry operation being
  5351. + * performed */ ,
  5352. + carry_level * doing /* current carry level */ ,
  5353. + carry_level * todo /* next carry level */ ,
  5354. + carry_insert_data * cdata /* pointer to
  5355. + * cdata */ ,
  5356. + coord_t *coord /* insertion/paste coord */ ,
  5357. + reiser4_item_data * data /* data to be
  5358. + * inserted/pasted */ )
  5359. +{
  5360. + assert("nikita-981", op != NULL);
  5361. + assert("nikita-980", todo != NULL);
  5362. + assert("nikita-979", (op->op == COP_INSERT) || (op->op == COP_PASTE)
  5363. + || (op->op == COP_EXTENT));
  5364. +
  5365. + if (op->u.insert.type == COPT_PASTE_RESTARTED) {
  5366. + /* nothing to do. Fall through to make_space(). */
  5367. + ;
  5368. + } else if (op->u.insert.type == COPT_KEY) {
  5369. + node_search_result intra_node;
  5370. + znode *node;
  5371. + /* Problem with doing batching at the lowest level, is that
  5372. + operations here are given by coords where modification is
  5373. + to be performed, and one modification can invalidate coords
  5374. + of all following operations.
  5375. +
  5376. + So, we are implementing yet another type for operation that
  5377. + will use (the only) "locator" stable across shifting of
  5378. + data between nodes, etc.: key (COPT_KEY).
  5379. +
  5380. + This clause resolves key to the coord in the node.
  5381. +
  5382. + But node can change also. Probably some pieces have to be
  5383. + added to the lock_carry_node(), to lock node by its key.
  5384. +
  5385. + */
  5386. + /* NOTE-NIKITA Lookup bias is fixed to FIND_EXACT. Complain
  5387. + if you need something else. */
  5388. + op->u.insert.d->coord = coord;
  5389. + node = reiser4_carry_real(op->node);
  5390. + intra_node = node_plugin_by_node(node)->lookup
  5391. + (node, op->u.insert.d->key, FIND_EXACT,
  5392. + op->u.insert.d->coord);
  5393. + if ((intra_node != NS_FOUND) && (intra_node != NS_NOT_FOUND)) {
  5394. + warning("nikita-1715", "Intra node lookup failure: %i",
  5395. + intra_node);
  5396. + return intra_node;
  5397. + }
  5398. + } else if (op->u.insert.type == COPT_CHILD) {
  5399. + /* if we are asked to insert pointer to the child into
  5400. + internal node, first convert pointer to the child into
  5401. + coord within parent node.
  5402. + */
  5403. + znode *child;
  5404. + int result;
  5405. +
  5406. + op->u.insert.d = cdata;
  5407. + op->u.insert.d->coord = coord;
  5408. + op->u.insert.d->data = data;
  5409. + op->u.insert.d->coord->node = reiser4_carry_real(op->node);
  5410. + result = find_new_child_coord(op);
  5411. + child = reiser4_carry_real(op->u.insert.child);
  5412. + if (result != NS_NOT_FOUND) {
  5413. + warning("nikita-993",
  5414. + "Cannot find a place for child pointer: %i",
  5415. + result);
  5416. + return result;
  5417. + }
  5418. + /* This only happens when we did multiple insertions at
  5419. + the previous level, trying to insert single item and
  5420. + it so happened, that insertion of pointers to all new
  5421. + nodes before this one already caused parent node to
  5422. + split (may be several times).
  5423. +
  5424. + I am going to come up with better solution.
  5425. +
  5426. + You are not expected to understand this.
  5427. + -- v6root/usr/sys/ken/slp.c
  5428. +
  5429. + Basically, what happens here is the following: carry came
  5430. + to the parent level and is about to insert internal item
  5431. + pointing to the child node that it just inserted in the
  5432. + level below. Position where internal item is to be inserted
  5433. + was found by find_new_child_coord() above, but node of the
  5434. + current carry operation (that is, parent node of child
  5435. + inserted on the previous level), was determined earlier in
  5436. + the lock_carry_level/lock_carry_node. It could so happen
  5437. + that other carry operations already performed on the parent
  5438. + level already split parent node, so that insertion point
  5439. + moved into another node. Handle this by creating new carry
  5440. + node for insertion point if necessary.
  5441. + */
  5442. + if (reiser4_carry_real(op->node) !=
  5443. + op->u.insert.d->coord->node) {
  5444. + pool_ordering direction;
  5445. + znode *z1;
  5446. + znode *z2;
  5447. + reiser4_key k1;
  5448. + reiser4_key k2;
  5449. +
  5450. + /*
  5451. + * determine in what direction insertion point
  5452. + * moved. Do this by comparing delimiting keys.
  5453. + */
  5454. + z1 = op->u.insert.d->coord->node;
  5455. + z2 = reiser4_carry_real(op->node);
  5456. + if (keyle(leftmost_key_in_node(z1, &k1),
  5457. + leftmost_key_in_node(z2, &k2)))
  5458. + /* insertion point moved to the left */
  5459. + direction = POOLO_BEFORE;
  5460. + else
  5461. + /* insertion point moved to the right */
  5462. + direction = POOLO_AFTER;
  5463. +
  5464. + op->node = reiser4_add_carry_skip(doing,
  5465. + direction, op->node);
  5466. + if (IS_ERR(op->node))
  5467. + return PTR_ERR(op->node);
  5468. + op->node->node = op->u.insert.d->coord->node;
  5469. + op->node->free = 1;
  5470. + result = lock_carry_node(doing, op->node);
  5471. + if (result != 0)
  5472. + return result;
  5473. + }
  5474. +
  5475. + /*
  5476. + * set up key of an item being inserted: we are inserting
  5477. + * internal item and its key is (by the very definition of
  5478. + * search tree) the leftmost key in the child node.
  5479. + */
  5480. + write_lock_dk(znode_get_tree(child));
  5481. + op->u.insert.d->key = leftmost_key_in_node(child,
  5482. + znode_get_ld_key(child));
  5483. + write_unlock_dk(znode_get_tree(child));
  5484. + op->u.insert.d->data->arg = op->u.insert.brother;
  5485. + } else {
  5486. + assert("vs-243", op->u.insert.d->coord != NULL);
  5487. + op->u.insert.d->coord->node = reiser4_carry_real(op->node);
  5488. + }
  5489. +
  5490. + /* find free space. */
  5491. + return make_space(op, doing, todo);
  5492. +}
  5493. +
  5494. +/* handle carry COP_INSERT operation.
  5495. +
  5496. + Insert new item into node. New item can be given in one of two ways:
  5497. +
  5498. + - by passing &tree_coord and &reiser4_item_data as part of @op. This is
  5499. + only applicable at the leaf/twig level.
  5500. +
  5501. + - by passing a child node pointer to which is to be inserted by this
  5502. + operation.
  5503. +
  5504. +*/
  5505. +static int carry_insert(carry_op * op /* operation to perform */ ,
  5506. + carry_level * doing /* queue of operations @op
  5507. + * is part of */ ,
  5508. + carry_level * todo /* queue where new operations
  5509. + * are accumulated */ )
  5510. +{
  5511. + znode *node;
  5512. + carry_insert_data cdata;
  5513. + coord_t coord;
  5514. + reiser4_item_data data;
  5515. + carry_plugin_info info;
  5516. + int result;
  5517. +
  5518. + assert("nikita-1036", op != NULL);
  5519. + assert("nikita-1037", todo != NULL);
  5520. + assert("nikita-1038", op->op == COP_INSERT);
  5521. +
  5522. + coord_init_zero(&coord);
  5523. +
  5524. + /* perform common functionality of insert and paste. */
  5525. + result = insert_paste_common(op, doing, todo, &cdata, &coord, &data);
  5526. + if (result != 0)
  5527. + return result;
  5528. +
  5529. + node = op->u.insert.d->coord->node;
  5530. + assert("nikita-1039", node != NULL);
  5531. + assert("nikita-1040", node_plugin_by_node(node) != NULL);
  5532. +
  5533. + assert("nikita-949",
  5534. + space_needed_for_op(node, op) <= znode_free_space(node));
  5535. +
  5536. + /* ask node layout to create new item. */
  5537. + info.doing = doing;
  5538. + info.todo = todo;
  5539. + result = node_plugin_by_node(node)->create_item
  5540. + (op->u.insert.d->coord, op->u.insert.d->key, op->u.insert.d->data,
  5541. + &info);
  5542. + doing->restartable = 0;
  5543. + znode_make_dirty(node);
  5544. +
  5545. + return result;
  5546. +}
  5547. +
  5548. +/*
  5549. + * Flow insertion code. COP_INSERT_FLOW is special tree operation that is
  5550. + * supplied with a "flow" (that is, a stream of data) and inserts it into tree
  5551. + * by slicing into multiple items.
  5552. + */
  5553. +
  5554. +#define flow_insert_point(op) ((op)->u.insert_flow.insert_point)
  5555. +#define flow_insert_flow(op) ((op)->u.insert_flow.flow)
  5556. +#define flow_insert_data(op) ((op)->u.insert_flow.data)
  5557. +
  5558. +static size_t item_data_overhead(carry_op * op)
  5559. +{
  5560. + if (flow_insert_data(op)->iplug->b.estimate == NULL)
  5561. + return 0;
  5562. + return (flow_insert_data(op)->iplug->b.
  5563. + estimate(NULL /* estimate insertion */ , flow_insert_data(op)) -
  5564. + flow_insert_data(op)->length);
  5565. +}
  5566. +
  5567. +/* FIXME-VS: this is called several times during one make_flow_for_insertion
  5568. + and it will always return the same result. Some optimization could be made
  5569. + by calculating this value once at the beginning and passing it around. That
  5570. + would reduce some flexibility in future changes
  5571. +*/
  5572. +static int can_paste(coord_t *, const reiser4_key *, const reiser4_item_data *);
  5573. +static size_t flow_insertion_overhead(carry_op * op)
  5574. +{
  5575. + znode *node;
  5576. + size_t insertion_overhead;
  5577. +
  5578. + node = flow_insert_point(op)->node;
  5579. + insertion_overhead = 0;
  5580. + if (node->nplug->item_overhead &&
  5581. + !can_paste(flow_insert_point(op), &flow_insert_flow(op)->key,
  5582. + flow_insert_data(op)))
  5583. + insertion_overhead =
  5584. + node->nplug->item_overhead(node, NULL) +
  5585. + item_data_overhead(op);
  5586. + return insertion_overhead;
  5587. +}
  5588. +
/* how many bytes of flow does fit to the node

   Returns the number of flow bytes writable into the insert point node after
   accounting for insertion overhead; capped at the remaining flow length.
   NOTE(review): return type is int while free space is size_t — presumably
   node sizes keep this in range; confirm against the block size limits. */
static int what_can_fit_into_node(carry_op * op)
{
	size_t free, overhead;

	/* space consumed by item/node bookkeeping if a new item is created */
	overhead = flow_insertion_overhead(op);
	free = znode_free_space(flow_insert_point(op)->node);
	if (free <= overhead)
		return 0;
	free -= overhead;
	/* FIXME: flow->length is loff_t only to not get overflowed in case of
	   expanding truncate */
	if (free < op->u.insert_flow.flow->length)
		return free;
	return (int)op->u.insert_flow.flow->length;
}
  5605. +
/* in make_space_for_flow_insertion we need to check either whether whole flow
   fits into a node or whether minimal fraction of flow fits into a node */
static int enough_space_for_whole_flow(carry_op * op)
{
	/* what_can_fit_into_node() never exceeds flow->length, so equality
	   means the entire remaining flow fits into the insert point node */
	return (unsigned)what_can_fit_into_node(op) ==
	    op->u.insert_flow.flow->length;
}
  5613. +
#define MIN_FLOW_FRACTION 1
/* true if at least MIN_FLOW_FRACTION bytes of the flow fit into the insert
   point node — the minimal amount considered worth writing there */
static int enough_space_for_min_flow_fraction(carry_op * op)
{
	return what_can_fit_into_node(op) >= MIN_FLOW_FRACTION;
}
  5621. +
/* this returns 0 if left neighbor was obtained successfully and everything
   upto insertion point including it were shifted and left neighbor still has
   some free space to put minimal fraction of flow into it.

   Returns 1 (not an errno) on any failure; an error from the neighbor lookup
   is only warned about, since the caller can fall back to other strategies. */
static int
make_space_by_shift_left(carry_op * op, carry_level * doing, carry_level * todo)
{
	carry_node *left;
	znode *orig;

	/* obtain (through the carry machinery) the left neighbor of the
	   insert point node */
	left = find_left_neighbor(op, doing);
	if (unlikely(IS_ERR(left))) {
		warning("vs-899",
			"make_space_by_shift_left: "
			"error accessing left neighbor: %li", PTR_ERR(left));
		return 1;
	}
	if (left == NULL)
		/* left neighbor either does not exist or is unformatted
		   node */
		return 1;

	orig = flow_insert_point(op)->node;
	/* try to shift content of node @orig from its head upto insert point
	   including insertion point into the left neighbor */
	carry_shift_data(LEFT_SIDE, flow_insert_point(op),
			 reiser4_carry_real(left), doing, todo,
			 1/* including insert point */);
	if (reiser4_carry_real(left) != flow_insert_point(op)->node) {
		/* insertion point did not move */
		return 1;
	}

	/* insertion point is set after last item in the node */
	assert("vs-900", coord_is_after_rightmost(flow_insert_point(op)));

	if (!enough_space_for_min_flow_fraction(op)) {
		/* insertion point node does not have enough free space to put
		   even minimal portion of flow into it, therefore, move
		   insertion point back to orig node (before first item) */
		coord_init_before_first_item(flow_insert_point(op), orig);
		return 1;
	}

	/* part of flow is to be written to the end of node */
	op->node = left;
	return 0;
}
  5669. +
  5670. +/* this returns 0 if right neighbor was obtained successfully and everything to
  5671. + the right of insertion point was shifted to it and node got enough free
  5672. + space to put minimal fraction of flow into it */
  5673. +static int
  5674. +make_space_by_shift_right(carry_op * op, carry_level * doing,
  5675. + carry_level * todo)
  5676. +{
  5677. + carry_node *right;
  5678. +
  5679. + right = find_right_neighbor(op, doing);
  5680. + if (unlikely(IS_ERR(right))) {
  5681. + warning("nikita-1065", "shift_right_excluding_insert_point: "
  5682. + "error accessing right neighbor: %li", PTR_ERR(right));
  5683. + return 1;
  5684. + }
  5685. + if (right) {
  5686. + /* shift everything possible on the right of but excluding
  5687. + insertion coord into the right neighbor */
  5688. + carry_shift_data(RIGHT_SIDE, flow_insert_point(op),
  5689. + reiser4_carry_real(right), doing, todo,
  5690. + 0/* not including insert point */);
  5691. + } else {
  5692. + /* right neighbor either does not exist or is unformatted
  5693. + node */
  5694. + ;
  5695. + }
  5696. + if (coord_is_after_rightmost(flow_insert_point(op))) {
  5697. + if (enough_space_for_min_flow_fraction(op)) {
  5698. + /* part of flow is to be written to the end of node */
  5699. + return 0;
  5700. + }
  5701. + }
  5702. +
  5703. + /* new node is to be added if insert point node did not get enough
  5704. + space for whole flow */
  5705. + return 1;
  5706. +}
  5707. +
  5708. +/* this returns 0 when insert coord is set at the node end and fraction of flow
  5709. + fits into that node */
  5710. +static int
  5711. +make_space_by_new_nodes(carry_op * op, carry_level * doing, carry_level * todo)
  5712. +{
  5713. + int result;
  5714. + znode *node;
  5715. + carry_node *new;
  5716. +
  5717. + node = flow_insert_point(op)->node;
  5718. +
  5719. + if (op->u.insert_flow.new_nodes == CARRY_FLOW_NEW_NODES_LIMIT)
  5720. + return RETERR(-E_NODE_FULL);
  5721. + /* add new node after insert point node */
  5722. + new = add_new_znode(node, op->node, doing, todo);
  5723. + if (unlikely(IS_ERR(new)))
  5724. + return PTR_ERR(new);
  5725. + result = lock_carry_node(doing, new);
  5726. + zput(reiser4_carry_real(new));
  5727. + if (unlikely(result))
  5728. + return result;
  5729. + op->u.insert_flow.new_nodes++;
  5730. + if (!coord_is_after_rightmost(flow_insert_point(op))) {
  5731. + carry_shift_data(RIGHT_SIDE, flow_insert_point(op),
  5732. + reiser4_carry_real(new), doing, todo,
  5733. + 0/* not including insert point */);
  5734. + assert("vs-901",
  5735. + coord_is_after_rightmost(flow_insert_point(op)));
  5736. +
  5737. + if (enough_space_for_min_flow_fraction(op))
  5738. + return 0;
  5739. + if (op->u.insert_flow.new_nodes == CARRY_FLOW_NEW_NODES_LIMIT)
  5740. + return RETERR(-E_NODE_FULL);
  5741. +
  5742. + /* add one more new node */
  5743. + new = add_new_znode(node, op->node, doing, todo);
  5744. + if (unlikely(IS_ERR(new)))
  5745. + return PTR_ERR(new);
  5746. + result = lock_carry_node(doing, new);
  5747. + zput(reiser4_carry_real(new));
  5748. + if (unlikely(result))
  5749. + return result;
  5750. + op->u.insert_flow.new_nodes++;
  5751. + }
  5752. +
  5753. + /* move insertion point to new node */
  5754. + coord_init_before_first_item(flow_insert_point(op),
  5755. + reiser4_carry_real(new));
  5756. + op->node = new;
  5757. + return 0;
  5758. +}
  5759. +
/* Make room for (at least part of) the flow at the insert point.

   Strategies are tried in order, re-checking after each shift whether the
   whole flow now fits: use current node as-is, shift left, shift right,
   and finally allocate new nodes. COPI_* flags can suppress individual
   strategies. Returns 0 on success, negative error code otherwise. */
static int
make_space_for_flow_insertion(carry_op * op, carry_level * doing,
			      carry_level * todo)
{
	__u32 flags = op->u.insert_flow.flags;

	if (enough_space_for_whole_flow(op)) {
		/* whole flow fits into insert point node */
		return 0;
	}
	if ((flags & COPI_SWEEP) &&
	    enough_space_for_min_flow_fraction(op))
		/* use the rest of space in the current node */
		return 0;

	if (!(flags & COPI_DONT_SHIFT_LEFT)
	    && (make_space_by_shift_left(op, doing, todo) == 0)) {
		/* insert point is shifted to left neighbor of original insert
		   point node and is set after last unit in that node. It has
		   enough space to fit at least minimal fraction of flow. */
		return 0;
	}

	if (enough_space_for_whole_flow(op)) {
		/* whole flow fits into insert point node */
		return 0;
	}

	if (!(flags & COPI_DONT_SHIFT_RIGHT)
	    && (make_space_by_shift_right(op, doing, todo) == 0)) {
		/* insert point is still set to the same node, but there is
		   nothing to the right of insert point. */
		return 0;
	}

	if (enough_space_for_whole_flow(op)) {
		/* whole flow fits into insert point node */
		return 0;
	}

	return make_space_by_new_nodes(op, doing, todo);
}
  5802. +
/* implements COP_INSERT_FLOW operation

   Writes flow @f into the tree chunk by chunk: each loop iteration makes
   space at the insert point, then either appends to an existing item
   (paste) or creates a new item, consuming up to what_can_fit_into_node()
   bytes of the flow. If the insert point migrated to a different node, the
   tracked lock handle is moved to that node before returning. */
static int
carry_insert_flow(carry_op * op, carry_level * doing, carry_level * todo)
{
	int result;
	flow_t *f;
	coord_t *insert_point;
	node_plugin *nplug;
	carry_plugin_info info;
	znode *orig_node;
	lock_handle *orig_lh;

	f = op->u.insert_flow.flow;
	result = 0;

	/* carry system needs this to work */
	info.doing = doing;
	info.todo = todo;

	/* remember where we started so the tracked lock can be migrated if
	   the insert point ends up in a different node */
	orig_node = flow_insert_point(op)->node;
	orig_lh = doing->tracked;

	while (f->length) {
		result = make_space_for_flow_insertion(op, doing, todo);
		if (result)
			break;

		insert_point = flow_insert_point(op);
		nplug = node_plugin_by_node(insert_point->node);

		/* compose item data for insertion/pasting */
		flow_insert_data(op)->data = f->data;
		flow_insert_data(op)->length = what_can_fit_into_node(op);

		if (can_paste(insert_point, &f->key, flow_insert_data(op))) {
			/* insert point is set to item of file we are writing to
			   and we have to append to it */
			assert("vs-903", insert_point->between == AFTER_UNIT);
			nplug->change_item_size(insert_point,
						flow_insert_data(op)->length);
			flow_insert_data(op)->iplug->b.paste(insert_point,
							     flow_insert_data
							     (op), &info);
		} else {
			/* new item must be inserted */
			pos_in_node_t new_pos;
			/* creating a new item also consumes the plugin's
			   per-item overhead */
			flow_insert_data(op)->length += item_data_overhead(op);

			/* FIXME-VS: this is because node40_create_item changes
			   insert_point for obscure reasons */
			switch (insert_point->between) {
			case AFTER_ITEM:
				new_pos = insert_point->item_pos + 1;
				break;
			case EMPTY_NODE:
				new_pos = 0;
				break;
			case BEFORE_ITEM:
				assert("vs-905", insert_point->item_pos == 0);
				new_pos = 0;
				break;
			default:
				impossible("vs-906",
					   "carry_insert_flow: invalid coord");
				new_pos = 0;
				break;
			}

			nplug->create_item(insert_point, &f->key,
					   flow_insert_data(op), &info);
			coord_set_item_pos(insert_point, new_pos);
		}
		/* leave the coord after what was just written, mark the node
		   dirty and make the carry non-restartable */
		coord_init_after_item_end(insert_point);
		doing->restartable = 0;
		znode_make_dirty(insert_point->node);

		move_flow_forward(f, (unsigned)flow_insert_data(op)->length);
	}

	if (orig_node != flow_insert_point(op)->node) {
		/* move lock to new insert point */
		done_lh(orig_lh);
		init_lh(orig_lh);
		result =
		    longterm_lock_znode(orig_lh, flow_insert_point(op)->node,
					ZNODE_WRITE_LOCK, ZNODE_LOCK_HIPRI);
	}

	return result;
}
  5893. +
/* implements COP_DELETE operation

   Remove pointer to @op -> u.delete.child from its parent.

   This function also handles killing of the tree root if the last pointer
   was removed from it. This is complicated by our handling of "twig" level:
   root on twig level is never killed.

*/
static int carry_delete(carry_op * op /* operation to be performed */ ,
			carry_level * doing UNUSED_ARG /* current carry
							* level */ ,
			carry_level * todo/* next carry level */)
{
	int result;
	coord_t coord;
	coord_t coord2;
	znode *parent;
	znode *child;
	carry_plugin_info info;
	reiser4_tree *tree;

	/*
	 * This operation is called to delete internal item pointing to the
	 * child node that was removed by carry from the tree on the previous
	 * tree level.
	 */

	assert("nikita-893", op != NULL);
	assert("nikita-894", todo != NULL);
	assert("nikita-895", op->op == COP_DELETE);

	coord_init_zero(&coord);
	coord_init_zero(&coord2);

	parent = reiser4_carry_real(op->node);
	/* the child may be given explicitly; otherwise it is the real node
	   of @op->node's reference */
	child = op->u.delete.child ?
	    reiser4_carry_real(op->u.delete.child) : op->node->node;
	tree = znode_get_tree(child);
	read_lock_tree(tree);

	/*
	 * @parent was determined when carry entered parent level
	 * (lock_carry_level/lock_carry_node). Since then, actual parent of
	 * @child node could change due to other carry operations performed on
	 * the parent level. Check for this.
	 */

	if (znode_parent(child) != parent) {
		/* NOTE-NIKITA add stat counter for this. */
		parent = znode_parent(child);
		assert("nikita-2581", find_carry_node(doing, parent));
	}
	read_unlock_tree(tree);

	assert("nikita-1213", znode_get_level(parent) > LEAF_LEVEL);

	/* Twig level horrors: tree should be of height at least 2. So, last
	   pointer from the root at twig level is preserved even if child is
	   empty. This is ugly, but so it was architectured.
	 */

	if (znode_is_root(parent) &&
	    znode_get_level(parent) <= REISER4_MIN_TREE_HEIGHT &&
	    node_num_items(parent) == 1) {
		/* Delimiting key manipulations: widen the child's key range
		   to cover the whole key space, since it is now the only
		   child of the root. */
		write_lock_dk(tree);
		znode_set_ld_key(child, znode_set_ld_key(parent, reiser4_min_key()));
		znode_set_rd_key(child, znode_set_rd_key(parent, reiser4_max_key()));
		ZF_SET(child, JNODE_DKSET);
		write_unlock_dk(tree);

		/* @child escaped imminent death! */
		ZF_CLR(child, JNODE_HEARD_BANSHEE);
		return 0;
	}

	/* convert child pointer to the coord_t */
	result = find_child_ptr(parent, child, &coord);
	if (result != NS_FOUND) {
		warning("nikita-994", "Cannot find child pointer: %i", result);
		print_coord_content("coord", &coord);
		return result;
	}

	coord_dup(&coord2, &coord);
	info.doing = doing;
	info.todo = todo;
	{
		/*
		 * Actually kill internal item: prepare structure with
		 * arguments for ->cut_and_kill() method...
		 */

		struct carry_kill_data kdata;
		kdata.params.from = &coord;
		kdata.params.to = &coord2;
		kdata.params.from_key = NULL;
		kdata.params.to_key = NULL;
		kdata.params.smallest_removed = NULL;
		kdata.params.truncate = 1;
		kdata.flags = op->u.delete.flags;
		kdata.inode = NULL;
		kdata.left = NULL;
		kdata.right = NULL;
		kdata.buf = NULL;
		/* ... and call it. */
		result = node_plugin_by_node(parent)->cut_and_kill(&kdata,
								   &info);
	}
	doing->restartable = 0;

	/* check whether root should be killed violently */
	if (znode_is_root(parent) &&
	    /* don't kill roots at and lower than twig level */
	    znode_get_level(parent) > REISER4_MIN_TREE_HEIGHT &&
	    node_num_items(parent) == 1)
		result = reiser4_kill_tree_root(coord.node);

	/* positive return codes from ->cut_and_kill() are not errors */
	return result < 0 ? result : 0;
}
  6015. +
  6016. +/* implements COP_CUT opration
  6017. +
  6018. + Cuts part or whole content of node.
  6019. +
  6020. +*/
  6021. +static int carry_cut(carry_op * op /* operation to be performed */ ,
  6022. + carry_level * doing /* current carry level */ ,
  6023. + carry_level * todo/* next carry level */)
  6024. +{
  6025. + int result;
  6026. + carry_plugin_info info;
  6027. + node_plugin *nplug;
  6028. +
  6029. + assert("nikita-896", op != NULL);
  6030. + assert("nikita-897", todo != NULL);
  6031. + assert("nikita-898", op->op == COP_CUT);
  6032. +
  6033. + info.doing = doing;
  6034. + info.todo = todo;
  6035. +
  6036. + nplug = node_plugin_by_node(reiser4_carry_real(op->node));
  6037. + if (op->u.cut_or_kill.is_cut)
  6038. + result = nplug->cut(op->u.cut_or_kill.u.cut, &info);
  6039. + else
  6040. + result = nplug->cut_and_kill(op->u.cut_or_kill.u.kill, &info);
  6041. +
  6042. + doing->restartable = 0;
  6043. + return result < 0 ? result : 0;
  6044. +}
  6045. +
/* helper function for carry_paste(): returns true if @op can be continued as
   paste

   Decides whether @data with @key can be glued into an existing item at or
   next to @icoord. NOTE: this may reposition @icoord (e.g. move it to the
   neighboring item and set ->between to AFTER_UNIT/BEFORE_UNIT) so that a
   subsequent paste lands in the right place. */
static int
can_paste(coord_t *icoord, const reiser4_key * key,
	  const reiser4_item_data * data)
{
	coord_t circa;
	item_plugin *new_iplug;
	item_plugin *old_iplug;
	int result = 0;		/* to keep gcc shut */

	assert("", icoord->between != AT_UNIT);

	/* obviously, one cannot paste when node is empty---there is nothing
	   to paste into. */
	if (node_is_empty(icoord->node))
		return 0;
	/* if insertion point is at the middle of the item, then paste */
	if (!coord_is_between_items(icoord))
		return 1;
	coord_dup(&circa, icoord);
	circa.between = AT_UNIT;

	old_iplug = item_plugin_by_coord(&circa);
	new_iplug = data->iplug;

	/* check whether we can paste to the item @icoord is "at" when we
	   ignore ->between field */
	if (old_iplug == new_iplug && item_can_contain_key(&circa, key, data))
		result = 1;
	else if (icoord->between == BEFORE_UNIT
		 || icoord->between == BEFORE_ITEM) {
		/* otherwise, try to glue to the item at the left, if any */
		coord_dup(&circa, icoord);
		if (coord_set_to_left(&circa)) {
			/* no item on the left: will have to insert */
			result = 0;
			coord_init_before_item(icoord);
		} else {
			old_iplug = item_plugin_by_coord(&circa);
			result = (old_iplug == new_iplug)
			    && item_can_contain_key(icoord, key, data);
			if (result) {
				/* paste after the last unit of the left
				   item */
				coord_dup(icoord, &circa);
				icoord->between = AFTER_UNIT;
			}
		}
	} else if (icoord->between == AFTER_UNIT
		   || icoord->between == AFTER_ITEM) {
		coord_dup(&circa, icoord);
		/* otherwise, try to glue to the item at the right, if any */
		if (coord_set_to_right(&circa)) {
			/* no item on the right: will have to insert */
			result = 0;
			coord_init_after_item(icoord);
		} else {
			int (*cck) (const coord_t *, const reiser4_key *,
				    const reiser4_item_data *);

			old_iplug = item_plugin_by_coord(&circa);

			cck = old_iplug->b.can_contain_key;
			if (cck == NULL)
				/* item doesn't define ->can_contain_key
				   method? So it is not expandable. */
				result = 0;
			else {
				result = (old_iplug == new_iplug)
				    && cck(&circa /*icoord */ , key, data);
				if (result) {
					/* paste before the first unit of the
					   right item */
					coord_dup(icoord, &circa);
					icoord->between = BEFORE_UNIT;
				}
			}
		}
	} else
		impossible("nikita-2513", "Nothing works");
	if (result) {
		/* normalize item-granular positions into unit-granular ones
		   expected by the paste path */
		if (icoord->between == BEFORE_ITEM) {
			assert("vs-912", icoord->unit_pos == 0);
			icoord->between = BEFORE_UNIT;
		} else if (icoord->between == AFTER_ITEM) {
			coord_init_after_item_end(icoord);
		}
	}
	return result;
}
  6131. +
/* implements COP_PASTE operation

   Paste data into existing item. This is complicated by the fact that after
   we shifted something to the left or right neighbors trying to free some
   space, item we were supposed to paste into can be in different node than
   insertion coord. If so, we are no longer doing paste, but insert. See
   comments in insert_paste_common().

*/
static int carry_paste(carry_op * op /* operation to be performed */ ,
		       carry_level * doing UNUSED_ARG /* current carry
						       * level */ ,
		       carry_level * todo/* next carry level */)
{
	znode *node;
	carry_insert_data cdata;
	coord_t dcoord;
	reiser4_item_data data;
	int result;
	int real_size;
	item_plugin *iplug;
	carry_plugin_info info;
	coord_t *coord;

	assert("nikita-982", op != NULL);
	assert("nikita-983", todo != NULL);
	assert("nikita-984", op->op == COP_PASTE);

	coord_init_zero(&dcoord);

	/* shared insert/paste preparation: resolves target coord, makes
	   space, may shift data between nodes */
	result = insert_paste_common(op, doing, todo, &cdata, &dcoord, &data);
	if (result != 0)
		return result;

	coord = op->u.insert.d->coord;

	/* handle case when op -> u.insert.coord doesn't point to the item
	   of required type. restart as insert. */
	if (!can_paste(coord, op->u.insert.d->key, op->u.insert.d->data)) {
		op->op = COP_INSERT;
		op->u.insert.type = COPT_PASTE_RESTARTED;
		result = op_dispatch_table[COP_INSERT].handler(op, doing, todo);

		return result;
	}

	node = coord->node;
	iplug = item_plugin_by_coord(coord);
	assert("nikita-992", iplug != NULL);

	assert("nikita-985", node != NULL);
	assert("nikita-986", node_plugin_by_node(node) != NULL);

	assert("nikita-987",
	       space_needed_for_op(node, op) <= znode_free_space(node));

	assert("nikita-1286", coord_is_existing_item(coord));

	/*
	 * if item is expanded as a result of this operation, we should first
	 * change item size, than call ->b.paste item method. If item is
	 * shrunk, it should be done other way around: first call ->b.paste
	 * method, then reduce item size.
	 */

	real_size = space_needed_for_op(node, op);
	if (real_size > 0)
		node->nplug->change_item_size(coord, real_size);

	doing->restartable = 0;
	info.doing = doing;
	info.todo = todo;

	result = iplug->b.paste(coord, op->u.insert.d->data, &info);

	if (real_size < 0)
		node->nplug->change_item_size(coord, real_size);

	/* if we pasted at the beginning of the item, update item's key. */
	if (coord->unit_pos == 0 && coord->between != AFTER_UNIT)
		node->nplug->update_item_key(coord, op->u.insert.d->key, &info);

	znode_make_dirty(node);
	return result;
}
  6217. +
/* handle carry COP_EXTENT operation.

   Makes space at the leaf level so the insertion point sits at a node
   border, then posts a COP_INSERT of the extent item to the twig level
   (and, if the leaf node ended up empty, a COP_DELETE for it). */
static int carry_extent(carry_op * op /* operation to perform */ ,
			carry_level * doing /* queue of operations @op
					     * is part of */ ,
			carry_level * todo /* queue where new operations
					    * are accumulated */ )
{
	znode *node;
	carry_insert_data cdata;
	coord_t coord;
	reiser4_item_data data;
	carry_op *delete_dummy;
	carry_op *insert_extent;
	int result;
	carry_plugin_info info;

	assert("nikita-1751", op != NULL);
	assert("nikita-1752", todo != NULL);
	assert("nikita-1753", op->op == COP_EXTENT);

	/* extent insertion overview:

	   extents live on the TWIG LEVEL, which is level one above the leaf
	   one. This complicates extent insertion logic somewhat: it may
	   happen (and going to happen all the time) that in logical key
	   ordering extent has to be placed between items I1 and I2, located
	   at the leaf level, but I1 and I2 are in the same formatted leaf
	   node N1. To insert extent one has to

	   (1) reach node N1 and shift data between N1, its neighbors and
	   possibly newly allocated nodes until I1 and I2 fall into different
	   nodes. Since I1 and I2 are still neighboring items in logical key
	   order, they will be necessary utmost items in their respective
	   nodes.

	   (2) After this new extent item is inserted into node on the twig
	   level.

	   Fortunately this process can reuse almost all code from standard
	   insertion procedure (viz. make_space() and insert_paste_common()),
	   due to the following observation: make_space() only shifts data up
	   to and excluding or including insertion point. It never
	   "over-moves" through insertion point. Thus, one can use
	   make_space() to perform step (1). All required for this is just to
	   instruct free_space_shortage() to keep make_space() shifting data
	   until insertion point is at the node border.

	 */

	/* perform common functionality of insert and paste. */
	result = insert_paste_common(op, doing, todo, &cdata, &coord, &data);
	if (result != 0)
		return result;

	node = op->u.extent.d->coord->node;
	assert("nikita-1754", node != NULL);
	assert("nikita-1755", node_plugin_by_node(node) != NULL);
	assert("nikita-1700", coord_wrt(op->u.extent.d->coord) != COORD_INSIDE);

	/* NOTE-NIKITA add some checks here. Not assertions, -EIO. Check that
	   extent fits between items. */

	info.doing = doing;
	info.todo = todo;

	/* there is another complication due to placement of extents on the
	   twig level: extents are "rigid" in the sense that key-range
	   occupied by extent cannot grow indefinitely to the right as it is
	   for the formatted leaf nodes. Because of this when search finds two
	   adjacent extents on the twig level, it has to "drill" to the leaf
	   level, creating new node. Here we are removing this node.
	 */
	if (node_is_empty(node)) {
		delete_dummy = node_post_carry(&info, COP_DELETE, node, 1);
		if (IS_ERR(delete_dummy))
			return PTR_ERR(delete_dummy);
		delete_dummy->u.delete.child = NULL;
		delete_dummy->u.delete.flags = DELETE_RETAIN_EMPTY;
		ZF_SET(node, JNODE_HEARD_BANSHEE);
	}

	/* proceed with inserting extent item into parent. We are definitely
	   inserting rather than pasting if we get that far. */
	insert_extent = node_post_carry(&info, COP_INSERT, node, 1);
	if (IS_ERR(insert_extent))
		/* @delete_dummy will be automatically destroyed on the level
		   exiting */
		return PTR_ERR(insert_extent);
	/* NOTE-NIKITA insertion by key is simplest option here. Another
	   possibility is to insert on the left or right of already existing
	   item.
	 */
	insert_extent->u.insert.type = COPT_KEY;
	insert_extent->u.insert.d = op->u.extent.d;
	assert("nikita-1719", op->u.extent.d->key != NULL);
	insert_extent->u.insert.d->data->arg = op->u.extent.d->coord;
	insert_extent->u.insert.flags =
	    znode_get_tree(node)->carry.new_extent_flags;

	/*
	 * if carry was asked to track lock handle we should actually track
	 * lock handle on the twig node rather than on the leaf where
	 * operation was started from. Transfer tracked lock handle.
	 */
	if (doing->track_type) {
		assert("nikita-3242", doing->tracked != NULL);
		assert("nikita-3244", todo->tracked == NULL);
		todo->tracked = doing->tracked;
		todo->track_type = CARRY_TRACK_NODE;
		doing->tracked = NULL;
		doing->track_type = 0;
	}

	return 0;
}
  6333. +
/* update key in @parent between pointers to @left and @right.

   Find coords of @left and @right and update delimiting key between them.
   This is helper function called by carry_update(). Finds position of
   internal item involved. Updates item key. Updates delimiting keys of child
   nodes involved.
*/
static int update_delimiting_key(znode * parent /* node key is updated
						 * in */ ,
				 znode * left /* child of @parent */ ,
				 znode * right /* child of @parent */ ,
				 carry_level * doing /* current carry
						      * level */ ,
				 carry_level * todo /* parent carry
						     * level */ ,
				 const char **error_msg /* place to
							 * store error
							 * message */ )
{
	coord_t left_pos;
	coord_t right_pos;
	int result;
	reiser4_key ldkey;
	carry_plugin_info info;

	assert("nikita-1177", right != NULL);
	/* find position of right child in a parent */
	result = find_child_ptr(parent, right, &right_pos);
	if (result != NS_FOUND) {
		*error_msg = "Cannot find position of right child";
		return result;
	}

	if ((left != NULL) && !coord_is_leftmost_unit(&right_pos)) {
		/* find position of the left child in a parent */
		result = find_child_ptr(parent, left, &left_pos);
		if (result != NS_FOUND) {
			*error_msg = "Cannot find position of left child";
			return result;
		}
		assert("nikita-1355", left_pos.node != NULL);
	} else
		left_pos.node = NULL;

	/* check that they are separated by exactly one key and are basically
	   sane */
	if (REISER4_DEBUG) {
		if ((left_pos.node != NULL)
		    && !coord_is_existing_unit(&left_pos)) {
			*error_msg = "Left child is bastard";
			return RETERR(-EIO);
		}
		if (!coord_is_existing_unit(&right_pos)) {
			*error_msg = "Right child is bastard";
			return RETERR(-EIO);
		}
		if (left_pos.node != NULL &&
		    !coord_are_neighbors(&left_pos, &right_pos)) {
			*error_msg = "Children are not direct siblings";
			return RETERR(-EIO);
		}
	}
	*error_msg = NULL;

	info.doing = doing;
	info.todo = todo;

	/*
	 * If child node is not empty, new key of internal item is a key of
	 * leftmost item in the child node. If the child is empty, take its
	 * right delimiting key as a new key of the internal item. Precise key
	 * in the latter case is not important per se, because the child (and
	 * the internal item) are going to be killed shortly anyway, but we
	 * have to preserve correct order of keys in the parent node.
	 */

	if (!ZF_ISSET(right, JNODE_HEARD_BANSHEE))
		leftmost_key_in_node(right, &ldkey);
	else {
		read_lock_dk(znode_get_tree(parent));
		ldkey = *znode_get_rd_key(right);
		read_unlock_dk(znode_get_tree(parent));
	}
	node_plugin_by_node(parent)->update_item_key(&right_pos, &ldkey, &info);
	doing->restartable = 0;
	znode_make_dirty(parent);
	return 0;
}
  6422. +
  6423. +/* implements COP_UPDATE operation
  6424. +
  6425. + Update delimiting keys.
  6426. +
  6427. +*/
  6428. +static int carry_update(carry_op * op /* operation to be performed */ ,
  6429. + carry_level * doing /* current carry level */ ,
  6430. + carry_level * todo/* next carry level */)
  6431. +{
  6432. + int result;
  6433. + carry_node *missing UNUSED_ARG;
  6434. + znode *left;
  6435. + znode *right;
  6436. + carry_node *lchild;
  6437. + carry_node *rchild;
  6438. + const char *error_msg;
  6439. + reiser4_tree *tree;
  6440. +
  6441. + /*
  6442. + * This operation is called to update key of internal item. This is
  6443. + * necessary when carry shifted or cut data on the child
  6444. + * level. Arguments of this operation are:
  6445. + *
  6446. + * @right --- child node. Operation should update key of internal
  6447. + * item pointing to @right.
  6448. + *
  6449. + * @left --- left neighbor of @right. This parameter is optional.
  6450. + */
  6451. +
  6452. + assert("nikita-902", op != NULL);
  6453. + assert("nikita-903", todo != NULL);
  6454. + assert("nikita-904", op->op == COP_UPDATE);
  6455. +
  6456. + lchild = op->u.update.left;
  6457. + rchild = op->node;
  6458. +
  6459. + if (lchild != NULL) {
  6460. + assert("nikita-1001", lchild->parent);
  6461. + assert("nikita-1003", !lchild->left);
  6462. + left = reiser4_carry_real(lchild);
  6463. + } else
  6464. + left = NULL;
  6465. +
  6466. + tree = znode_get_tree(rchild->node);
  6467. + read_lock_tree(tree);
  6468. + right = znode_parent(rchild->node);
  6469. + read_unlock_tree(tree);
  6470. +
  6471. + if (right != NULL) {
  6472. + result = update_delimiting_key(right,
  6473. + lchild ? lchild->node : NULL,
  6474. + rchild->node,
  6475. + doing, todo, &error_msg);
  6476. + } else {
  6477. + error_msg = "Cannot find node to update key in";
  6478. + result = RETERR(-EIO);
  6479. + }
  6480. + /* operation will be reposted to the next level by the
  6481. + ->update_item_key() method of node plugin, if necessary. */
  6482. +
  6483. + if (result != 0) {
  6484. + warning("nikita-999", "Error updating delimiting key: %s (%i)",
  6485. + error_msg ? : "", result);
  6486. + }
  6487. + return result;
  6488. +}
  6489. +
  6490. +/* move items from @node during carry */
  6491. +static int carry_shift_data(sideof side /* in what direction to move data */ ,
  6492. + coord_t *insert_coord /* coord where new item
  6493. + * is to be inserted */,
  6494. + znode * node /* node which data are moved from */ ,
  6495. + carry_level * doing /* active carry queue */ ,
  6496. + carry_level * todo /* carry queue where new
  6497. + * operations are to be put
  6498. + * in */ ,
  6499. + unsigned int including_insert_coord_p
  6500. + /* true if @insertion_coord can be moved */ )
  6501. +{
  6502. + int result;
  6503. + znode *source;
  6504. + carry_plugin_info info;
  6505. + node_plugin *nplug;
  6506. +
  6507. + source = insert_coord->node;
  6508. +
  6509. + info.doing = doing;
  6510. + info.todo = todo;
  6511. +
  6512. + nplug = node_plugin_by_node(node);
  6513. + result = nplug->shift(insert_coord, node,
  6514. + (side == LEFT_SIDE) ? SHIFT_LEFT : SHIFT_RIGHT, 0,
  6515. + (int)including_insert_coord_p, &info);
  6516. + /* the only error ->shift() method of node plugin can return is
  6517. + -ENOMEM due to carry node/operation allocation. */
  6518. + assert("nikita-915", result >= 0 || result == -ENOMEM);
  6519. + if (result > 0) {
  6520. + /*
  6521. + * if some number of bytes was actually shifted, mark nodes
  6522. + * dirty, and carry level as non-restartable.
  6523. + */
  6524. + doing->restartable = 0;
  6525. + znode_make_dirty(source);
  6526. + znode_make_dirty(node);
  6527. + }
  6528. +
  6529. + assert("nikita-2077", coord_check(insert_coord));
  6530. + return 0;
  6531. +}
  6532. +
  6533. +typedef carry_node *(*carry_iterator) (carry_node * node);
  6534. +static carry_node *find_dir_carry(carry_node * node, carry_level * level,
  6535. + carry_iterator iterator);
  6536. +
  6537. +static carry_node *pool_level_list_prev(carry_node *node)
  6538. +{
  6539. + return list_entry(node->header.level_linkage.prev, carry_node, header.level_linkage);
  6540. +}
  6541. +
  6542. +/* look for the left neighbor of given carry node in a carry queue.
  6543. +
  6544. + This is used by find_left_neighbor(), but I am not sure that this
  6545. + really gives any advantage. More statistics required.
  6546. +
  6547. +*/
  6548. +carry_node *find_left_carry(carry_node * node /* node to find left neighbor
  6549. + * of */ ,
  6550. + carry_level * level/* level to scan */)
  6551. +{
  6552. + return find_dir_carry(node, level,
  6553. + (carry_iterator) pool_level_list_prev);
  6554. +}
  6555. +
  6556. +static carry_node *pool_level_list_next(carry_node *node)
  6557. +{
  6558. + return list_entry(node->header.level_linkage.next, carry_node, header.level_linkage);
  6559. +}
  6560. +
  6561. +/* look for the right neighbor of given carry node in a
  6562. + carry queue.
  6563. +
  6564. + This is used by find_right_neighbor(), but I am not sure that this
  6565. + really gives any advantage. More statistics required.
  6566. +
  6567. +*/
  6568. +carry_node *find_right_carry(carry_node * node /* node to find right neighbor
  6569. + * of */ ,
  6570. + carry_level * level/* level to scan */)
  6571. +{
  6572. + return find_dir_carry(node, level,
  6573. + (carry_iterator) pool_level_list_next);
  6574. +}
  6575. +
  6576. +/* look for the left or right neighbor of given carry node in a carry
  6577. + queue.
  6578. +
  6579. + Helper function used by find_{left|right}_carry().
  6580. +*/
  6581. +static carry_node *find_dir_carry(carry_node * node /* node to start
  6582. + * scanning from */ ,
  6583. + carry_level * level /* level to scan */ ,
  6584. + carry_iterator iterator /* operation to
  6585. + * move to the
  6586. + * next node */)
  6587. +{
  6588. + carry_node *neighbor;
  6589. +
  6590. + assert("nikita-1059", node != NULL);
  6591. + assert("nikita-1060", level != NULL);
  6592. +
  6593. + /* scan list of carry nodes on this list dir-ward, skipping all
  6594. + carry nodes referencing the same znode. */
  6595. + neighbor = node;
  6596. + while (1) {
  6597. + neighbor = iterator(neighbor);
  6598. + if (carry_node_end(level, neighbor))
  6599. + /* list head is reached */
  6600. + return NULL;
  6601. + if (reiser4_carry_real(neighbor) != reiser4_carry_real(node))
  6602. + return neighbor;
  6603. + }
  6604. +}
  6605. +
  6606. +/*
  6607. + * Memory reservation estimation.
  6608. + *
  6609. + * Carry process proceeds through tree levels upwards. Carry assumes that it
  6610. + * takes tree in consistent state (e.g., that search tree invariants hold),
  6611. + * and leaves tree consistent after it finishes. This means that when some
  6612. + * error occurs carry cannot simply return if there are pending carry
  6613. + * operations. Generic solution for this problem is carry-undo either as
  6614. + * transaction manager feature (requiring checkpoints and isolation), or
  6615. + * through some carry specific mechanism.
  6616. + *
  6617. + * Our current approach is to panic if carry hits an error while tree is
  6618. + * inconsistent. Unfortunately -ENOMEM can easily be triggered. To work around
  6619. + * this "memory reservation" mechanism was added.
  6620. + *
  6621. + * Memory reservation is implemented by perthread-pages.diff patch from
  6622. + * core-patches. Its API is defined in <linux/gfp.h>
  6623. + *
  6624. + * int perthread_pages_reserve(int nrpages, gfp_t gfp);
  6625. + * void perthread_pages_release(int nrpages);
  6626. + * int perthread_pages_count(void);
  6627. + *
  6628. + * carry estimates its worst case memory requirements at the entry, reserves
  6629. + * enough memory, and releases unused pages before returning.
  6630. + *
  6631. + * Code below estimates worst case memory requirements for a given carry
  6632. + * queue. This is done by summing worst case memory requirements for each
  6633. + * operation in the queue.
  6634. + *
  6635. + */
  6636. +
  6637. +/*
  6638. + * Memory requirements of many operations depend on the tree
  6639. + * height. For example, item insertion requires new node to be inserted at
  6640. + * each tree level in the worst case. What tree height should be used for
  6641. + * estimation? Current tree height is wrong, because tree height can change
  6642. + * between the time when estimation was done and the time when operation is
  6643. + * actually performed. Maximal possible tree height (REISER4_MAX_ZTREE_HEIGHT)
  6644. + * is also not desirable, because it would lead to the huge over-estimation
  6645. + * all the time. Plausible solution is "capped tree height": if current tree
  6646. + * height is less than some TREE_HEIGHT_CAP constant, capped tree height is
  6647. + * TREE_HEIGHT_CAP, otherwise it's current tree height. Idea behind this is
  6648. + * that if tree height is TREE_HEIGHT_CAP or larger, it's extremely unlikely
  6649. + * to be increased even more during short interval of time.
  6650. + */
  6651. +#define TREE_HEIGHT_CAP (5)
  6652. +
  6653. +/* return capped tree height for the @tree. See comment above. */
  6654. +static int cap_tree_height(reiser4_tree * tree)
  6655. +{
  6656. + return max_t(int, tree->height, TREE_HEIGHT_CAP);
  6657. +}
  6658. +
  6659. +/* return capped tree height for the current tree. */
  6660. +static int capped_height(void)
  6661. +{
  6662. + return cap_tree_height(current_tree);
  6663. +}
  6664. +
  6665. +/* return number of pages required to store given number of bytes */
  6666. +static int bytes_to_pages(int bytes)
  6667. +{
  6668. + return (bytes + PAGE_SIZE - 1) >> PAGE_SHIFT;
  6669. +}
  6670. +
  6671. +/* how many pages are required to allocate znodes during item insertion. */
  6672. +static int carry_estimate_znodes(void)
  6673. +{
  6674. + /*
  6675. + * Note that we have a problem here: there is no way to
  6676. + * reserve pages specifically for the given slab. This means that
  6677. + * these pages can be hijacked for some other end.
  6678. + */
  6679. +
  6680. + /* in the worst case we need 3 new znodes on each tree level */
  6681. + return bytes_to_pages(capped_height() * sizeof(znode) * 3);
  6682. +}
  6683. +
  6684. +/*
  6685. + * how many pages are required to load bitmaps. One bitmap per level.
  6686. + */
  6687. +static int carry_estimate_bitmaps(void)
  6688. +{
  6689. + if (reiser4_is_set(reiser4_get_current_sb(), REISER4_DONT_LOAD_BITMAP)) {
  6690. + int bytes;
  6691. +
  6692. + bytes = capped_height() * (0 + /* bnode should be added, but
  6693. + * it is private to bitmap.c,
  6694. + * skip for now. */
  6695. + 2 * sizeof(jnode));
  6696. + /* working and commit jnodes */
  6697. + return bytes_to_pages(bytes) + 2; /* and their contents */
  6698. + } else
  6699. + /* bitmaps were pre-loaded during mount */
  6700. + return 0;
  6701. +}
  6702. +
  6703. +/* worst case item insertion memory requirements */
  6704. +static int carry_estimate_insert(carry_op * op, carry_level * level)
  6705. +{
  6706. + return carry_estimate_bitmaps() + carry_estimate_znodes() + 1 +
  6707. + /* new atom */
  6708. + capped_height() + /* new block on each level */
  6709. + 1 + /* and possibly extra new block at the leaf level */
  6710. + 3; /* loading of leaves into memory */
  6711. +}
  6712. +
  6713. +/* worst case item deletion memory requirements */
  6714. +static int carry_estimate_delete(carry_op * op, carry_level * level)
  6715. +{
  6716. + return carry_estimate_bitmaps() + carry_estimate_znodes() + 1 +
  6717. + /* new atom */
  6718. + 3; /* loading of leaves into memory */
  6719. +}
  6720. +
  6721. +/* worst case tree cut memory requirements */
  6722. +static int carry_estimate_cut(carry_op * op, carry_level * level)
  6723. +{
  6724. + return carry_estimate_bitmaps() + carry_estimate_znodes() + 1 +
  6725. + /* new atom */
  6726. + 3; /* loading of leaves into memory */
  6727. +}
  6728. +
  6729. +/* worst case memory requirements of pasting into item */
  6730. +static int carry_estimate_paste(carry_op * op, carry_level * level)
  6731. +{
  6732. + return carry_estimate_bitmaps() + carry_estimate_znodes() + 1 +
  6733. + /* new atom */
  6734. + capped_height() + /* new block on each level */
  6735. + 1 + /* and possibly extra new block at the leaf level */
  6736. + 3; /* loading of leaves into memory */
  6737. +}
  6738. +
  6739. +/* worst case memory requirements of extent insertion */
  6740. +static int carry_estimate_extent(carry_op * op, carry_level * level)
  6741. +{
  6742. + return carry_estimate_insert(op, level) + /* insert extent */
  6743. + carry_estimate_delete(op, level); /* kill leaf */
  6744. +}
  6745. +
  6746. +/* worst case memory requirements of key update */
  6747. +static int carry_estimate_update(carry_op * op, carry_level * level)
  6748. +{
  6749. + return 0;
  6750. +}
  6751. +
  6752. +/* worst case memory requirements of flow insertion */
  6753. +static int carry_estimate_insert_flow(carry_op * op, carry_level * level)
  6754. +{
  6755. + int newnodes;
  6756. +
  6757. + newnodes = min(bytes_to_pages(op->u.insert_flow.flow->length),
  6758. + CARRY_FLOW_NEW_NODES_LIMIT);
  6759. + /*
  6760. + * roughly estimate insert_flow as a sequence of insertions.
  6761. + */
  6762. + return newnodes * carry_estimate_insert(op, level);
  6763. +}
  6764. +
  6765. +/* This is dispatch table for carry operations. It can be trivially
  6766. + abstracted into useful plugin: tunable balancing policy is a good
  6767. + thing. */
  6768. +carry_op_handler op_dispatch_table[COP_LAST_OP] = {
  6769. + [COP_INSERT] = {
  6770. + .handler = carry_insert,
  6771. + .estimate = carry_estimate_insert}
  6772. + ,
  6773. + [COP_DELETE] = {
  6774. + .handler = carry_delete,
  6775. + .estimate = carry_estimate_delete}
  6776. + ,
  6777. + [COP_CUT] = {
  6778. + .handler = carry_cut,
  6779. + .estimate = carry_estimate_cut}
  6780. + ,
  6781. + [COP_PASTE] = {
  6782. + .handler = carry_paste,
  6783. + .estimate = carry_estimate_paste}
  6784. + ,
  6785. + [COP_EXTENT] = {
  6786. + .handler = carry_extent,
  6787. + .estimate = carry_estimate_extent}
  6788. + ,
  6789. + [COP_UPDATE] = {
  6790. + .handler = carry_update,
  6791. + .estimate = carry_estimate_update}
  6792. + ,
  6793. + [COP_INSERT_FLOW] = {
  6794. + .handler = carry_insert_flow,
  6795. + .estimate = carry_estimate_insert_flow}
  6796. +};
  6797. +
  6798. +/* Make Linus happy.
  6799. + Local variables:
  6800. + c-indentation-style: "K&R"
  6801. + mode-name: "LC"
  6802. + c-basic-offset: 8
  6803. + tab-width: 8
  6804. + fill-column: 120
  6805. + scroll-step: 1
  6806. + End:
  6807. +*/
  6808. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/carry_ops.h linux-4.14.2/fs/reiser4/carry_ops.h
  6809. --- linux-4.14.2.orig/fs/reiser4/carry_ops.h 1970-01-01 01:00:00.000000000 +0100
  6810. +++ linux-4.14.2/fs/reiser4/carry_ops.h 2017-11-26 22:13:09.000000000 +0100
  6811. @@ -0,0 +1,43 @@
  6812. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  6813. + reiser4/README */
  6814. +
  6815. +/* implementation of carry operations. See carry_ops.c for details. */
  6816. +
  6817. +#if !defined(__CARRY_OPS_H__)
  6818. +#define __CARRY_OPS_H__
  6819. +
  6820. +#include "forward.h"
  6821. +#include "znode.h"
  6822. +#include "carry.h"
  6823. +
  6824. +/* carry operation handlers */
  6825. +typedef struct carry_op_handler {
  6826. + /* perform operation */
  6827. + int (*handler) (carry_op * op, carry_level * doing, carry_level * todo);
  6828. + /* estimate memory requirements for @op */
  6829. + int (*estimate) (carry_op * op, carry_level * level);
  6830. +} carry_op_handler;
  6831. +
  6832. +/* This is dispatch table for carry operations. It can be trivially
  6833. + abstracted into useful plugin: tunable balancing policy is a good
  6834. + thing. */
  6835. +extern carry_op_handler op_dispatch_table[COP_LAST_OP];
  6836. +
  6837. +unsigned int space_needed(const znode * node, const coord_t *coord,
  6838. + const reiser4_item_data * data, int inserting);
  6839. +extern carry_node *find_left_carry(carry_node * node, carry_level * level);
  6840. +extern carry_node *find_right_carry(carry_node * node, carry_level * level);
  6841. +
  6842. +/* __CARRY_OPS_H__ */
  6843. +#endif
  6844. +
  6845. +/* Make Linus happy.
  6846. + Local variables:
  6847. + c-indentation-style: "K&R"
  6848. + mode-name: "LC"
  6849. + c-basic-offset: 8
  6850. + tab-width: 8
  6851. + fill-column: 120
  6852. + scroll-step: 1
  6853. + End:
  6854. +*/
  6855. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/checksum.c linux-4.14.2/fs/reiser4/checksum.c
  6856. --- linux-4.14.2.orig/fs/reiser4/checksum.c 1970-01-01 01:00:00.000000000 +0100
  6857. +++ linux-4.14.2/fs/reiser4/checksum.c 2017-11-26 22:13:09.000000000 +0100
  6858. @@ -0,0 +1,33 @@
  6859. +#include <linux/err.h>
  6860. +#include "debug.h"
  6861. +#include "checksum.h"
  6862. +
  6863. +int reiser4_init_csum_tfm(struct crypto_shash **tfm)
  6864. +{
  6865. + struct crypto_shash *new_tfm;
  6866. +
  6867. + new_tfm = crypto_alloc_shash("crc32c", 0, 0);
  6868. + if (IS_ERR(new_tfm)) {
  6869. + warning("intelfx-81", "Could not load crc32c driver");
  6870. + return PTR_ERR(new_tfm);
  6871. + }
  6872. +
  6873. + *tfm = new_tfm;
  6874. + return 0;
  6875. +}
  6876. +
  6877. +void reiser4_done_csum_tfm(struct crypto_shash *tfm)
  6878. +{
  6879. + crypto_free_shash(tfm);
  6880. +}
  6881. +
  6882. +/*
  6883. + Local variables:
  6884. + c-indentation-style: "K&R"
  6885. + mode-name: "LC"
  6886. + c-basic-offset: 8
  6887. + tab-width: 8
  6888. + fill-column: 120
  6889. + scroll-step: 1
  6890. + End:
  6891. +*/
  6892. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/checksum.h linux-4.14.2/fs/reiser4/checksum.h
  6893. --- linux-4.14.2.orig/fs/reiser4/checksum.h 1970-01-01 01:00:00.000000000 +0100
  6894. +++ linux-4.14.2/fs/reiser4/checksum.h 2017-11-26 22:13:09.000000000 +0100
  6895. @@ -0,0 +1,39 @@
  6896. +#ifndef __CHECKSUM__
  6897. +#define __CHECKSUM__
  6898. +
  6899. +#include <crypto/hash.h>
  6900. +
  6901. +int reiser4_init_csum_tfm(struct crypto_shash **tfm);
  6902. +void reiser4_done_csum_tfm(struct crypto_shash *tfm);
  6903. +u32 static inline reiser4_crc32c(struct crypto_shash *tfm,
  6904. + u32 crc, const void *address,
  6905. + unsigned int length)
  6906. +{
  6907. + struct {
  6908. + struct shash_desc shash;
  6909. + char ctx[4];
  6910. + } desc;
  6911. + int err;
  6912. +
  6913. + desc.shash.tfm = tfm;
  6914. + desc.shash.flags = 0;
  6915. + *(u32 *)desc.ctx = crc;
  6916. +
  6917. + err = crypto_shash_update(&desc.shash, address, length);
  6918. + BUG_ON(err);
  6919. + return *(u32 *)desc.ctx;
  6920. +}
  6921. +
  6922. +#endif /* __CHECKSUM__ */
  6923. +
  6924. +/*
  6925. + Local variables:
  6926. + c-indentation-style: "K&R"
  6927. + mode-name: "LC"
  6928. + c-basic-offset: 8
  6929. + tab-width: 8
  6930. + fill-column: 120
  6931. + scroll-step: 1
  6932. + End:
  6933. +*/
  6934. +
  6935. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/context.c linux-4.14.2/fs/reiser4/context.c
  6936. --- linux-4.14.2.orig/fs/reiser4/context.c 1970-01-01 01:00:00.000000000 +0100
  6937. +++ linux-4.14.2/fs/reiser4/context.c 2017-11-26 22:13:09.000000000 +0100
  6938. @@ -0,0 +1,288 @@
  6939. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  6940. +
  6941. +/* Manipulation of reiser4_context */
  6942. +
  6943. +/*
  6944. + * global context used during system call. Variable of this type is allocated
  6945. + * on the stack at the beginning of the reiser4 part of the system call and
  6946. + * pointer to it is stored in the current->fs_context. This allows us to avoid
  6947. + * passing pointer to current transaction and current lockstack (both in
  6948. + * one-to-one mapping with threads) all over the call chain.
  6949. + *
  6950. + * It's kind of like those global variables the prof used to tell you not to
  6951. + * use in CS1, except thread specific.;-) Nikita, this was a good idea.
  6952. + *
  6953. + * In some situations it is desirable to have ability to enter reiser4_context
  6954. + * more than once for the same thread (nested contexts). For example, there
  6955. + * are some functions that can be called either directly from VFS/VM or from
  6956. + * already active reiser4 context (->writepage, for example).
  6957. + *
  6958. + * In such situations "child" context acts like dummy: all activity is
  6959. + * actually performed in the top level context, and get_current_context()
  6960. + * always returns top level context.
  6961. + * Of course, reiser4_init_context()/reiser4_done_context() have to be properly
  6962. + * nested any way.
  6963. + *
  6964. + * Note that there is an important difference between reiser4 uses
  6965. + * ->fs_context and the way other file systems use it. Other file systems
  6966. + * (ext3 and reiserfs) use ->fs_context only for the duration of _transaction_
  6967. + * (this is why ->fs_context was initially called ->journal_info). This means,
  6968. + * that when ext3 or reiserfs finds that ->fs_context is not NULL on the entry
  6969. + * to the file system, they assume that some transaction is already underway,
  6970. + * and usually bail out, because starting nested transaction would most likely
  6971. + * lead to the deadlock. This gives false positives with reiser4, because we
  6972. + * set ->fs_context before starting transaction.
  6973. + */
  6974. +
  6975. +#include "debug.h"
  6976. +#include "super.h"
  6977. +#include "context.h"
  6978. +#include "vfs_ops.h" /* for reiser4_throttle_write() */
  6979. +
  6980. +#include <linux/writeback.h> /* for current_is_pdflush() */
  6981. +#include <linux/hardirq.h>
  6982. +
  6983. +static void _reiser4_init_context(reiser4_context * context,
  6984. + struct super_block *super)
  6985. +{
  6986. + memset(context, 0, sizeof(*context));
  6987. +
  6988. + context->super = super;
  6989. + context->magic = context_magic;
  6990. + context->outer = current->journal_info;
  6991. + current->journal_info = (void *)context;
  6992. + context->nr_children = 0;
  6993. + context->gfp_mask = GFP_KERNEL;
  6994. +
  6995. + init_lock_stack(&context->stack);
  6996. +
  6997. + reiser4_txn_begin(context);
  6998. +
  6999. + /* initialize head of tap list */
  7000. + INIT_LIST_HEAD(&context->taps);
  7001. +#if REISER4_DEBUG
  7002. + context->task = current;
  7003. +#endif
  7004. + grab_space_enable();
  7005. +}
  7006. +
  7007. +/* initialize context and bind it to the current thread
  7008. +
  7009. + This function should be called at the beginning of reiser4 part of
  7010. + syscall.
  7011. +*/
  7012. +reiser4_context * reiser4_init_context(struct super_block *super)
  7013. +{
  7014. + reiser4_context *context;
  7015. +
  7016. + assert("nikita-2662", !in_interrupt() && !in_irq());
  7017. + assert("nikita-3357", super != NULL);
  7018. + assert("nikita-3358", super->s_op == NULL || is_reiser4_super(super));
  7019. +
  7020. + context = get_current_context_check();
  7021. + if (context && context->super == super) {
  7022. + context = (reiser4_context *) current->journal_info;
  7023. + context->nr_children++;
  7024. + return context;
  7025. + }
  7026. +
  7027. + context = kmalloc(sizeof(*context), GFP_KERNEL);
  7028. + if (context == NULL)
  7029. + return ERR_PTR(RETERR(-ENOMEM));
  7030. +
  7031. + _reiser4_init_context(context, super);
  7032. + return context;
  7033. +}
  7034. +
  7035. +/* this is used in scan_mgr which is called with spinlock held and in
  7036. + reiser4_fill_super magic */
  7037. +void init_stack_context(reiser4_context *context, struct super_block *super)
  7038. +{
  7039. + assert("nikita-2662", !in_interrupt() && !in_irq());
  7040. + assert("nikita-3357", super != NULL);
  7041. + assert("nikita-3358", super->s_op == NULL || is_reiser4_super(super));
  7042. + assert("vs-12", !is_in_reiser4_context());
  7043. +
  7044. + _reiser4_init_context(context, super);
  7045. + context->on_stack = 1;
  7046. + return;
  7047. +}
  7048. +
  7049. +/* cast lock stack embedded into reiser4 context up to its container */
  7050. +reiser4_context *get_context_by_lock_stack(lock_stack * owner)
  7051. +{
  7052. + return container_of(owner, reiser4_context, stack);
  7053. +}
  7054. +
  7055. +/* true if there is already _any_ reiser4 context for the current thread */
  7056. +int is_in_reiser4_context(void)
  7057. +{
  7058. + reiser4_context *ctx;
  7059. +
  7060. + ctx = current->journal_info;
  7061. + return ctx != NULL && ((unsigned long)ctx->magic) == context_magic;
  7062. +}
  7063. +
  7064. +/*
  7065. + * call balance dirty pages for the current context.
  7066. + *
  7067. + * File system is expected to call balance_dirty_pages_ratelimited() whenever
  7068. + * it dirties a page. reiser4 does this for unformatted nodes (that is, during
  7069. + * write---this covers vast majority of all dirty traffic), but we cannot do
  7070. + * this immediately when formatted node is dirtied, because long term lock is
  7071. + * usually held at that time. To work around this, dirtying of formatted node
  7072. + * simply increases ->nr_marked_dirty counter in the current reiser4
  7073. + * context. When we are about to leave this context,
  7074. + * balance_dirty_pages_ratelimited() is called, if necessary.
  7075. + *
  7076. + * This introduces another problem: sometimes we do not want to run
  7077. + * balance_dirty_pages_ratelimited() when leaving a context, for example
  7078. + * because some important lock (like ->i_mutex on the parent directory) is
  7079. + * held. To achieve this, ->nobalance flag can be set in the current context.
  7080. + */
  7081. +static void reiser4_throttle_write_at(reiser4_context *context)
  7082. +{
  7083. + reiser4_super_info_data *sbinfo = get_super_private(context->super);
  7084. +
  7085. + /*
  7086. + * call balance_dirty_pages_ratelimited() to process formatted nodes
  7087. + * dirtied during this system call. Do that only if we are not in mount
  7088. + * and there were nodes dirtied in this context and we are not in
  7089. + * writepage (to avoid deadlock) and not in pdflush
  7090. + */
  7091. + if (sbinfo != NULL && sbinfo->fake != NULL &&
  7092. + context->nr_marked_dirty != 0 &&
  7093. + !(current->flags & PF_MEMALLOC) &&
  7094. + !current_is_flush_bd_task())
  7095. + reiser4_throttle_write(sbinfo->fake);
  7096. +}
  7097. +
  7098. +/* release resources associated with context.
  7099. +
  7100. + This function should be called at the end of "session" with reiser4,
  7101. + typically just before leaving reiser4 driver back to VFS.
  7102. +
  7103. + This is a good place to put some debugging consistency checks, like that
  7104. + thread released all locks and closed transcrash etc.
  7105. +
  7106. +*/
  7107. +static void reiser4_done_context(reiser4_context * context)
  7108. + /* context being released */
  7109. +{
  7110. + assert("nikita-860", context != NULL);
  7111. + assert("nikita-859", context->magic == context_magic);
  7112. + assert("vs-646", (reiser4_context *) current->journal_info == context);
  7113. + assert("zam-686", !in_interrupt() && !in_irq());
  7114. +
  7115. + /* only do anything when leaving top-level reiser4 context. All nested
  7116. + * contexts are just dummies. */
  7117. + if (context->nr_children == 0) {
  7118. + assert("jmacd-673", context->trans == NULL);
  7119. + assert("jmacd-1002", lock_stack_isclean(&context->stack));
  7120. + assert("nikita-1936", reiser4_no_counters_are_held());
  7121. + assert("nikita-2626", list_empty_careful(reiser4_taps_list()));
  7122. + assert("zam-1004", ergo(get_super_private(context->super),
  7123. + get_super_private(context->super)->delete_mutex_owner !=
  7124. + current));
  7125. +
  7126. + /* release all grabbed but as yet unused blocks */
  7127. + if (context->grabbed_blocks != 0)
  7128. + all_grabbed2free();
  7129. +
  7130. + /*
  7131. + * synchronize against longterm_unlock_znode():
  7132. + * wake_up_requestor() wakes up requestors without holding
  7133. + * zlock (otherwise they will immediately bump into that lock
  7134. + * after wake up on another CPU). To work around (rare)
  7135. + * situation where requestor has been woken up asynchronously
  7136. + * and managed to run until completion (and destroy its
  7137. + * context and lock stack) before wake_up_requestor() called
  7138. + * wake_up() on it, wake_up_requestor() synchronize on lock
  7139. + * stack spin lock. It has actually been observed that spin
  7140. + * lock _was_ locked at this point, because
  7141. + * wake_up_requestor() took interrupt.
  7142. + */
  7143. + spin_lock_stack(&context->stack);
  7144. + spin_unlock_stack(&context->stack);
  7145. +
  7146. + assert("zam-684", context->nr_children == 0);
  7147. + /* restore original ->fs_context value */
  7148. + current->journal_info = context->outer;
  7149. + if (context->on_stack == 0)
  7150. + kfree(context);
  7151. + } else {
  7152. + context->nr_children--;
  7153. +#if REISER4_DEBUG
  7154. + assert("zam-685", context->nr_children >= 0);
  7155. +#endif
  7156. + }
  7157. +}
  7158. +
  7159. +/*
  7160. + * exit reiser4 context. Call balance_dirty_pages_at() if necessary. Close
  7161. + * transaction. Call done_context() to do context related book-keeping.
  7162. + */
  7163. +void reiser4_exit_context(reiser4_context * context)
  7164. +{
  7165. + assert("nikita-3021", reiser4_schedulable());
  7166. +
  7167. + if (context->nr_children == 0) {
  7168. + if (!context->nobalance)
  7169. + reiser4_throttle_write_at(context);
  7170. +
  7171. + /* if filesystem is mounted with -o sync or -o dirsync - commit
  7172. + transaction. FIXME: TXNH_DONT_COMMIT is used to avoid
  7173. + commiting on exit_context when inode semaphore is held and
  7174. + to have ktxnmgrd to do commit instead to get better
  7175. + concurrent filesystem accesses. But, when one mounts with -o
  7176. + sync, he cares more about reliability than about
  7177. + performance. So, for now we have this simple mount -o sync
  7178. + support. */
  7179. + if (context->super->s_flags & (MS_SYNCHRONOUS | MS_DIRSYNC)) {
  7180. + txn_atom *atom;
  7181. +
  7182. + atom = get_current_atom_locked_nocheck();
  7183. + if (atom) {
  7184. + atom->flags |= ATOM_FORCE_COMMIT;
  7185. + context->trans->flags &= ~TXNH_DONT_COMMIT;
  7186. + spin_unlock_atom(atom);
  7187. + }
  7188. + }
  7189. + reiser4_txn_end(context);
  7190. + }
  7191. + reiser4_done_context(context);
  7192. +}
  7193. +
  7194. +void reiser4_ctx_gfp_mask_set(void)
  7195. +{
  7196. + reiser4_context *ctx;
  7197. +
  7198. + ctx = get_current_context();
  7199. + if (ctx->entd == 0 &&
  7200. + list_empty(&ctx->stack.locks) &&
  7201. + ctx->trans->atom == NULL)
  7202. + ctx->gfp_mask = GFP_KERNEL;
  7203. + else
  7204. + ctx->gfp_mask = GFP_NOFS;
  7205. +}
  7206. +
  7207. +void reiser4_ctx_gfp_mask_force(gfp_t mask)
  7208. +{
  7209. + reiser4_context *ctx;
  7210. + ctx = get_current_context();
  7211. +
  7212. + assert("edward-1454", ctx != NULL);
  7213. +
  7214. + ctx->gfp_mask = mask;
  7215. +}
  7216. +
  7217. +/*
  7218. + * Local variables:
  7219. + * c-indentation-style: "K&R"
  7220. + * mode-name: "LC"
  7221. + * c-basic-offset: 8
  7222. + * tab-width: 8
  7223. + * fill-column: 120
  7224. + * scroll-step: 1
  7225. + * End:
  7226. + */
  7227. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/context.h linux-4.14.2/fs/reiser4/context.h
  7228. --- linux-4.14.2.orig/fs/reiser4/context.h 1970-01-01 01:00:00.000000000 +0100
  7229. +++ linux-4.14.2/fs/reiser4/context.h 2017-11-26 22:13:09.000000000 +0100
  7230. @@ -0,0 +1,233 @@
  7231. +/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
  7232. + * reiser4/README */
  7233. +
  7234. +/* Reiser4 context. See context.c for details. */
  7235. +
  7236. +#if !defined( __REISER4_CONTEXT_H__ )
  7237. +#define __REISER4_CONTEXT_H__
  7238. +
  7239. +#include "forward.h"
  7240. +#include "debug.h"
  7241. +#include "dformat.h"
  7242. +#include "tap.h"
  7243. +#include "lock.h"
  7244. +
  7245. +#include <linux/types.h> /* for __u?? */
  7246. +#include <linux/fs.h> /* for struct super_block */
  7247. +#include <linux/spinlock.h>
  7248. +#include <linux/sched.h> /* for struct task_struct */
  7249. +
  7250. +/* reiser4 per-thread context */
  7251. +struct reiser4_context {
  7252. + /* magic constant. For identification of reiser4 contexts. */
  7253. + __u32 magic;
  7254. +
  7255. + /* current lock stack. See lock.[ch]. This is where list of all
  7256. + locks taken by current thread is kept. This is also used in
  7257. + deadlock detection. */
  7258. + lock_stack stack;
  7259. +
  7260. + /* current transcrash. */
  7261. + txn_handle *trans;
  7262. + /* transaction handle embedded into reiser4_context. ->trans points
  7263. + * here by default. */
  7264. + txn_handle trans_in_ctx;
  7265. +
  7266. + /* super block we are working with. To get the current tree
  7267. + use &get_super_private (reiser4_get_current_sb ())->tree. */
  7268. + struct super_block *super;
  7269. +
  7270. + /* parent fs activation */
  7271. + struct fs_activation *outer;
  7272. +
  7273. + /* per-thread grabbed (for further allocation) blocks counter */
  7274. + reiser4_block_nr grabbed_blocks;
  7275. +
  7276. + /* list of taps currently monitored. See tap.c */
  7277. + struct list_head taps;
  7278. +
  7279. + /* grabbing space is enabled */
  7280. + unsigned int grab_enabled:1;
  7281. + /* should be set when we are write dirty nodes to disk in jnode_flush or
  7282. + * reiser4_write_logs() */
  7283. + unsigned int writeout_mode:1;
  7284. + /* true, if current thread is an ent thread */
  7285. + unsigned int entd:1;
  7286. + /* true, if balance_dirty_pages() should not be run when leaving this
  7287. + * context. This is used to avoid lengthy balance_dirty_pages()
  7288. + * operation when holding some important resource, like directory
  7289. + * ->i_mutex */
  7290. + unsigned int nobalance:1;
  7291. +
  7292. + /* this bit is used on reiser4_done_context to decide whether context is
  7293. + kmalloc-ed and has to be kfree-ed */
  7294. + unsigned int on_stack:1;
  7295. +
  7296. + /* count non-trivial jnode_set_dirty() calls */
  7297. + unsigned long nr_marked_dirty;
  7298. + /*
  7299. + * reiser4_writeback_inodes calls (via generic_writeback_sb_inodes)
  7300. + * reiser4_writepages_dispatch for each of dirty inodes.
  7301. + * Reiser4_writepages_dispatch captures pages. When number of pages
  7302. + * captured in one reiser4_writeback_inodes reaches some threshold -
  7303. + * some atoms get flushed
  7304. + */
  7305. + int nr_captured;
  7306. + int nr_children; /* number of child contexts */
  7307. + struct page *locked_page; /* page that should be unlocked in
  7308. + * reiser4_dirty_inode() before taking
  7309. + * a longterm lock (to not violate
  7310. + * reiser4 lock ordering) */
  7311. +#if REISER4_DEBUG
  7312. + /* debugging information about reiser4 locks held by the current
  7313. + * thread */
  7314. + reiser4_lock_cnt_info locks;
  7315. + struct task_struct *task; /* so we can easily find owner of the stack */
  7316. +
  7317. + /*
  7318. + * disk space grabbing debugging support
  7319. + */
  7320. + /* how many disk blocks were grabbed by the first call to
  7321. + * reiser4_grab_space() in this context */
  7322. + reiser4_block_nr grabbed_initially;
  7323. +
  7324. + /* list of all threads doing flush currently */
  7325. + struct list_head flushers_link;
  7326. + /* information about last error encountered by reiser4 */
  7327. + err_site err;
  7328. +#endif
  7329. + void *vp;
  7330. + gfp_t gfp_mask;
  7331. +};
  7332. +
  7333. +extern reiser4_context *get_context_by_lock_stack(lock_stack *);
  7334. +
  7335. +/* Debugging helps. */
  7336. +#if REISER4_DEBUG
  7337. +extern void print_contexts(void);
  7338. +#endif
  7339. +
  7340. +#define current_tree (&(get_super_private(reiser4_get_current_sb())->tree))
  7341. +#define current_blocksize reiser4_get_current_sb()->s_blocksize
  7342. +#define current_blocksize_bits reiser4_get_current_sb()->s_blocksize_bits
  7343. +
  7344. +extern reiser4_context *reiser4_init_context(struct super_block *);
  7345. +extern void init_stack_context(reiser4_context *, struct super_block *);
  7346. +extern void reiser4_exit_context(reiser4_context *);
  7347. +
  7348. +/* magic constant we store in reiser4_context allocated at the stack. Used to
  7349. + catch accesses to stale or uninitialized contexts. */
  7350. +#define context_magic ((__u32) 0x4b1b5d0b)
  7351. +
  7352. +extern int is_in_reiser4_context(void);
  7353. +
  7354. +/*
  7355. + * return reiser4_context for the thread @tsk
  7356. + */
  7357. +static inline reiser4_context *get_context(const struct task_struct *tsk)
  7358. +{
  7359. + assert("vs-1682",
  7360. + ((reiser4_context *) tsk->journal_info)->magic == context_magic);
  7361. + return (reiser4_context *) tsk->journal_info;
  7362. +}
  7363. +
  7364. +/*
  7365. + * return reiser4 context of the current thread, or NULL if there is none.
  7366. + */
  7367. +static inline reiser4_context *get_current_context_check(void)
  7368. +{
  7369. + if (is_in_reiser4_context())
  7370. + return get_context(current);
  7371. + else
  7372. + return NULL;
  7373. +}
  7374. +
  7375. +static inline reiser4_context *get_current_context(void); /* __attribute__((const)); */
  7376. +
  7377. +/* return context associated with current thread */
  7378. +static inline reiser4_context *get_current_context(void)
  7379. +{
  7380. + return get_context(current);
  7381. +}
  7382. +
  7383. +static inline gfp_t reiser4_ctx_gfp_mask_get(void)
  7384. +{
  7385. + reiser4_context *ctx;
  7386. +
  7387. + ctx = get_current_context_check();
  7388. + return (ctx == NULL) ? GFP_KERNEL : ctx->gfp_mask;
  7389. +}
  7390. +
  7391. +void reiser4_ctx_gfp_mask_set(void);
  7392. +void reiser4_ctx_gfp_mask_force (gfp_t mask);
  7393. +
  7394. +/*
  7395. + * true if current thread is in the write-out mode. Thread enters write-out
  7396. + * mode during jnode_flush and reiser4_write_logs().
  7397. + */
  7398. +static inline int is_writeout_mode(void)
  7399. +{
  7400. + return get_current_context()->writeout_mode;
  7401. +}
  7402. +
  7403. +/*
  7404. + * enter write-out mode
  7405. + */
  7406. +static inline void writeout_mode_enable(void)
  7407. +{
  7408. + assert("zam-941", !get_current_context()->writeout_mode);
  7409. + get_current_context()->writeout_mode = 1;
  7410. +}
  7411. +
  7412. +/*
  7413. + * leave write-out mode
  7414. + */
  7415. +static inline void writeout_mode_disable(void)
  7416. +{
  7417. + assert("zam-942", get_current_context()->writeout_mode);
  7418. + get_current_context()->writeout_mode = 0;
  7419. +}
  7420. +
  7421. +static inline void grab_space_enable(void)
  7422. +{
  7423. + get_current_context()->grab_enabled = 1;
  7424. +}
  7425. +
  7426. +static inline void grab_space_disable(void)
  7427. +{
  7428. + get_current_context()->grab_enabled = 0;
  7429. +}
  7430. +
  7431. +static inline void grab_space_set_enabled(int enabled)
  7432. +{
  7433. + get_current_context()->grab_enabled = enabled;
  7434. +}
  7435. +
  7436. +static inline int is_grab_enabled(reiser4_context * ctx)
  7437. +{
  7438. + return ctx->grab_enabled;
  7439. +}
  7440. +
  7441. +/* mark transaction handle in @ctx as TXNH_DONT_COMMIT, so that no commit or
  7442. + * flush would be performed when it is closed. This is necessary when handle
  7443. + * has to be closed under some coarse semaphore, like i_mutex of
  7444. + * directory. Commit will be performed by ktxnmgrd. */
  7445. +static inline void context_set_commit_async(reiser4_context * context)
  7446. +{
  7447. + context->nobalance = 1;
  7448. + context->trans->flags |= TXNH_DONT_COMMIT;
  7449. +}
  7450. +
  7451. +/* __REISER4_CONTEXT_H__ */
  7452. +#endif
  7453. +
  7454. +/* Make Linus happy.
  7455. + Local variables:
  7456. + c-indentation-style: "K&R"
  7457. + mode-name: "LC"
  7458. + c-basic-offset: 8
  7459. + tab-width: 8
  7460. + fill-column: 120
  7461. + scroll-step: 1
  7462. + End:
  7463. +*/
  7464. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/coord.c linux-4.14.2/fs/reiser4/coord.c
  7465. --- linux-4.14.2.orig/fs/reiser4/coord.c 1970-01-01 01:00:00.000000000 +0100
  7466. +++ linux-4.14.2/fs/reiser4/coord.c 2017-11-26 22:13:09.000000000 +0100
  7467. @@ -0,0 +1,928 @@
  7468. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  7469. + reiser4/README */
  7470. +
  7471. +#include "forward.h"
  7472. +#include "debug.h"
  7473. +#include "dformat.h"
  7474. +#include "tree.h"
  7475. +#include "plugin/item/item.h"
  7476. +#include "znode.h"
  7477. +#include "coord.h"
  7478. +
  7479. +/* Internal constructor. */
  7480. +static inline void
  7481. +coord_init_values(coord_t *coord, const znode * node, pos_in_node_t item_pos,
  7482. + pos_in_node_t unit_pos, between_enum between)
  7483. +{
  7484. + coord->node = (znode *) node;
  7485. + coord_set_item_pos(coord, item_pos);
  7486. + coord->unit_pos = unit_pos;
  7487. + coord->between = between;
  7488. + ON_DEBUG(coord->plug_v = 0);
  7489. + ON_DEBUG(coord->body_v = 0);
  7490. +
  7491. + /*ON_TRACE (TRACE_COORDS, "init coord %p node %p: %u %u %s\n", coord,
  7492. + node, item_pos, unit_pos, coord_tween_tostring (between)); */
  7493. +}
  7494. +
  7495. +/* after shifting of node content, coord previously set properly may become
  7496. + invalid, try to "normalize" it. */
  7497. +void coord_normalize(coord_t *coord)
  7498. +{
  7499. + znode *node;
  7500. +
  7501. + node = coord->node;
  7502. + assert("vs-683", node);
  7503. +
  7504. + coord_clear_iplug(coord);
  7505. +
  7506. + if (node_is_empty(node)) {
  7507. + coord_init_first_unit(coord, node);
  7508. + } else if ((coord->between == AFTER_ITEM)
  7509. + || (coord->between == AFTER_UNIT)) {
  7510. + return;
  7511. + } else if (coord->item_pos == coord_num_items(coord)
  7512. + && coord->between == BEFORE_ITEM) {
  7513. + coord_dec_item_pos(coord);
  7514. + coord->between = AFTER_ITEM;
  7515. + } else if (coord->unit_pos == coord_num_units(coord)
  7516. + && coord->between == BEFORE_UNIT) {
  7517. + coord->unit_pos--;
  7518. + coord->between = AFTER_UNIT;
  7519. + } else if (coord->item_pos == coord_num_items(coord)
  7520. + && coord->unit_pos == 0 && coord->between == BEFORE_UNIT) {
  7521. + coord_dec_item_pos(coord);
  7522. + coord->unit_pos = 0;
  7523. + coord->between = AFTER_ITEM;
  7524. + }
  7525. +}
  7526. +
  7527. +/* Copy a coordinate. */
  7528. +void coord_dup(coord_t *coord, const coord_t *old_coord)
  7529. +{
  7530. + assert("jmacd-9800", coord_check(old_coord));
  7531. + coord_dup_nocheck(coord, old_coord);
  7532. +}
  7533. +
  7534. +/* Copy a coordinate without check. Useful when old_coord->node is not
  7535. + loaded. As in cbk_tree_lookup -> connect_znode -> connect_one_side */
  7536. +void coord_dup_nocheck(coord_t *coord, const coord_t *old_coord)
  7537. +{
  7538. + coord->node = old_coord->node;
  7539. + coord_set_item_pos(coord, old_coord->item_pos);
  7540. + coord->unit_pos = old_coord->unit_pos;
  7541. + coord->between = old_coord->between;
  7542. + coord->iplugid = old_coord->iplugid;
  7543. + ON_DEBUG(coord->plug_v = old_coord->plug_v);
  7544. + ON_DEBUG(coord->body_v = old_coord->body_v);
  7545. +}
  7546. +
  7547. +/* Initialize an invalid coordinate. */
  7548. +void coord_init_invalid(coord_t *coord, const znode * node)
  7549. +{
  7550. + coord_init_values(coord, node, 0, 0, INVALID_COORD);
  7551. +}
  7552. +
  7553. +void coord_init_first_unit_nocheck(coord_t *coord, const znode * node)
  7554. +{
  7555. + coord_init_values(coord, node, 0, 0, AT_UNIT);
  7556. +}
  7557. +
  7558. +/* Initialize a coordinate to point at the first unit of the first item. If the
  7559. + node is empty, it is positioned at the EMPTY_NODE. */
  7560. +void coord_init_first_unit(coord_t *coord, const znode * node)
  7561. +{
  7562. + int is_empty = node_is_empty(node);
  7563. +
  7564. + coord_init_values(coord, node, 0, 0, (is_empty ? EMPTY_NODE : AT_UNIT));
  7565. +
  7566. + assert("jmacd-9801", coord_check(coord));
  7567. +}
  7568. +
  7569. +/* Initialize a coordinate to point at the last unit of the last item. If the
  7570. + node is empty, it is positioned at the EMPTY_NODE. */
  7571. +void coord_init_last_unit(coord_t *coord, const znode * node)
  7572. +{
  7573. + int is_empty = node_is_empty(node);
  7574. +
  7575. + coord_init_values(coord, node,
  7576. + (is_empty ? 0 : node_num_items(node) - 1), 0,
  7577. + (is_empty ? EMPTY_NODE : AT_UNIT));
  7578. + if (!is_empty)
  7579. + coord->unit_pos = coord_last_unit_pos(coord);
  7580. + assert("jmacd-9802", coord_check(coord));
  7581. +}
  7582. +
  7583. +/* Initialize a coordinate to before the first item. If the node is empty, it is
  7584. + positioned at the EMPTY_NODE. */
  7585. +void coord_init_before_first_item(coord_t *coord, const znode * node)
  7586. +{
  7587. + int is_empty = node_is_empty(node);
  7588. +
  7589. + coord_init_values(coord, node, 0, 0,
  7590. + (is_empty ? EMPTY_NODE : BEFORE_UNIT));
  7591. +
  7592. + assert("jmacd-9803", coord_check(coord));
  7593. +}
  7594. +
  7595. +/* Initialize a coordinate to after the last item. If the node is empty, it is
  7596. + positioned at the EMPTY_NODE. */
  7597. +void coord_init_after_last_item(coord_t *coord, const znode * node)
  7598. +{
  7599. + int is_empty = node_is_empty(node);
  7600. +
  7601. + coord_init_values(coord, node,
  7602. + (is_empty ? 0 : node_num_items(node) - 1), 0,
  7603. + (is_empty ? EMPTY_NODE : AFTER_ITEM));
  7604. +
  7605. + assert("jmacd-9804", coord_check(coord));
  7606. +}
  7607. +
  7608. +/* Initialize a coordinate to after last unit in the item. Coord must be set
  7609. + already to existing item */
  7610. +void coord_init_after_item_end(coord_t *coord)
  7611. +{
  7612. + coord->between = AFTER_UNIT;
  7613. + coord->unit_pos = coord_last_unit_pos(coord);
  7614. +}
  7615. +
  7616. +/* Initialize a coordinate to before the item. Coord must be set already to
  7617. + existing item */
  7618. +void coord_init_before_item(coord_t *coord)
  7619. +{
  7620. + coord->unit_pos = 0;
  7621. + coord->between = BEFORE_ITEM;
  7622. +}
  7623. +
  7624. +/* Initialize a coordinate to after the item. Coord must be set already to
  7625. + existing item */
  7626. +void coord_init_after_item(coord_t *coord)
  7627. +{
  7628. + coord->unit_pos = 0;
  7629. + coord->between = AFTER_ITEM;
  7630. +}
  7631. +
  7632. +/* Initialize a coordinate by 0s. Used in places where init_coord was used and
  7633. + it was not clear how actually */
  7634. +void coord_init_zero(coord_t *coord)
  7635. +{
  7636. + memset(coord, 0, sizeof(*coord));
  7637. +}
  7638. +
  7639. +/* Return the number of units at the present item.
  7640. + Asserts coord_is_existing_item(). */
  7641. +unsigned coord_num_units(const coord_t *coord)
  7642. +{
  7643. + assert("jmacd-9806", coord_is_existing_item(coord));
  7644. +
  7645. + return item_plugin_by_coord(coord)->b.nr_units(coord);
  7646. +}
  7647. +
  7648. +/* Returns true if the coord was initialized by coord_init_invalid (). */
  7649. +/* Audited by: green(2002.06.15) */
  7650. +int coord_is_invalid(const coord_t *coord)
  7651. +{
  7652. + return coord->between == INVALID_COORD;
  7653. +}
  7654. +
  7655. +/* Returns true if the coordinate is positioned at an existing item, not before
  7656. + or after an item. It may be placed at, before, or after any unit within the
  7657. + item, whether existing or not. */
  7658. +int coord_is_existing_item(const coord_t *coord)
  7659. +{
  7660. + switch (coord->between) {
  7661. + case EMPTY_NODE:
  7662. + case BEFORE_ITEM:
  7663. + case AFTER_ITEM:
  7664. + case INVALID_COORD:
  7665. + return 0;
  7666. +
  7667. + case BEFORE_UNIT:
  7668. + case AT_UNIT:
  7669. + case AFTER_UNIT:
  7670. + return coord->item_pos < coord_num_items(coord);
  7671. + }
  7672. +
  7673. + impossible("jmacd-9900", "unreachable coord: %p", coord);
  7674. + return 0;
  7675. +}
  7676. +
  7677. +/* Returns true if the coordinate is positioned at an existing unit, not before
  7678. + or after a unit. */
  7679. +/* Audited by: green(2002.06.15) */
  7680. +int coord_is_existing_unit(const coord_t *coord)
  7681. +{
  7682. + switch (coord->between) {
  7683. + case EMPTY_NODE:
  7684. + case BEFORE_UNIT:
  7685. + case AFTER_UNIT:
  7686. + case BEFORE_ITEM:
  7687. + case AFTER_ITEM:
  7688. + case INVALID_COORD:
  7689. + return 0;
  7690. +
  7691. + case AT_UNIT:
  7692. + return (coord->item_pos < coord_num_items(coord)
  7693. + && coord->unit_pos < coord_num_units(coord));
  7694. + }
  7695. +
  7696. + impossible("jmacd-9902", "unreachable");
  7697. + return 0;
  7698. +}
  7699. +
  7700. +/* Returns true if the coordinate is positioned at the first unit of the first
  7701. + item. Not true for empty nodes nor coordinates positioned before the first
  7702. + item. */
  7703. +/* Audited by: green(2002.06.15) */
  7704. +int coord_is_leftmost_unit(const coord_t *coord)
  7705. +{
  7706. + return (coord->between == AT_UNIT && coord->item_pos == 0
  7707. + && coord->unit_pos == 0);
  7708. +}
  7709. +
  7710. +#if REISER4_DEBUG
  7711. +/* For assertions only, checks for a valid coordinate. */
  7712. +int coord_check(const coord_t *coord)
  7713. +{
  7714. + if (coord->node == NULL)
  7715. + return 0;
  7716. + if (znode_above_root(coord->node))
  7717. + return 1;
  7718. +
  7719. + switch (coord->between) {
  7720. + default:
  7721. + case INVALID_COORD:
  7722. + return 0;
  7723. + case EMPTY_NODE:
  7724. + if (!node_is_empty(coord->node))
  7725. + return 0;
  7726. + return coord->item_pos == 0 && coord->unit_pos == 0;
  7727. +
  7728. + case BEFORE_UNIT:
  7729. + case AFTER_UNIT:
  7730. + if (node_is_empty(coord->node) && (coord->item_pos == 0)
  7731. + && (coord->unit_pos == 0))
  7732. + return 1;
  7733. + case AT_UNIT:
  7734. + break;
  7735. + case AFTER_ITEM:
  7736. + case BEFORE_ITEM:
  7737. + /* before/after item should not set unit_pos. */
  7738. + if (coord->unit_pos != 0)
  7739. + return 0;
  7740. + break;
  7741. + }
  7742. +
  7743. + if (coord->item_pos >= node_num_items(coord->node))
  7744. + return 0;
  7745. +
  7746. + /* FIXME-VS: we are going to check unit_pos. This makes no sense when
  7747. + between is set either AFTER_ITEM or BEFORE_ITEM */
  7748. + if (coord->between == AFTER_ITEM || coord->between == BEFORE_ITEM)
  7749. + return 1;
  7750. +
  7751. + if (coord_is_iplug_set(coord) &&
  7752. + coord->unit_pos >
  7753. + item_plugin_by_coord(coord)->b.nr_units(coord) - 1)
  7754. + return 0;
  7755. + return 1;
  7756. +}
  7757. +#endif
  7758. +
  7759. +/* Adjust coordinate boundaries based on the number of items prior to
  7760. + coord_next/prev. Returns 1 if the new position does not exist. */
  7761. +static int coord_adjust_items(coord_t *coord, unsigned items, int is_next)
  7762. +{
  7763. + /* If the node is invalid, leave it. */
  7764. + if (coord->between == INVALID_COORD)
  7765. + return 1;
  7766. +
  7767. + /* If the node is empty, set it appropriately. */
  7768. + if (items == 0) {
  7769. + coord->between = EMPTY_NODE;
  7770. + coord_set_item_pos(coord, 0);
  7771. + coord->unit_pos = 0;
  7772. + return 1;
  7773. + }
  7774. +
  7775. + /* If it was empty and it no longer is, set to BEFORE/AFTER_ITEM. */
  7776. + if (coord->between == EMPTY_NODE) {
  7777. + coord->between = (is_next ? BEFORE_ITEM : AFTER_ITEM);
  7778. + coord_set_item_pos(coord, 0);
  7779. + coord->unit_pos = 0;
  7780. + return 0;
  7781. + }
  7782. +
  7783. + /* If the item_pos is out-of-range, set it appropriately. */
  7784. + if (coord->item_pos >= items) {
  7785. + coord->between = AFTER_ITEM;
  7786. + coord_set_item_pos(coord, items - 1);
  7787. + coord->unit_pos = 0;
  7788. + /* If is_next, return 1 (can't go any further). */
  7789. + return is_next;
  7790. + }
  7791. +
  7792. + return 0;
  7793. +}
  7794. +
  7795. +/* Advances the coordinate by one unit to the right. If empty, no change. If
  7796. + coord_is_rightmost_unit, advances to AFTER THE LAST ITEM. Returns 0 if new
  7797. + position is an existing unit. */
  7798. +int coord_next_unit(coord_t *coord)
  7799. +{
  7800. + unsigned items = coord_num_items(coord);
  7801. +
  7802. + if (coord_adjust_items(coord, items, 1) == 1)
  7803. + return 1;
  7804. +
  7805. + switch (coord->between) {
  7806. + case BEFORE_UNIT:
  7807. + /* Now it is positioned at the same unit. */
  7808. + coord->between = AT_UNIT;
  7809. + return 0;
  7810. +
  7811. + case AFTER_UNIT:
  7812. + case AT_UNIT:
  7813. + /* If it was at or after a unit and there are more units in this
  7814. + item, advance to the next one. */
  7815. + if (coord->unit_pos < coord_last_unit_pos(coord)) {
  7816. + coord->unit_pos += 1;
  7817. + coord->between = AT_UNIT;
  7818. + return 0;
  7819. + }
  7820. +
  7821. + /* Otherwise, it is crossing an item boundary and treated as if
  7822. + it was after the current item. */
  7823. + coord->between = AFTER_ITEM;
  7824. + coord->unit_pos = 0;
  7825. + /* FALLTHROUGH */
  7826. +
  7827. + case AFTER_ITEM:
  7828. + /* Check for end-of-node. */
  7829. + if (coord->item_pos == items - 1)
  7830. + return 1;
  7831. +
  7832. + coord_inc_item_pos(coord);
  7833. + coord->unit_pos = 0;
  7834. + coord->between = AT_UNIT;
  7835. + return 0;
  7836. +
  7837. + case BEFORE_ITEM:
  7838. + /* The adjust_items checks ensure that we are valid here. */
  7839. + coord->unit_pos = 0;
  7840. + coord->between = AT_UNIT;
  7841. + return 0;
  7842. +
  7843. + case INVALID_COORD:
  7844. + case EMPTY_NODE:
  7845. + /* Handled in coord_adjust_items(). */
  7846. + break;
  7847. + }
  7848. +
  7849. + impossible("jmacd-9902", "unreachable");
  7850. + return 0;
  7851. +}
  7852. +
  7853. +/* Advances the coordinate by one item to the right. If empty, no change. If
  7854. + coord_is_rightmost_unit, advances to AFTER THE LAST ITEM. Returns 0 if new
  7855. + position is an existing item. */
  7856. +int coord_next_item(coord_t *coord)
  7857. +{
  7858. + unsigned items = coord_num_items(coord);
  7859. +
  7860. + if (coord_adjust_items(coord, items, 1) == 1)
  7861. + return 1;
  7862. +
  7863. + switch (coord->between) {
  7864. + case AFTER_UNIT:
  7865. + case AT_UNIT:
  7866. + case BEFORE_UNIT:
  7867. + case AFTER_ITEM:
  7868. + /* Check for end-of-node. */
  7869. + if (coord->item_pos == items - 1) {
  7870. + coord->between = AFTER_ITEM;
  7871. + coord->unit_pos = 0;
  7872. + coord_clear_iplug(coord);
  7873. + return 1;
  7874. + }
  7875. +
  7876. + /* Anywhere in an item, go to the next one. */
  7877. + coord->between = AT_UNIT;
  7878. + coord_inc_item_pos(coord);
  7879. + coord->unit_pos = 0;
  7880. + return 0;
  7881. +
  7882. + case BEFORE_ITEM:
  7883. + /* The out-of-range check ensures that we are valid here. */
  7884. + coord->unit_pos = 0;
  7885. + coord->between = AT_UNIT;
  7886. + return 0;
  7887. + case INVALID_COORD:
  7888. + case EMPTY_NODE:
  7889. + /* Handled in coord_adjust_items(). */
  7890. + break;
  7891. + }
  7892. +
  7893. + impossible("jmacd-9903", "unreachable");
  7894. + return 0;
  7895. +}
  7896. +
  7897. +/* Advances the coordinate by one unit to the left. If empty, no change. If
  7898. + coord_is_leftmost_unit, advances to BEFORE THE FIRST ITEM. Returns 0 if new
  7899. + position is an existing unit. */
  7900. +int coord_prev_unit(coord_t *coord)
  7901. +{
  7902. + unsigned items = coord_num_items(coord);
  7903. +
  7904. + if (coord_adjust_items(coord, items, 0) == 1)
  7905. + return 1;
  7906. +
  7907. + switch (coord->between) {
  7908. + case AT_UNIT:
  7909. + case BEFORE_UNIT:
  7910. + if (coord->unit_pos > 0) {
  7911. + coord->unit_pos -= 1;
  7912. + coord->between = AT_UNIT;
  7913. + return 0;
  7914. + }
  7915. +
  7916. + if (coord->item_pos == 0) {
  7917. + coord->between = BEFORE_ITEM;
  7918. + return 1;
  7919. + }
  7920. +
  7921. + coord_dec_item_pos(coord);
  7922. + coord->unit_pos = coord_last_unit_pos(coord);
  7923. + coord->between = AT_UNIT;
  7924. + return 0;
  7925. +
  7926. + case AFTER_UNIT:
  7927. + /* What if unit_pos is out-of-range? */
  7928. + assert("jmacd-5442",
  7929. + coord->unit_pos <= coord_last_unit_pos(coord));
  7930. + coord->between = AT_UNIT;
  7931. + return 0;
  7932. +
  7933. + case BEFORE_ITEM:
  7934. + if (coord->item_pos == 0)
  7935. + return 1;
  7936. +
  7937. + coord_dec_item_pos(coord);
  7938. + /* FALLTHROUGH */
  7939. +
  7940. + case AFTER_ITEM:
  7941. + coord->between = AT_UNIT;
  7942. + coord->unit_pos = coord_last_unit_pos(coord);
  7943. + return 0;
  7944. +
  7945. + case INVALID_COORD:
  7946. + case EMPTY_NODE:
  7947. + break;
  7948. + }
  7949. +
  7950. + impossible("jmacd-9904", "unreachable");
  7951. + return 0;
  7952. +}
  7953. +
  7954. +/* Advances the coordinate by one item to the left. If empty, no change. If
  7955. + coord_is_leftmost_unit, advances to BEFORE THE FIRST ITEM. Returns 0 if new
  7956. + position is an existing item. */
  7957. +int coord_prev_item(coord_t *coord)
  7958. +{
  7959. + unsigned items = coord_num_items(coord);
  7960. +
  7961. + if (coord_adjust_items(coord, items, 0) == 1)
  7962. + return 1;
  7963. +
  7964. + switch (coord->between) {
  7965. + case AT_UNIT:
  7966. + case AFTER_UNIT:
  7967. + case BEFORE_UNIT:
  7968. + case BEFORE_ITEM:
  7969. +
  7970. + if (coord->item_pos == 0) {
  7971. + coord->between = BEFORE_ITEM;
  7972. + coord->unit_pos = 0;
  7973. + return 1;
  7974. + }
  7975. +
  7976. + coord_dec_item_pos(coord);
  7977. + coord->unit_pos = 0;
  7978. + coord->between = AT_UNIT;
  7979. + return 0;
  7980. +
  7981. + case AFTER_ITEM:
  7982. + coord->between = AT_UNIT;
  7983. + coord->unit_pos = 0;
  7984. + return 0;
  7985. +
  7986. + case INVALID_COORD:
  7987. + case EMPTY_NODE:
  7988. + break;
  7989. + }
  7990. +
  7991. + impossible("jmacd-9905", "unreachable");
  7992. + return 0;
  7993. +}
  7994. +
  7995. +/* Calls either coord_init_first_unit or coord_init_last_unit depending on
  7996. + sideof argument. */
  7997. +void coord_init_sideof_unit(coord_t *coord, const znode * node, sideof dir)
  7998. +{
  7999. + assert("jmacd-9821", dir == LEFT_SIDE || dir == RIGHT_SIDE);
  8000. + if (dir == LEFT_SIDE) {
  8001. + coord_init_first_unit(coord, node);
  8002. + } else {
  8003. + coord_init_last_unit(coord, node);
  8004. + }
  8005. +}
  8006. +
  8007. +/* Calls either coord_is_before_leftmost or coord_is_after_rightmost depending
  8008. + on sideof argument. */
  8009. +/* Audited by: green(2002.06.15) */
  8010. +int coord_is_after_sideof_unit(coord_t *coord, sideof dir)
  8011. +{
  8012. + assert("jmacd-9822", dir == LEFT_SIDE || dir == RIGHT_SIDE);
  8013. + if (dir == LEFT_SIDE) {
  8014. + return coord_is_before_leftmost(coord);
  8015. + } else {
  8016. + return coord_is_after_rightmost(coord);
  8017. + }
  8018. +}
  8019. +
  8020. +/* Calls either coord_next_unit or coord_prev_unit depending on sideof argument.
  8021. + */
  8022. +/* Audited by: green(2002.06.15) */
  8023. +int coord_sideof_unit(coord_t *coord, sideof dir)
  8024. +{
  8025. + assert("jmacd-9823", dir == LEFT_SIDE || dir == RIGHT_SIDE);
  8026. + if (dir == LEFT_SIDE) {
  8027. + return coord_prev_unit(coord);
  8028. + } else {
  8029. + return coord_next_unit(coord);
  8030. + }
  8031. +}
  8032. +
  8033. +#if REISER4_DEBUG
  8034. +int coords_equal(const coord_t *c1, const coord_t *c2)
  8035. +{
  8036. + assert("nikita-2840", c1 != NULL);
  8037. + assert("nikita-2841", c2 != NULL);
  8038. +
  8039. + return
  8040. + c1->node == c2->node &&
  8041. + c1->item_pos == c2->item_pos &&
  8042. + c1->unit_pos == c2->unit_pos && c1->between == c2->between;
  8043. +}
  8044. +#endif /* REISER4_DEBUG */
  8045. +
  8046. +/* If coord_is_after_rightmost return NCOORD_ON_THE_RIGHT, if
  8047. + coord_is_after_leftmost return NCOORD_ON_THE_LEFT, otherwise return
  8048. + NCOORD_INSIDE. */
  8049. +/* Audited by: green(2002.06.15) */
  8050. +coord_wrt_node coord_wrt(const coord_t *coord)
  8051. +{
  8052. + if (coord_is_before_leftmost(coord))
  8053. + return COORD_ON_THE_LEFT;
  8054. +
  8055. + if (coord_is_after_rightmost(coord))
  8056. + return COORD_ON_THE_RIGHT;
  8057. +
  8058. + return COORD_INSIDE;
  8059. +}
  8060. +
  8061. +/* Returns true if the coordinate is positioned after the last item or after the
  8062. + last unit of the last item or it is an empty node. */
  8063. +/* Audited by: green(2002.06.15) */
  8064. +int coord_is_after_rightmost(const coord_t *coord)
  8065. +{
  8066. + assert("jmacd-7313", coord_check(coord));
  8067. +
  8068. + switch (coord->between) {
  8069. + case INVALID_COORD:
  8070. + case AT_UNIT:
  8071. + case BEFORE_UNIT:
  8072. + case BEFORE_ITEM:
  8073. + return 0;
  8074. +
  8075. + case EMPTY_NODE:
  8076. + return 1;
  8077. +
  8078. + case AFTER_ITEM:
  8079. + return (coord->item_pos == node_num_items(coord->node) - 1);
  8080. +
  8081. + case AFTER_UNIT:
  8082. + return ((coord->item_pos == node_num_items(coord->node) - 1) &&
  8083. + coord->unit_pos == coord_last_unit_pos(coord));
  8084. + }
  8085. +
  8086. + impossible("jmacd-9908", "unreachable");
  8087. + return 0;
  8088. +}
  8089. +
  8090. +/* Returns true if the coordinate is positioned before the first item or it is
  8091. + an empty node. */
  8092. +int coord_is_before_leftmost(const coord_t *coord)
  8093. +{
  8094. + /* FIXME-VS: coord_check requires node to be loaded whereas it is not
  8095. + necessary to check if coord is set before leftmost
  8096. + assert ("jmacd-7313", coord_check (coord)); */
  8097. + switch (coord->between) {
  8098. + case INVALID_COORD:
  8099. + case AT_UNIT:
  8100. + case AFTER_ITEM:
  8101. + case AFTER_UNIT:
  8102. + return 0;
  8103. +
  8104. + case EMPTY_NODE:
  8105. + return 1;
  8106. +
  8107. + case BEFORE_ITEM:
  8108. + case BEFORE_UNIT:
  8109. + return (coord->item_pos == 0) && (coord->unit_pos == 0);
  8110. + }
  8111. +
  8112. + impossible("jmacd-9908", "unreachable");
  8113. + return 0;
  8114. +}
  8115. +
  8116. +/* Returns true if the coordinate is positioned after an item, before an item,
  8117. + after the last unit of an item, before the first unit of an item, or at an
  8118. + empty node. */
  8119. +/* Audited by: green(2002.06.15) */
  8120. +int coord_is_between_items(const coord_t *coord)
  8121. +{
  8122. + assert("jmacd-7313", coord_check(coord));
  8123. +
  8124. + switch (coord->between) {
  8125. + case INVALID_COORD:
  8126. + case AT_UNIT:
  8127. + return 0;
  8128. +
  8129. + case AFTER_ITEM:
  8130. + case BEFORE_ITEM:
  8131. + case EMPTY_NODE:
  8132. + return 1;
  8133. +
  8134. + case BEFORE_UNIT:
  8135. + return coord->unit_pos == 0;
  8136. +
  8137. + case AFTER_UNIT:
  8138. + return coord->unit_pos == coord_last_unit_pos(coord);
  8139. + }
  8140. +
  8141. + impossible("jmacd-9908", "unreachable");
  8142. + return 0;
  8143. +}
  8144. +
  8145. +#if REISER4_DEBUG
  8146. +/* Returns true if the coordinates are positioned at adjacent units, regardless
  8147. + of before-after or item boundaries. */
  8148. +int coord_are_neighbors(coord_t *c1, coord_t *c2)
  8149. +{
  8150. + coord_t *left;
  8151. + coord_t *right;
  8152. +
  8153. + assert("nikita-1241", c1 != NULL);
  8154. + assert("nikita-1242", c2 != NULL);
  8155. + assert("nikita-1243", c1->node == c2->node);
  8156. + assert("nikita-1244", coord_is_existing_unit(c1));
  8157. + assert("nikita-1245", coord_is_existing_unit(c2));
  8158. +
  8159. + left = right = NULL;
  8160. + switch (coord_compare(c1, c2)) {
  8161. + case COORD_CMP_ON_LEFT:
  8162. + left = c1;
  8163. + right = c2;
  8164. + break;
  8165. + case COORD_CMP_ON_RIGHT:
  8166. + left = c2;
  8167. + right = c1;
  8168. + break;
  8169. + case COORD_CMP_SAME:
  8170. + return 0;
  8171. + default:
  8172. + wrong_return_value("nikita-1246", "compare_coords()");
  8173. + }
  8174. + assert("vs-731", left && right);
  8175. + if (left->item_pos == right->item_pos) {
  8176. + return left->unit_pos + 1 == right->unit_pos;
  8177. + } else if (left->item_pos + 1 == right->item_pos) {
  8178. + return (left->unit_pos == coord_last_unit_pos(left))
  8179. + && (right->unit_pos == 0);
  8180. + } else {
  8181. + return 0;
  8182. + }
  8183. +}
  8184. +#endif /* REISER4_DEBUG */
  8185. +
  8186. +/* Assuming two coordinates are positioned in the same node, return
  8187. + COORD_CMP_ON_RIGHT, COORD_CMP_ON_LEFT, or COORD_CMP_SAME depending on c1's
  8188. + position relative to c2. */
  8189. +/* Audited by: green(2002.06.15) */
  8190. +coord_cmp coord_compare(coord_t *c1, coord_t *c2)
  8191. +{
  8192. + assert("vs-209", c1->node == c2->node);
  8193. + assert("vs-194", coord_is_existing_unit(c1)
  8194. + && coord_is_existing_unit(c2));
  8195. +
  8196. + if (c1->item_pos > c2->item_pos)
  8197. + return COORD_CMP_ON_RIGHT;
  8198. + if (c1->item_pos < c2->item_pos)
  8199. + return COORD_CMP_ON_LEFT;
  8200. + if (c1->unit_pos > c2->unit_pos)
  8201. + return COORD_CMP_ON_RIGHT;
  8202. + if (c1->unit_pos < c2->unit_pos)
  8203. + return COORD_CMP_ON_LEFT;
  8204. + return COORD_CMP_SAME;
  8205. +}
  8206. +
  8207. +/* If the coordinate is between items, shifts it to the right. Returns 0 on
  8208. + success and non-zero if there is no position to the right. */
  8209. +int coord_set_to_right(coord_t *coord)
  8210. +{
  8211. + unsigned items = coord_num_items(coord);
  8212. +
  8213. + if (coord_adjust_items(coord, items, 1) == 1)
  8214. + return 1;
  8215. +
  8216. + switch (coord->between) {
  8217. + case AT_UNIT:
  8218. + return 0;
  8219. +
  8220. + case BEFORE_ITEM:
  8221. + case BEFORE_UNIT:
  8222. + coord->between = AT_UNIT;
  8223. + return 0;
  8224. +
  8225. + case AFTER_UNIT:
  8226. + if (coord->unit_pos < coord_last_unit_pos(coord)) {
  8227. + coord->unit_pos += 1;
  8228. + coord->between = AT_UNIT;
  8229. + return 0;
  8230. + } else {
  8231. +
  8232. + coord->unit_pos = 0;
  8233. +
  8234. + if (coord->item_pos == items - 1) {
  8235. + coord->between = AFTER_ITEM;
  8236. + return 1;
  8237. + }
  8238. +
  8239. + coord_inc_item_pos(coord);
  8240. + coord->between = AT_UNIT;
  8241. + return 0;
  8242. + }
  8243. +
  8244. + case AFTER_ITEM:
  8245. + if (coord->item_pos == items - 1)
  8246. + return 1;
  8247. +
  8248. + coord_inc_item_pos(coord);
  8249. + coord->unit_pos = 0;
  8250. + coord->between = AT_UNIT;
  8251. + return 0;
  8252. +
  8253. + case EMPTY_NODE:
  8254. + return 1;
  8255. +
  8256. + case INVALID_COORD:
  8257. + break;
  8258. + }
  8259. +
  8260. + impossible("jmacd-9920", "unreachable");
  8261. + return 0;
  8262. +}
  8263. +
  8264. +/* If the coordinate is between items, shifts it to the left. Returns 0 on
  8265. + success and non-zero if there is no position to the left. */
  8266. +int coord_set_to_left(coord_t *coord)
  8267. +{
  8268. + unsigned items = coord_num_items(coord);
  8269. +
  8270. + if (coord_adjust_items(coord, items, 0) == 1)
  8271. + return 1;
  8272. +
  8273. + switch (coord->between) {
  8274. + case AT_UNIT:
  8275. + return 0;
  8276. +
  8277. + case AFTER_UNIT:
  8278. + coord->between = AT_UNIT;
  8279. + return 0;
  8280. +
  8281. + case AFTER_ITEM:
  8282. + coord->between = AT_UNIT;
  8283. + coord->unit_pos = coord_last_unit_pos(coord);
  8284. + return 0;
  8285. +
  8286. + case BEFORE_UNIT:
  8287. + if (coord->unit_pos > 0) {
  8288. + coord->unit_pos -= 1;
  8289. + coord->between = AT_UNIT;
  8290. + return 0;
  8291. + } else {
  8292. +
  8293. + if (coord->item_pos == 0) {
  8294. + coord->between = BEFORE_ITEM;
  8295. + return 1;
  8296. + }
  8297. +
  8298. + coord->unit_pos = coord_last_unit_pos(coord);
  8299. + coord_dec_item_pos(coord);
  8300. + coord->between = AT_UNIT;
  8301. + return 0;
  8302. + }
  8303. +
  8304. + case BEFORE_ITEM:
  8305. + if (coord->item_pos == 0)
  8306. + return 1;
  8307. +
  8308. + coord_dec_item_pos(coord);
  8309. + coord->unit_pos = coord_last_unit_pos(coord);
  8310. + coord->between = AT_UNIT;
  8311. + return 0;
  8312. +
  8313. + case EMPTY_NODE:
  8314. + return 1;
  8315. +
  8316. + case INVALID_COORD:
  8317. + break;
  8318. + }
  8319. +
  8320. + impossible("jmacd-9920", "unreachable");
  8321. + return 0;
  8322. +}
  8323. +
  8324. +static const char *coord_tween_tostring(between_enum n)
  8325. +{
  8326. + switch (n) {
  8327. + case BEFORE_UNIT:
  8328. + return "before unit";
  8329. + case BEFORE_ITEM:
  8330. + return "before item";
  8331. + case AT_UNIT:
  8332. + return "at unit";
  8333. + case AFTER_UNIT:
  8334. + return "after unit";
  8335. + case AFTER_ITEM:
  8336. + return "after item";
  8337. + case EMPTY_NODE:
  8338. + return "empty node";
  8339. + case INVALID_COORD:
  8340. + return "invalid";
  8341. + default:
  8342. + {
  8343. + static char buf[30];
  8344. +
  8345. + sprintf(buf, "unknown: %i", n);
  8346. + return buf;
  8347. + }
  8348. + }
  8349. +}
  8350. +
  8351. +void print_coord(const char *mes, const coord_t *coord, int node)
  8352. +{
  8353. + if (coord == NULL) {
  8354. + printk("%s: null\n", mes);
  8355. + return;
  8356. + }
  8357. + printk("%s: item_pos = %d, unit_pos %d, tween=%s, iplug=%d\n",
  8358. + mes, coord->item_pos, coord->unit_pos,
  8359. + coord_tween_tostring(coord->between), coord->iplugid);
  8360. +}
  8361. +
  8362. +int
  8363. +item_utmost_child_real_block(const coord_t *coord, sideof side,
  8364. + reiser4_block_nr * blk)
  8365. +{
  8366. + return item_plugin_by_coord(coord)->f.utmost_child_real_block(coord,
  8367. + side,
  8368. + blk);
  8369. +}
  8370. +
  8371. +int item_utmost_child(const coord_t *coord, sideof side, jnode ** child)
  8372. +{
  8373. + return item_plugin_by_coord(coord)->f.utmost_child(coord, side, child);
  8374. +}
  8375. +
  8376. +/* @count bytes of flow @f got written, update correspondingly f->length,
  8377. + f->data and f->key */
  8378. +void move_flow_forward(flow_t *f, unsigned count)
  8379. +{
  8380. + if (f->data)
  8381. + f->data += count;
  8382. + f->length -= count;
  8383. + set_key_offset(&f->key, get_key_offset(&f->key) + count);
  8384. +}
  8385. +
  8386. +/*
  8387. + Local variables:
  8388. + c-indentation-style: "K&R"
  8389. + mode-name: "LC"
  8390. + c-basic-offset: 8
  8391. + tab-width: 8
  8392. + fill-column: 120
  8393. + scroll-step: 1
  8394. + End:
  8395. +*/
  8396. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/coord.h linux-4.14.2/fs/reiser4/coord.h
  8397. --- linux-4.14.2.orig/fs/reiser4/coord.h 1970-01-01 01:00:00.000000000 +0100
  8398. +++ linux-4.14.2/fs/reiser4/coord.h 2017-11-26 22:13:09.000000000 +0100
  8399. @@ -0,0 +1,399 @@
  8400. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  8401. + reiser4/README */
  8402. +
  8403. +/* Coords */
  8404. +
  8405. +#if !defined(__REISER4_COORD_H__)
  8406. +#define __REISER4_COORD_H__
  8407. +
  8408. +#include "forward.h"
  8409. +#include "debug.h"
  8410. +#include "dformat.h"
  8411. +#include "key.h"
  8412. +
  8413. +/* insertions happen between coords in the tree, so we need some means
  8414. + of specifying the sense of betweenness. */
  8415. +typedef enum {
  8416. + BEFORE_UNIT, /* Note: we/init_coord depends on this value being zero. */
  8417. + AT_UNIT,
  8418. + AFTER_UNIT,
  8419. + BEFORE_ITEM,
  8420. + AFTER_ITEM,
  8421. + INVALID_COORD,
  8422. + EMPTY_NODE,
  8423. +} between_enum;
  8424. +
  8425. +/* location of coord w.r.t. its node */
  8426. +typedef enum {
  8427. + COORD_ON_THE_LEFT = -1,
  8428. + COORD_ON_THE_RIGHT = +1,
  8429. + COORD_INSIDE = 0
  8430. +} coord_wrt_node;
  8431. +
  8432. +typedef enum {
  8433. + COORD_CMP_SAME = 0, COORD_CMP_ON_LEFT = -1, COORD_CMP_ON_RIGHT = +1
  8434. +} coord_cmp;
  8435. +
  8436. +struct coord {
  8437. + /* node in a tree */
  8438. + /* 0 */ znode *node;
  8439. +
  8440. + /* position of item within node */
  8441. + /* 4 */ pos_in_node_t item_pos;
  8442. + /* position of unit within item */
  8443. + /* 6 */ pos_in_node_t unit_pos;
  8444. + /* optimization: plugin of item is stored in coord_t. Until this was
  8445. + implemented, item_plugin_by_coord() was major CPU consumer. ->iplugid
  8446. + is invalidated (set to 0xff) on each modification of ->item_pos,
  8447. + and all such modifications are funneled through coord_*_item_pos()
  8448. + functions below.
  8449. + */
  8450. + /* 8 */ char iplugid;
  8451. + /* position of coord w.r.t. to neighboring items and/or units.
  8452. + Values are taken from &between_enum above.
  8453. + */
  8454. + /* 9 */ char between;
  8455. + /* padding. It will be added by the compiler anyway to conform to the
  8456. + * C language alignment requirements. We keep it here to be on the
  8457. + * safe side and to have a clear picture of the memory layout of this
  8458. + * structure. */
  8459. + /* 10 */ __u16 pad;
  8460. + /* 12 */ int offset;
  8461. +#if REISER4_DEBUG
  8462. + unsigned long plug_v;
  8463. + unsigned long body_v;
  8464. +#endif
  8465. +};
  8466. +
  8467. +#define INVALID_PLUGID ((char)((1 << 8) - 1))
  8468. +#define INVALID_OFFSET -1
  8469. +
  8470. +static inline void coord_clear_iplug(coord_t *coord)
  8471. +{
  8472. + assert("nikita-2835", coord != NULL);
  8473. + coord->iplugid = INVALID_PLUGID;
  8474. + coord->offset = INVALID_OFFSET;
  8475. +}
  8476. +
  8477. +static inline int coord_is_iplug_set(const coord_t *coord)
  8478. +{
  8479. + assert("nikita-2836", coord != NULL);
  8480. + return coord->iplugid != INVALID_PLUGID;
  8481. +}
  8482. +
  8483. +static inline void coord_set_item_pos(coord_t *coord, pos_in_node_t pos)
  8484. +{
  8485. + assert("nikita-2478", coord != NULL);
  8486. + coord->item_pos = pos;
  8487. + coord_clear_iplug(coord);
  8488. +}
  8489. +
  8490. +static inline void coord_dec_item_pos(coord_t *coord)
  8491. +{
  8492. + assert("nikita-2480", coord != NULL);
  8493. + --coord->item_pos;
  8494. + coord_clear_iplug(coord);
  8495. +}
  8496. +
  8497. +static inline void coord_inc_item_pos(coord_t *coord)
  8498. +{
  8499. + assert("nikita-2481", coord != NULL);
  8500. + ++coord->item_pos;
  8501. + coord_clear_iplug(coord);
  8502. +}
  8503. +
  8504. +static inline void coord_add_item_pos(coord_t *coord, int delta)
  8505. +{
  8506. + assert("nikita-2482", coord != NULL);
  8507. + coord->item_pos += delta;
  8508. + coord_clear_iplug(coord);
  8509. +}
  8510. +
  8511. +static inline void coord_invalid_item_pos(coord_t *coord)
  8512. +{
  8513. + assert("nikita-2832", coord != NULL);
  8514. + coord->item_pos = (unsigned short)~0;
  8515. + coord_clear_iplug(coord);
  8516. +}
  8517. +
  8518. +/* Reverse a direction. */
  8519. +static inline sideof sideof_reverse(sideof side)
  8520. +{
  8521. + return side == LEFT_SIDE ? RIGHT_SIDE : LEFT_SIDE;
  8522. +}
  8523. +
  8524. +/* NOTE: There is a somewhat odd mixture of the following opposed terms:
  8525. +
  8526. + "first" and "last"
  8527. + "next" and "prev"
  8528. + "before" and "after"
  8529. + "leftmost" and "rightmost"
  8530. +
  8531. + But I think the chosen names are decent the way they are.
  8532. +*/
  8533. +
  8534. +/* COORD INITIALIZERS */
  8535. +
  8536. +/* Initialize an invalid coordinate. */
  8537. +extern void coord_init_invalid(coord_t *coord, const znode * node);
  8538. +
  8539. +extern void coord_init_first_unit_nocheck(coord_t *coord, const znode * node);
  8540. +
  8541. +/* Initialize a coordinate to point at the first unit of the first item. If the
  8542. + node is empty, it is positioned at the EMPTY_NODE. */
  8543. +extern void coord_init_first_unit(coord_t *coord, const znode * node);
  8544. +
  8545. +/* Initialize a coordinate to point at the last unit of the last item. If the
  8546. + node is empty, it is positioned at the EMPTY_NODE. */
  8547. +extern void coord_init_last_unit(coord_t *coord, const znode * node);
  8548. +
  8549. +/* Initialize a coordinate to before the first item. If the node is empty, it is
  8550. + positioned at the EMPTY_NODE. */
  8551. +extern void coord_init_before_first_item(coord_t *coord, const znode * node);
  8552. +
  8553. +/* Initialize a coordinate to after the last item. If the node is empty, it is
  8554. + positioned at the EMPTY_NODE. */
  8555. +extern void coord_init_after_last_item(coord_t *coord, const znode * node);
  8556. +
  8557. +/* Initialize a coordinate to after last unit in the item. Coord must be set
  8558. + already to existing item */
  8559. +void coord_init_after_item_end(coord_t *coord);
  8560. +
  8561. +/* Initialize a coordinate to before the item. Coord must be set already to
  8562. + existing item */
  8563. +void coord_init_before_item(coord_t *);
  8564. +/* Initialize a coordinate to after the item. Coord must be set already to
  8565. + existing item */
  8566. +void coord_init_after_item(coord_t *);
  8567. +
  8568. +/* Calls either coord_init_first_unit or coord_init_last_unit depending on
  8569. + sideof argument. */
  8570. +extern void coord_init_sideof_unit(coord_t *coord, const znode * node,
  8571. + sideof dir);
  8572. +
  8573. +/* Initialize a coordinate by 0s. Used in places where init_coord was used and
  8574. + it was not clear how actually
  8575. + FIXME-VS: added by vs (2002, june, 8) */
  8576. +extern void coord_init_zero(coord_t *coord);
  8577. +
  8578. +/* COORD METHODS */
  8579. +
  8580. +/* after shifting of node content, coord previously set properly may become
  8581. + invalid, try to "normalize" it. */
  8582. +void coord_normalize(coord_t *coord);
  8583. +
  8584. +/* Copy a coordinate. */
  8585. +extern void coord_dup(coord_t *coord, const coord_t *old_coord);
  8586. +
  8587. +/* Copy a coordinate without check. */
  8588. +void coord_dup_nocheck(coord_t *coord, const coord_t *old_coord);
  8589. +
  8590. +unsigned coord_num_units(const coord_t *coord);
  8591. +
  8592. +/* Return the last valid unit number at the present item (i.e.,
  8593. + coord_num_units() - 1). */
  8594. +static inline unsigned coord_last_unit_pos(const coord_t *coord)
  8595. +{
  8596. + return coord_num_units(coord) - 1;
  8597. +}
  8598. +
  8599. +#if REISER4_DEBUG
  8600. +/* For assertions only, checks for a valid coordinate. */
  8601. +extern int coord_check(const coord_t *coord);
  8602. +
  8603. +extern unsigned long znode_times_locked(const znode * z);
  8604. +
  8605. +static inline void coord_update_v(coord_t *coord)
  8606. +{
  8607. + coord->plug_v = coord->body_v = znode_times_locked(coord->node);
  8608. +}
  8609. +#endif
  8610. +
  8611. +extern int coords_equal(const coord_t *c1, const coord_t *c2);
  8612. +
  8613. +extern void print_coord(const char *mes, const coord_t *coord, int print_node);
  8614. +
  8615. +/* If coord_is_after_rightmost return NCOORD_ON_THE_RIGHT, if
  8616. + coord_is_after_leftmost return NCOORD_ON_THE_LEFT, otherwise return
  8617. + NCOORD_INSIDE. */
  8618. +extern coord_wrt_node coord_wrt(const coord_t *coord);
  8619. +
  8620. +/* Returns true if the coordinates are positioned at adjacent units, regardless
  8621. + of before-after or item boundaries. */
  8622. +extern int coord_are_neighbors(coord_t *c1, coord_t *c2);
  8623. +
  8624. +/* Assuming two coordinates are positioned in the same node, return
  8625. + NCOORD_CMP_ON_RIGHT, NCOORD_CMP_ON_LEFT, or NCOORD_CMP_SAME depending on c1's
  8626. + position relative to c2. */
  8627. +extern coord_cmp coord_compare(coord_t *c1, coord_t *c2);
  8628. +
  8629. +/* COORD PREDICATES */
  8630. +
  8631. +/* Returns true if the coord was initializewd by coord_init_invalid (). */
  8632. +extern int coord_is_invalid(const coord_t *coord);
  8633. +
  8634. +/* Returns true if the coordinate is positioned at an existing item, not before
  8635. + or after an item. It may be placed at, before, or after any unit within the
  8636. + item, whether existing or not. If this is true you can call methods of the
  8637. + item plugin. */
  8638. +extern int coord_is_existing_item(const coord_t *coord);
  8639. +
  8640. +/* Returns true if the coordinate is positioned after a item, before a item,
  8641. + after the last unit of an item, before the first unit of an item, or at an
  8642. + empty node. */
  8643. +extern int coord_is_between_items(const coord_t *coord);
  8644. +
  8645. +/* Returns true if the coordinate is positioned at an existing unit, not before
  8646. + or after a unit. */
  8647. +extern int coord_is_existing_unit(const coord_t *coord);
  8648. +
  8649. +/* Returns true if the coordinate is positioned at an empty node. */
  8650. +extern int coord_is_empty(const coord_t *coord);
  8651. +
  8652. +/* Returns true if the coordinate is positioned at the first unit of the first
  8653. + item. Not true for empty nodes nor coordinates positioned before the first
  8654. + item. */
  8655. +extern int coord_is_leftmost_unit(const coord_t *coord);
  8656. +
  8657. +/* Returns true if the coordinate is positioned after the last item or after the
  8658. + last unit of the last item or it is an empty node. */
  8659. +extern int coord_is_after_rightmost(const coord_t *coord);
  8660. +
  8661. +/* Returns true if the coordinate is positioned before the first item or it is
  8662. + an empty node. */
  8663. +extern int coord_is_before_leftmost(const coord_t *coord);
  8664. +
  8665. +/* Calls either coord_is_before_leftmost or coord_is_after_rightmost depending
  8666. + on sideof argument. */
  8667. +extern int coord_is_after_sideof_unit(coord_t *coord, sideof dir);
  8668. +
  8669. +/* COORD MODIFIERS */
  8670. +
  8671. +/* Advances the coordinate by one unit to the right. If empty, no change. If
  8672. + coord_is_rightmost_unit, advances to AFTER THE LAST ITEM. Returns 0 if new
  8673. + position is an existing unit. */
  8674. +extern int coord_next_unit(coord_t *coord);
  8675. +
  8676. +/* Advances the coordinate by one item to the right. If empty, no change. If
  8677. + coord_is_rightmost_unit, advances to AFTER THE LAST ITEM. Returns 0 if new
  8678. + position is an existing item. */
  8679. +extern int coord_next_item(coord_t *coord);
  8680. +
  8681. +/* Advances the coordinate by one unit to the left. If empty, no change. If
  8682. + coord_is_leftmost_unit, advances to BEFORE THE FIRST ITEM. Returns 0 if new
  8683. + position is an existing unit. */
  8684. +extern int coord_prev_unit(coord_t *coord);
  8685. +
  8686. +/* Advances the coordinate by one item to the left. If empty, no change. If
  8687. + coord_is_leftmost_unit, advances to BEFORE THE FIRST ITEM. Returns 0 if new
  8688. + position is an existing item. */
  8689. +extern int coord_prev_item(coord_t *coord);
  8690. +
  8691. +/* If the coordinate is between items, shifts it to the right. Returns 0 on
  8692. + success and non-zero if there is no position to the right. */
  8693. +extern int coord_set_to_right(coord_t *coord);
  8694. +
  8695. +/* If the coordinate is between items, shifts it to the left. Returns 0 on
  8696. + success and non-zero if there is no position to the left. */
  8697. +extern int coord_set_to_left(coord_t *coord);
  8698. +
  8699. +/* If the coordinate is at an existing unit, set to after that unit. Returns 0
  8700. + on success and non-zero if the unit did not exist. */
  8701. +extern int coord_set_after_unit(coord_t *coord);
  8702. +
  8703. +/* Calls either coord_next_unit or coord_prev_unit depending on sideof
  8704. + argument. */
  8705. +extern int coord_sideof_unit(coord_t *coord, sideof dir);
  8706. +
  8707. +/* iterate over all units in @node */
  8708. +#define for_all_units(coord, node) \
  8709. + for (coord_init_before_first_item((coord), (node)) ; \
  8710. + coord_next_unit(coord) == 0 ;)
  8711. +
  8712. +/* iterate over all items in @node */
  8713. +#define for_all_items(coord, node) \
  8714. + for (coord_init_before_first_item((coord), (node)) ; \
  8715. + coord_next_item(coord) == 0 ;)
  8716. +
  8717. +/* COORD/ITEM METHODS */
  8718. +
  8719. +extern int item_utmost_child_real_block(const coord_t *coord, sideof side,
  8720. + reiser4_block_nr * blk);
  8721. +extern int item_utmost_child(const coord_t *coord, sideof side,
  8722. + jnode ** child);
  8723. +
  8724. +/* a flow is a sequence of bytes being written to or read from the tree. The
  8725. + tree will slice the flow into items while storing it into nodes, but all of
  8726. + that is hidden from anything outside the tree. */
  8727. +
  8728. +struct flow {
  8729. + reiser4_key key; /* key of start of flow's sequence of bytes */
  8730. + loff_t length; /* length of flow's sequence of bytes */
  8731. + char *data; /* start of flow's sequence of bytes */
  8732. + int user; /* if 1 data is user space, 0 - kernel space */
  8733. + rw_op op; /* NIKITA-FIXME-HANS: comment is where? */
  8734. +};
  8735. +
  8736. +void move_flow_forward(flow_t *f, unsigned count);
  8737. +
  8738. +/* &reiser4_item_data - description of data to be inserted or pasted
  8739. +
  8740. + Q: articulate the reasons for the difference between this and flow.
  8741. +
  8742. + A: Becides flow we insert into tree other things: stat data, directory
  8743. + entry, etc. To insert them into tree one has to provide this structure. If
  8744. + one is going to insert flow - he can use insert_flow, where this structure
  8745. + does not have to be created
  8746. +*/
  8747. +struct reiser4_item_data {
  8748. + /* actual data to be inserted. If NULL, ->create_item() will not
  8749. + do xmemcpy itself, leaving this up to the caller. This can
  8750. + save some amount of unnecessary memory copying, for example,
  8751. + during insertion of stat data.
  8752. +
  8753. + */
  8754. + char *data;
  8755. + /* 1 if 'char * data' contains pointer to user space and 0 if it is
  8756. + kernel space */
  8757. + int user;
  8758. + /* amount of data we are going to insert or paste */
  8759. + int length;
  8760. + /* "Arg" is opaque data that is passed down to the
  8761. + ->create_item() method of node layout, which in turn
  8762. + hands it to the ->create_hook() of item being created. This
  8763. + arg is currently used by:
  8764. +
  8765. + . ->create_hook() of internal item
  8766. + (fs/reiser4/plugin/item/internal.c:internal_create_hook()),
  8767. + . ->paste() method of directory item.
  8768. + . ->create_hook() of extent item
  8769. +
  8770. + For internal item, this is left "brother" of new node being
  8771. + inserted and it is used to add new node into sibling list
  8772. + after parent to it was just inserted into parent.
  8773. +
  8774. + While ->arg does look somewhat of unnecessary compication,
  8775. + it actually saves a lot of headache in many places, because
  8776. + all data necessary to insert or paste new data into tree are
  8777. + collected in one place, and this eliminates a lot of extra
  8778. + argument passing and storing everywhere.
  8779. +
  8780. + */
  8781. + void *arg;
  8782. + /* plugin of item we are inserting */
  8783. + item_plugin *iplug;
  8784. +};
  8785. +
  8786. +/* __REISER4_COORD_H__ */
  8787. +#endif
  8788. +
  8789. +/* Make Linus happy.
  8790. + Local variables:
  8791. + c-indentation-style: "K&R"
  8792. + mode-name: "LC"
  8793. + c-basic-offset: 8
  8794. + tab-width: 8
  8795. + fill-column: 120
  8796. + scroll-step: 1
  8797. + End:
  8798. +*/
  8799. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/debug.c linux-4.14.2/fs/reiser4/debug.c
  8800. --- linux-4.14.2.orig/fs/reiser4/debug.c 1970-01-01 01:00:00.000000000 +0100
  8801. +++ linux-4.14.2/fs/reiser4/debug.c 2017-11-26 22:13:09.000000000 +0100
  8802. @@ -0,0 +1,309 @@
  8803. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  8804. + * reiser4/README */
  8805. +
  8806. +/* Debugging facilities. */
  8807. +
  8808. +/*
  8809. + * This file contains generic debugging functions used by reiser4. Roughly
  8810. + * following:
  8811. + *
  8812. + * panicking: reiser4_do_panic(), reiser4_print_prefix().
  8813. + *
  8814. + * locking:
  8815. + * reiser4_schedulable(), reiser4_lock_counters(), print_lock_counters(),
  8816. + * reiser4_no_counters_are_held(), reiser4_commit_check_locks()
  8817. + *
  8818. + * error code monitoring (see comment before RETERR macro):
  8819. + * reiser4_return_err(), reiser4_report_err().
  8820. + *
  8821. + * stack back-tracing: fill_backtrace()
  8822. + *
  8823. + * miscellaneous: reiser4_preempt_point(), call_on_each_assert(),
  8824. + * reiser4_debugtrap().
  8825. + *
  8826. + */
  8827. +
  8828. +#include "reiser4.h"
  8829. +#include "context.h"
  8830. +#include "super.h"
  8831. +#include "txnmgr.h"
  8832. +#include "znode.h"
  8833. +
  8834. +#include <linux/sysfs.h>
  8835. +#include <linux/slab.h>
  8836. +#include <linux/types.h>
  8837. +#include <linux/fs.h>
  8838. +#include <linux/spinlock.h>
  8839. +#include <linux/kallsyms.h>
  8840. +#include <linux/vmalloc.h>
  8841. +#include <linux/ctype.h>
  8842. +#include <linux/sysctl.h>
  8843. +#include <linux/hardirq.h>
  8844. +#include <linux/sched/signal.h> /* signal_pending() */
  8845. +
  8846. +#if 0
  8847. +#if REISER4_DEBUG
  8848. +static void reiser4_report_err(void);
  8849. +#else
  8850. +#define reiser4_report_err() noop
  8851. +#endif
  8852. +#endif /* 0 */
  8853. +
  8854. +/*
  8855. + * global buffer where message given to reiser4_panic is formatted.
  8856. + */
  8857. +static char panic_buf[REISER4_PANIC_MSG_BUFFER_SIZE];
  8858. +
  8859. +/*
  8860. + * lock protecting consistency of panic_buf under concurrent panics
  8861. + */
  8862. +static DEFINE_SPINLOCK(panic_guard);
  8863. +
  8864. +/* Your best friend. Call it on each occasion. This is called by
  8865. + fs/reiser4/debug.h:reiser4_panic(). */
  8866. +void reiser4_do_panic(const char *format/* format string */ , ... /* rest */)
  8867. +{
  8868. + static int in_panic = 0;
  8869. + va_list args;
  8870. +
  8871. + /*
  8872. + * check for recursive panic.
  8873. + */
  8874. + if (in_panic == 0) {
  8875. + in_panic = 1;
  8876. +
  8877. + spin_lock(&panic_guard);
  8878. + va_start(args, format);
  8879. + vsnprintf(panic_buf, sizeof(panic_buf), format, args);
  8880. + va_end(args);
  8881. + printk(KERN_EMERG "reiser4 panicked cowardly: %s", panic_buf);
  8882. + spin_unlock(&panic_guard);
  8883. +
  8884. + /*
  8885. + * if kernel debugger is configured---drop in. Early dropping
  8886. + * into kgdb is not always convenient, because panic message
  8887. + * is not yet printed most of the times. But:
  8888. + *
  8889. + * (1) message can be extracted from printk_buf[]
  8890. + * (declared static inside of printk()), and
  8891. + *
  8892. + * (2) sometimes serial/kgdb combo dies while printing
  8893. + * long panic message, so it's more prudent to break into
  8894. + * debugger earlier.
  8895. + *
  8896. + */
  8897. + DEBUGON(1);
  8898. + }
  8899. + /* to make gcc happy about noreturn attribute */
  8900. + panic("%s", panic_buf);
  8901. +}
  8902. +
  8903. +#if 0
  8904. +void
  8905. +reiser4_print_prefix(const char *level, int reperr, const char *mid,
  8906. + const char *function, const char *file, int lineno)
  8907. +{
  8908. + const char *comm;
  8909. + int pid;
  8910. +
  8911. + if (unlikely(in_interrupt() || in_irq())) {
  8912. + comm = "interrupt";
  8913. + pid = 0;
  8914. + } else {
  8915. + comm = current->comm;
  8916. + pid = current->pid;
  8917. + }
  8918. + printk("%sreiser4[%.16s(%i)]: %s (%s:%i)[%s]:\n",
  8919. + level, comm, pid, function, file, lineno, mid);
  8920. + if (reperr)
  8921. + reiser4_report_err();
  8922. +}
  8923. +#endif /* 0 */
  8924. +
  8925. +/* Preemption point: this should be called periodically during long running
  8926. + operations (carry, allocate, and squeeze are best examples) */
  8927. +int reiser4_preempt_point(void)
  8928. +{
  8929. + assert("nikita-3008", reiser4_schedulable());
  8930. + cond_resched();
  8931. + return signal_pending(current);
  8932. +}
  8933. +
  8934. +#if REISER4_DEBUG
  8935. +/* Debugging aid: return struct where information about locks taken by current
  8936. + thread is accumulated. This can be used to formulate lock ordering
  8937. + constraints and various assertions.
  8938. +
  8939. +*/
  8940. +reiser4_lock_cnt_info *reiser4_lock_counters(void)
  8941. +{
  8942. + reiser4_context *ctx = get_current_context();
  8943. + assert("jmacd-1123", ctx != NULL);
  8944. + return &ctx->locks;
  8945. +}
  8946. +
  8947. +/*
  8948. + * print human readable information about locks held by the reiser4 context.
  8949. + */
  8950. +static void print_lock_counters(const char *prefix,
  8951. + const reiser4_lock_cnt_info * info)
  8952. +{
  8953. + printk("%s: jnode: %i, tree: %i (r:%i,w:%i), dk: %i (r:%i,w:%i)\n"
  8954. + "jload: %i, "
  8955. + "txnh: %i, atom: %i, stack: %i, txnmgr: %i, "
  8956. + "ktxnmgrd: %i, fq: %i\n"
  8957. + "inode: %i, "
  8958. + "cbk_cache: %i (r:%i,w%i), "
  8959. + "eflush: %i, "
  8960. + "zlock: %i,\n"
  8961. + "spin: %i, long: %i inode_sem: (r:%i,w:%i)\n"
  8962. + "d: %i, x: %i, t: %i\n", prefix,
  8963. + info->spin_locked_jnode,
  8964. + info->rw_locked_tree, info->read_locked_tree,
  8965. + info->write_locked_tree,
  8966. + info->rw_locked_dk, info->read_locked_dk, info->write_locked_dk,
  8967. + info->spin_locked_jload,
  8968. + info->spin_locked_txnh,
  8969. + info->spin_locked_atom, info->spin_locked_stack,
  8970. + info->spin_locked_txnmgr, info->spin_locked_ktxnmgrd,
  8971. + info->spin_locked_fq,
  8972. + info->spin_locked_inode,
  8973. + info->rw_locked_cbk_cache,
  8974. + info->read_locked_cbk_cache,
  8975. + info->write_locked_cbk_cache,
  8976. + info->spin_locked_super_eflush,
  8977. + info->spin_locked_zlock,
  8978. + info->spin_locked,
  8979. + info->long_term_locked_znode,
  8980. + info->inode_sem_r, info->inode_sem_w,
  8981. + info->d_refs, info->x_refs, info->t_refs);
  8982. +}
  8983. +
  8984. +/* check that no spinlocks are held */
  8985. +int reiser4_schedulable(void)
  8986. +{
  8987. + if (get_current_context_check() != NULL) {
  8988. + if (!LOCK_CNT_NIL(spin_locked)) {
  8989. + print_lock_counters("in atomic", reiser4_lock_counters());
  8990. + return 0;
  8991. + }
  8992. + }
  8993. + might_sleep();
  8994. + return 1;
  8995. +}
  8996. +/*
  8997. + * return true, iff no locks are held.
  8998. + */
  8999. +int reiser4_no_counters_are_held(void)
  9000. +{
  9001. + reiser4_lock_cnt_info *counters;
  9002. +
  9003. + counters = reiser4_lock_counters();
  9004. + return
  9005. + (counters->spin_locked_zlock == 0) &&
  9006. + (counters->spin_locked_jnode == 0) &&
  9007. + (counters->rw_locked_tree == 0) &&
  9008. + (counters->read_locked_tree == 0) &&
  9009. + (counters->write_locked_tree == 0) &&
  9010. + (counters->rw_locked_dk == 0) &&
  9011. + (counters->read_locked_dk == 0) &&
  9012. + (counters->write_locked_dk == 0) &&
  9013. + (counters->spin_locked_txnh == 0) &&
  9014. + (counters->spin_locked_atom == 0) &&
  9015. + (counters->spin_locked_stack == 0) &&
  9016. + (counters->spin_locked_txnmgr == 0) &&
  9017. + (counters->spin_locked_inode == 0) &&
  9018. + (counters->spin_locked == 0) &&
  9019. + (counters->long_term_locked_znode == 0) &&
  9020. + (counters->inode_sem_r == 0) &&
  9021. + (counters->inode_sem_w == 0) && (counters->d_refs == 0);
  9022. +}
  9023. +
  9024. +/*
  9025. + * return true, iff transaction commit can be done under locks held by the
  9026. + * current thread.
  9027. + */
  9028. +int reiser4_commit_check_locks(void)
  9029. +{
  9030. + reiser4_lock_cnt_info *counters;
  9031. + int inode_sem_r;
  9032. + int inode_sem_w;
  9033. + int result;
  9034. +
  9035. + /*
  9036. + * inode's read/write semaphore is the only reiser4 lock that can be
  9037. + * held during commit.
  9038. + */
  9039. +
  9040. + counters = reiser4_lock_counters();
  9041. + inode_sem_r = counters->inode_sem_r;
  9042. + inode_sem_w = counters->inode_sem_w;
  9043. +
  9044. + counters->inode_sem_r = counters->inode_sem_w = 0;
  9045. + result = reiser4_no_counters_are_held();
  9046. + counters->inode_sem_r = inode_sem_r;
  9047. + counters->inode_sem_w = inode_sem_w;
  9048. + return result;
  9049. +}
  9050. +
  9051. +/*
  9052. + * fill "error site" in the current reiser4 context. See comment before RETERR
  9053. + * macro for more details.
  9054. + */
  9055. +void reiser4_return_err(int code, const char *file, int line)
  9056. +{
  9057. + if (code < 0 && is_in_reiser4_context()) {
  9058. + reiser4_context *ctx = get_current_context();
  9059. +
  9060. + if (ctx != NULL) {
  9061. + ctx->err.code = code;
  9062. + ctx->err.file = file;
  9063. + ctx->err.line = line;
  9064. + }
  9065. + }
  9066. +}
  9067. +
  9068. +#if 0
  9069. +/*
9070. + * report error information recorded by reiser4_return_err().
  9071. + */
  9072. +static void reiser4_report_err(void)
  9073. +{
  9074. + reiser4_context *ctx = get_current_context_check();
  9075. +
  9076. + if (ctx != NULL) {
  9077. + if (ctx->err.code != 0) {
  9078. + printk("code: %i at %s:%i\n",
  9079. + ctx->err.code, ctx->err.file, ctx->err.line);
  9080. + }
  9081. + }
  9082. +}
  9083. +#endif /* 0 */
  9084. +
  9085. +#endif /* REISER4_DEBUG */
  9086. +
  9087. +#if KERNEL_DEBUGGER
  9088. +
  9089. +/*
  9090. + * this functions just drops into kernel debugger. It is a convenient place to
  9091. + * put breakpoint in.
  9092. + */
  9093. +void reiser4_debugtrap(void)
  9094. +{
  9095. + /* do nothing. Put break point here. */
  9096. +#if defined(CONFIG_KGDB) && !defined(CONFIG_REISER4_FS_MODULE)
  9097. + extern void kgdb_breakpoint(void);
  9098. + kgdb_breakpoint();
  9099. +#endif
  9100. +}
  9101. +#endif
  9102. +
  9103. +/* Make Linus happy.
  9104. + Local variables:
  9105. + c-indentation-style: "K&R"
  9106. + mode-name: "LC"
  9107. + c-basic-offset: 8
  9108. + tab-width: 8
  9109. + fill-column: 120
  9110. + End:
  9111. +*/
  9112. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/debug.h linux-4.14.2/fs/reiser4/debug.h
  9113. --- linux-4.14.2.orig/fs/reiser4/debug.h 1970-01-01 01:00:00.000000000 +0100
  9114. +++ linux-4.14.2/fs/reiser4/debug.h 2017-11-26 22:13:09.000000000 +0100
  9115. @@ -0,0 +1,353 @@
  9116. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  9117. + reiser4/README */
  9118. +
  9119. +/* Declarations of debug macros. */
  9120. +
  9121. +#if !defined(__FS_REISER4_DEBUG_H__)
  9122. +#define __FS_REISER4_DEBUG_H__
  9123. +
  9124. +#include "forward.h"
  9125. +#include "reiser4.h"
  9126. +
  9127. +/**
  9128. + * generic function to produce formatted output, decorating it with
  9129. + * whatever standard prefixes/postfixes we want. "Fun" is a function
  9130. + * that will be actually called, can be printk, panic etc.
  9131. + * This is for use by other debugging macros, not by users.
  9132. + */
  9133. +#define DCALL(lev, fun, reperr, label, format, ...) \
  9134. +({ \
  9135. + fun(lev "reiser4[%.16s(%i)]: %s (%s:%i)[%s]:\n" format "\n" , \
  9136. + current->comm, current->pid, __FUNCTION__, \
  9137. + __FILE__, __LINE__, label, ## __VA_ARGS__); \
  9138. +})
  9139. +
  9140. +/*
  9141. + * cause kernel to crash
  9142. + */
  9143. +#define reiser4_panic(mid, format, ...) \
  9144. + DCALL("", reiser4_do_panic, 1, mid, format , ## __VA_ARGS__)
  9145. +
  9146. +/* print message with indication of current process, file, line and
  9147. + function */
  9148. +#define reiser4_log(label, format, ...) \
  9149. + DCALL(KERN_DEBUG, printk, 0, label, format , ## __VA_ARGS__)
  9150. +
  9151. +/* Assertion checked during compilation.
  9152. + If "cond" is false (0) we get duplicate case label in switch.
  9153. + Use this to check something like famous
  9154. + cassert (sizeof(struct reiserfs_journal_commit) == 4096) ;
  9155. + in 3.x journal.c. If cassertion fails you get compiler error,
  9156. + so no "maintainer-id".
  9157. +*/
  9158. +#define cassert(cond) ({ switch (-1) { case (cond): case 0: break; } })
  9159. +
  9160. +#define noop do {; } while (0)
  9161. +
  9162. +#if REISER4_DEBUG
  9163. +/* version of info that only actually prints anything when _d_ebugging
  9164. + is on */
  9165. +#define dinfo(format, ...) printk(format , ## __VA_ARGS__)
  9166. +/* macro to catch logical errors. Put it into `default' clause of
  9167. + switch() statement. */
  9168. +#define impossible(label, format, ...) \
  9169. + reiser4_panic(label, "impossible: " format , ## __VA_ARGS__)
  9170. +/* assert assures that @cond is true. If it is not, reiser4_panic() is
  9171. + called. Use this for checking logical consistency and _never_ call
  9172. + this to check correctness of external data: disk blocks and user-input . */
  9173. +#define assert(label, cond) \
  9174. +({ \
  9175. + /* call_on_each_assert(); */ \
  9176. + if (cond) { \
  9177. + /* put negated check to avoid using !(cond) that would lose \
  9178. + * warnings for things like assert(a = b); */ \
  9179. + ; \
  9180. + } else { \
  9181. + DEBUGON(1); \
  9182. + reiser4_panic(label, "assertion failed: %s", #cond); \
  9183. + } \
  9184. +})
  9185. +
  9186. +/* like assertion, but @expr is evaluated even if REISER4_DEBUG is off. */
  9187. +#define check_me(label, expr) assert(label, (expr))
  9188. +
  9189. +#define ON_DEBUG(exp) exp
  9190. +
  9191. +extern int reiser4_schedulable(void);
  9192. +extern void call_on_each_assert(void);
  9193. +
  9194. +#else
  9195. +
  9196. +#define dinfo(format, args...) noop
  9197. +#define impossible(label, format, args...) noop
  9198. +#define assert(label, cond) noop
  9199. +#define check_me(label, expr) ((void) (expr))
  9200. +#define ON_DEBUG(exp)
  9201. +#define reiser4_schedulable() might_sleep()
  9202. +
  9203. +/* REISER4_DEBUG */
  9204. +#endif
  9205. +
  9206. +#if REISER4_DEBUG
  9207. +/* per-thread information about lock acquired by this thread. Used by lock
  9208. + * ordering checking in spin_macros.h */
  9209. +typedef struct reiser4_lock_cnt_info {
  9210. + int rw_locked_tree;
  9211. + int read_locked_tree;
  9212. + int write_locked_tree;
  9213. +
  9214. + int rw_locked_dk;
  9215. + int read_locked_dk;
  9216. + int write_locked_dk;
  9217. +
  9218. + int rw_locked_cbk_cache;
  9219. + int read_locked_cbk_cache;
  9220. + int write_locked_cbk_cache;
  9221. +
  9222. + int spin_locked_zlock;
  9223. + int spin_locked_jnode;
  9224. + int spin_locked_jload;
  9225. + int spin_locked_txnh;
  9226. + int spin_locked_atom;
  9227. + int spin_locked_stack;
  9228. + int spin_locked_txnmgr;
  9229. + int spin_locked_ktxnmgrd;
  9230. + int spin_locked_fq;
  9231. + int spin_locked_inode;
  9232. + int spin_locked_super_eflush;
  9233. + int spin_locked;
  9234. + int long_term_locked_znode;
  9235. +
  9236. + int inode_sem_r;
  9237. + int inode_sem_w;
  9238. +
  9239. + int d_refs;
  9240. + int x_refs;
  9241. + int t_refs;
  9242. +} reiser4_lock_cnt_info;
  9243. +
  9244. +extern struct reiser4_lock_cnt_info *reiser4_lock_counters(void);
  9245. +#define IN_CONTEXT(a, b) (is_in_reiser4_context() ? (a) : (b))
  9246. +
  9247. +/* increment lock-counter @counter, if present */
  9248. +#define LOCK_CNT_INC(counter) \
  9249. + IN_CONTEXT(++(reiser4_lock_counters()->counter), 0)
  9250. +
  9251. +/* decrement lock-counter @counter, if present */
  9252. +#define LOCK_CNT_DEC(counter) \
  9253. + IN_CONTEXT(--(reiser4_lock_counters()->counter), 0)
  9254. +
  9255. +/* check that lock-counter is zero. This is for use in assertions */
  9256. +#define LOCK_CNT_NIL(counter) \
  9257. + IN_CONTEXT(reiser4_lock_counters()->counter == 0, 1)
  9258. +
  9259. +/* check that lock-counter is greater than zero. This is for use in
  9260. + * assertions */
  9261. +#define LOCK_CNT_GTZ(counter) \
  9262. + IN_CONTEXT(reiser4_lock_counters()->counter > 0, 1)
  9263. +#define LOCK_CNT_LT(counter,n) \
  9264. + IN_CONTEXT(reiser4_lock_counters()->counter < n, 1)
  9265. +
  9266. +#else /* REISER4_DEBUG */
  9267. +
  9268. +/* no-op versions on the above */
  9269. +
  9270. +typedef struct reiser4_lock_cnt_info {
  9271. +} reiser4_lock_cnt_info;
  9272. +
  9273. +#define reiser4_lock_counters() ((reiser4_lock_cnt_info *)NULL)
  9274. +#define LOCK_CNT_INC(counter) noop
  9275. +#define LOCK_CNT_DEC(counter) noop
  9276. +#define LOCK_CNT_NIL(counter) (1)
  9277. +#define LOCK_CNT_GTZ(counter) (1)
  9278. +#define LOCK_CNT_LT(counter, n) (1)
  9279. +
  9280. +#endif /* REISER4_DEBUG */
  9281. +
  9282. +#define assert_spin_not_locked(lock) BUG_ON(0)
  9283. +#define assert_rw_write_locked(lock) BUG_ON(0)
  9284. +#define assert_rw_read_locked(lock) BUG_ON(0)
  9285. +#define assert_rw_locked(lock) BUG_ON(0)
  9286. +#define assert_rw_not_write_locked(lock) BUG_ON(0)
  9287. +#define assert_rw_not_read_locked(lock) BUG_ON(0)
  9288. +#define assert_rw_not_locked(lock) BUG_ON(0)
  9289. +
  9290. +/* flags controlling debugging behavior. Are set through debug_flags=N mount
  9291. + option. */
  9292. +typedef enum {
  9293. + /* print a lot of information during panic. When this is on all jnodes
  9294. + * are listed. This can be *very* large output. Usually you don't want
  9295. + * this. Especially over serial line. */
  9296. + REISER4_VERBOSE_PANIC = 0x00000001,
  9297. + /* print a lot of information during umount */
  9298. + REISER4_VERBOSE_UMOUNT = 0x00000002,
  9299. + /* print gathered statistics on umount */
  9300. + REISER4_STATS_ON_UMOUNT = 0x00000004,
  9301. + /* check node consistency */
  9302. + REISER4_CHECK_NODE = 0x00000008
  9303. +} reiser4_debug_flags;
  9304. +
  9305. +extern int is_in_reiser4_context(void);
  9306. +
  9307. +/*
9308. + * evaluate expression @e only when within reiser4 context
  9309. + */
  9310. +#define ON_CONTEXT(e) do { \
  9311. + if (is_in_reiser4_context()) { \
  9312. + e; \
  9313. + } } while (0)
  9314. +
  9315. +/*
  9316. + * evaluate expression @e only when within reiser4_context and debugging is
  9317. + * on.
  9318. + */
  9319. +#define ON_DEBUG_CONTEXT(e) ON_DEBUG(ON_CONTEXT(e))
  9320. +
  9321. +/*
  9322. + * complain about unexpected function result and crash. Used in "default"
  9323. + * branches of switch statements and alike to assert that invalid results are
  9324. + * not silently ignored.
  9325. + */
  9326. +#define wrong_return_value(label, function) \
  9327. + impossible(label, "wrong return value from " function)
  9328. +
  9329. +/* Issue different types of reiser4 messages to the console */
  9330. +#define warning(label, format, ...) \
  9331. + DCALL(KERN_WARNING, \
  9332. + printk, 1, label, "WARNING: " format , ## __VA_ARGS__)
  9333. +#define notice(label, format, ...) \
  9334. + DCALL(KERN_NOTICE, \
  9335. + printk, 1, label, "NOTICE: " format , ## __VA_ARGS__)
  9336. +
  9337. +/* mark not yet implemented functionality */
  9338. +#define not_yet(label, format, ...) \
  9339. + reiser4_panic(label, "NOT YET IMPLEMENTED: " format , ## __VA_ARGS__)
  9340. +
  9341. +extern void reiser4_do_panic(const char *format, ...)
  9342. + __attribute__ ((noreturn, format(printf, 1, 2)));
  9343. +
  9344. +extern int reiser4_preempt_point(void);
  9345. +extern void reiser4_print_stats(void);
  9346. +
  9347. +#if REISER4_DEBUG
  9348. +extern int reiser4_no_counters_are_held(void);
  9349. +extern int reiser4_commit_check_locks(void);
  9350. +#else
  9351. +#define reiser4_no_counters_are_held() (1)
  9352. +#define reiser4_commit_check_locks() (1)
  9353. +#endif
  9354. +
  9355. +/* true if @i is power-of-two. Useful for rate-limited warnings, etc. */
  9356. +#define IS_POW(i) \
  9357. +({ \
  9358. + typeof(i) __i; \
  9359. + \
  9360. + __i = (i); \
  9361. + !(__i & (__i - 1)); \
  9362. +})
  9363. +
  9364. +#define KERNEL_DEBUGGER (1)
  9365. +
  9366. +#if KERNEL_DEBUGGER
  9367. +
  9368. +extern void reiser4_debugtrap(void);
  9369. +
  9370. +/*
  9371. + * Check condition @cond and drop into kernel debugger (kgdb) if it's true. If
  9372. + * kgdb is not compiled in, do nothing.
  9373. + */
  9374. +#define DEBUGON(cond) \
  9375. +({ \
  9376. + if (unlikely(cond)) \
  9377. + reiser4_debugtrap(); \
  9378. +})
  9379. +#else
  9380. +#define DEBUGON(cond) noop
  9381. +#endif
  9382. +
  9383. +/*
  9384. + * Error code tracing facility. (Idea is borrowed from XFS code.)
  9385. + *
  9386. + * Suppose some strange and/or unexpected code is returned from some function
  9387. + * (for example, write(2) returns -EEXIST). It is possible to place a
  9388. + * breakpoint in the reiser4_write(), but it is too late here. How to find out
  9389. + * in what particular place -EEXIST was generated first?
  9390. + *
  9391. + * In reiser4 all places where actual error codes are produced (that is,
  9392. + * statements of the form
  9393. + *
  9394. + * return -EFOO; // (1), or
  9395. + *
  9396. + * result = -EFOO; // (2)
  9397. + *
  9398. + * are replaced with
  9399. + *
  9400. + * return RETERR(-EFOO); // (1a), and
  9401. + *
  9402. + * result = RETERR(-EFOO); // (2a) respectively
  9403. + *
  9404. + * RETERR() macro fills a backtrace in reiser4_context. This back-trace is
  9405. + * printed in error and warning messages. Moreover, it's possible to put a
  9406. + * conditional breakpoint in reiser4_return_err (low-level function called
  9407. + * by RETERR() to do the actual work) to break into debugger immediately
  9408. + * when particular error happens.
  9409. + *
  9410. + */
  9411. +
  9412. +#if REISER4_DEBUG
  9413. +
  9414. +/*
  9415. + * data-type to store information about where error happened ("error site").
  9416. + */
  9417. +typedef struct err_site {
  9418. + int code; /* error code */
  9419. + const char *file; /* source file, filled by __FILE__ */
  9420. + int line; /* source file line, filled by __LINE__ */
  9421. +} err_site;
  9422. +
  9423. +extern void reiser4_return_err(int code, const char *file, int line);
  9424. +
  9425. +/*
  9426. + * fill &get_current_context()->err_site with error information.
  9427. + */
  9428. +#define RETERR(code) \
  9429. +({ \
  9430. + typeof(code) __code; \
  9431. + \
  9432. + __code = (code); \
  9433. + reiser4_return_err(__code, __FILE__, __LINE__); \
  9434. + __code; \
  9435. +})
  9436. +
  9437. +#else
  9438. +
  9439. +/*
  9440. + * no-op versions of the above
  9441. + */
  9442. +
  9443. +typedef struct err_site {
  9444. +} err_site;
  9445. +#define RETERR(code) code
  9446. +#endif
  9447. +
  9448. +#if REISER4_LARGE_KEY
  9449. +/*
  9450. + * conditionally compile arguments only if REISER4_LARGE_KEY is on.
  9451. + */
  9452. +#define ON_LARGE_KEY(...) __VA_ARGS__
  9453. +#else
  9454. +#define ON_LARGE_KEY(...)
  9455. +#endif
  9456. +
  9457. +/* __FS_REISER4_DEBUG_H__ */
  9458. +#endif
  9459. +
  9460. +/* Make Linus happy.
  9461. + Local variables:
  9462. + c-indentation-style: "K&R"
  9463. + mode-name: "LC"
  9464. + c-basic-offset: 8
  9465. + tab-width: 8
  9466. + fill-column: 120
  9467. + End:
  9468. +*/
  9469. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/dformat.h linux-4.14.2/fs/reiser4/dformat.h
  9470. --- linux-4.14.2.orig/fs/reiser4/dformat.h 1970-01-01 01:00:00.000000000 +0100
  9471. +++ linux-4.14.2/fs/reiser4/dformat.h 2017-11-26 22:13:09.000000000 +0100
  9472. @@ -0,0 +1,73 @@
  9473. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  9474. + reiser4/README */
  9475. +
  9476. +/* Formats of on-disk data and conversion functions. */
  9477. +
  9478. +/* put all item formats in the files describing the particular items,
  9479. + our model is, everything you need to do to add an item to reiser4,
  9480. + (excepting the changes to the plugin that uses the item which go
  9481. + into the file defining that plugin), you put into one file. */
  9482. +/* Data on disk are stored in little-endian format.
  9483. + To declare fields of on-disk structures, use d8, d16, d32 and d64.
  9484. + d??tocpu() and cputod??() to convert. */
  9485. +
  9486. +#if !defined(__FS_REISER4_DFORMAT_H__)
  9487. +#define __FS_REISER4_DFORMAT_H__
  9488. +
  9489. +#include "debug.h"
  9490. +
  9491. +#include <asm/byteorder.h>
  9492. +#include <asm/unaligned.h>
  9493. +#include <linux/types.h>
  9494. +
  9495. +typedef __u8 d8;
  9496. +typedef __le16 d16;
  9497. +typedef __le32 d32;
  9498. +typedef __le64 d64;
  9499. +
  9500. +#define PACKED __attribute__((packed))
  9501. +
  9502. +/* data-type for block number */
  9503. +typedef __u64 reiser4_block_nr;
  9504. +
  9505. +/* data-type for block number on disk, disk format */
  9506. +typedef __le64 reiser4_dblock_nr;
  9507. +
  9508. +/**
  9509. + * disk_addr_eq - compare disk addresses
9510. + * @b1: pointer to block number to compare
9511. + * @b2: pointer to block number to compare
  9512. + *
9513. + * Returns true if disk addresses are the same
  9514. + */
  9515. +static inline int disk_addr_eq(const reiser4_block_nr * b1,
  9516. + const reiser4_block_nr * b2)
  9517. +{
  9518. + assert("nikita-1033", b1 != NULL);
  9519. + assert("nikita-1266", b2 != NULL);
  9520. +
  9521. + return !memcmp(b1, b2, sizeof *b1);
  9522. +}
  9523. +
  9524. +/* structure of master reiser4 super block */
  9525. +typedef struct reiser4_master_sb {
  9526. + char magic[16]; /* "ReIsEr4" */
  9527. + __le16 disk_plugin_id; /* id of disk layout plugin */
  9528. + __le16 blocksize;
  9529. + char uuid[16]; /* unique id */
  9530. + char label[16]; /* filesystem label */
  9531. + __le64 diskmap; /* location of the diskmap. 0 if not present */
  9532. +} reiser4_master_sb;
  9533. +
  9534. +/* __FS_REISER4_DFORMAT_H__ */
  9535. +#endif
  9536. +
  9537. +/*
  9538. + * Local variables:
  9539. + * c-indentation-style: "K&R"
  9540. + * mode-name: "LC"
  9541. + * c-basic-offset: 8
  9542. + * tab-width: 8
  9543. + * fill-column: 79
  9544. + * End:
  9545. + */
  9546. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/discard.c linux-4.14.2/fs/reiser4/discard.c
  9547. --- linux-4.14.2.orig/fs/reiser4/discard.c 1970-01-01 01:00:00.000000000 +0100
  9548. +++ linux-4.14.2/fs/reiser4/discard.c 2017-11-26 22:13:09.000000000 +0100
  9549. @@ -0,0 +1,179 @@
  9550. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  9551. + * reiser4/README */
  9552. +
  9553. +/* TRIM/discard interoperation subsystem for reiser4. */
  9554. +
  9555. +/*
  9556. + * This subsystem is responsible for populating an atom's ->discard_set and
  9557. + * (later) converting it into a series of discard calls to the kernel.
  9558. + *
  9559. + * The discard is an in-kernel interface for notifying the storage
  9560. + * hardware about blocks that are being logically freed by the filesystem.
  9561. + * This is done via calling the blkdev_issue_discard() function. There are
  9562. + * restrictions on block ranges: they should constitute at least one erase unit
  9563. + * in length and be correspondingly aligned. Otherwise a discard request will
  9564. + * be ignored.
  9565. + *
  9566. + * The erase unit size is kept in struct queue_limits as discard_granularity.
  9567. + * The offset from the partition start to the first erase unit is kept in
  9568. + * struct queue_limits as discard_alignment.
  9569. + *
  9570. + * At atom level, we record numbers of all blocks that happen to be deallocated
  9571. + * during the transaction. Then we read the generated set, filter out any blocks
  9572. + * that have since been allocated again and issue discards for everything still
  9573. + * valid. This is what discard.[ch] is here for.
  9574. + *
  9575. + * However, simply iterating through the recorded extents is not enough:
  9576. + * - if a single extent is smaller than the erase unit, then this particular
  9577. + * extent won't be discarded even if it is surrounded by enough free blocks
  9578. + * to constitute a whole erase unit;
  9579. + * - we won't be able to merge small adjacent extents forming an extent long
  9580. + * enough to be discarded.
  9581. + *
  9582. + * MECHANISM:
  9583. + *
  9584. + * During the transaction deallocated extents are recorded in atom's delete
  9585. + * set. In reiser4, there are two methods to deallocate a block:
  9586. + * 1. deferred deallocation, enabled by BA_DEFER flag to reiser4_dealloc_block().
  9587. + * In this mode, blocks are stored to delete set instead of being marked free
  9588. + * immediately. After committing the transaction, the delete set is "applied"
  9589. + * by the block allocator and all these blocks are marked free in memory
  9590. + * (see reiser4_post_write_back_hook()).
  9591. + * Space management plugins also read the delete set to update on-disk
  9592. + * allocation records (see reiser4_pre_commit_hook()).
  9593. + * 2. immediate deallocation (the opposite).
  9594. + * In this mode, blocks are marked free immediately. This is used by the
  9595. + * journal subsystem to manage space used by the journal records, so these
  9596. + * allocations are not visible to the space management plugins and never hit
  9597. + * the disk.
  9598. + *
  9599. + * When discard is enabled, all immediate deallocations become deferred. This
  9600. + * is OK because journal's allocations happen after reiser4_pre_commit_hook()
  9601. + * where the on-disk space allocation records are updated. So, in this mode
  9602. + * the atom's delete set becomes "the discard set" -- list of blocks that have
  9603. + * to be considered for discarding.
  9604. + *
  9605. + * Discarding is performed before completing deferred deallocations, hence all
  9606. + * extents in the discard set are still marked as allocated and cannot contain
  9607. + * any data. Thus we can avoid any checks for blocks directly present in the
  9608. + * discard set.
  9609. + *
  9610. + * For now, we don't perform "padding" of extents to erase unit boundaries.
  9611. + * This means if extents are not aligned with the device's erase unit lattice,
  9612. + * the partial erase units at head and tail of extents are truncated by kernel
  9613. + * (in blkdev_issue_discard()).
  9614. + *
  9615. + * So, at commit time the following actions take place:
  9616. + * - delete sets are merged to form the discard set;
  9617. + * - elements of the discard set are sorted;
  9618. + * - the discard set is iterated, joining any adjacent extents;
  9619. + * - for each extent, a single call to blkdev_issue_discard() is done.
  9620. + */
  9621. +
  9622. +#include "discard.h"
  9623. +#include "context.h"
  9624. +#include "debug.h"
  9625. +#include "txnmgr.h"
  9626. +#include "super.h"
  9627. +
  9628. +#include <linux/slab.h>
  9629. +#include <linux/fs.h>
  9630. +#include <linux/blkdev.h>
  9631. +
  9632. +static int __discard_extent(struct block_device *bdev, sector_t start,
  9633. + sector_t len)
  9634. +{
  9635. + assert("intelfx-21", bdev != NULL);
  9636. +
  9637. + return blkdev_issue_discard(bdev, start, len, reiser4_ctx_gfp_mask_get(),
  9638. + 0);
  9639. +}
  9640. +
  9641. +static int discard_extent(txn_atom *atom UNUSED_ARG,
  9642. + const reiser4_block_nr* start,
  9643. + const reiser4_block_nr* len,
  9644. + void *data UNUSED_ARG)
  9645. +{
  9646. + struct super_block *sb = reiser4_get_current_sb();
  9647. + struct block_device *bdev = sb->s_bdev;
  9648. +
  9649. + sector_t extent_start_sec, extent_len_sec;
  9650. +
  9651. + const int sec_per_blk = sb->s_blocksize >> 9;
  9652. +
  9653. + /* we assume block = N * sector */
  9654. + assert("intelfx-7", sec_per_blk > 0);
  9655. +
  9656. + /* convert extent to sectors */
  9657. + extent_start_sec = *start * sec_per_blk;
  9658. + extent_len_sec = *len * sec_per_blk;
  9659. +
  9660. + /* discard the extent, don't pad it to erase unit boundaries for now */
  9661. + return __discard_extent(bdev, extent_start_sec, extent_len_sec);
  9662. +}
  9663. +
  9664. +int discard_atom(txn_atom *atom, struct list_head *processed_set)
  9665. +{
  9666. + int ret;
  9667. + struct list_head discard_set;
  9668. +
  9669. + if (!reiser4_is_set(reiser4_get_current_sb(), REISER4_DISCARD)) {
  9670. + spin_unlock_atom(atom);
  9671. + return 0;
  9672. + }
  9673. +
  9674. + assert("intelfx-28", atom != NULL);
  9675. + assert("intelfx-59", processed_set != NULL);
  9676. +
  9677. + if (list_empty(&atom->discard.delete_set)) {
  9678. + /* Nothing left to discard. */
  9679. + spin_unlock_atom(atom);
  9680. + return 0;
  9681. + }
  9682. +
  9683. + /* Take the delete sets from the atom in order to release atom spinlock. */
  9684. + blocknr_list_init(&discard_set);
  9685. + blocknr_list_merge(&atom->discard.delete_set, &discard_set);
  9686. + spin_unlock_atom(atom);
  9687. +
  9688. + /* Sort the discard list, joining adjacent and overlapping extents. */
  9689. + blocknr_list_sort_and_join(&discard_set);
  9690. +
  9691. + /* Perform actual dirty work. */
  9692. + ret = blocknr_list_iterator(NULL, &discard_set, &discard_extent, NULL, 0);
  9693. +
  9694. + /* Add processed extents to the temporary list. */
  9695. + blocknr_list_merge(&discard_set, processed_set);
  9696. +
  9697. + if (ret != 0) {
  9698. + return ret;
  9699. + }
  9700. +
  9701. + /* Let's do this again for any new extents in the atom's discard set. */
  9702. + return -E_REPEAT;
  9703. +}
  9704. +
  9705. +void discard_atom_post(txn_atom *atom, struct list_head *processed_set)
  9706. +{
  9707. + assert("intelfx-60", atom != NULL);
  9708. + assert("intelfx-61", processed_set != NULL);
  9709. +
  9710. + if (!reiser4_is_set(reiser4_get_current_sb(), REISER4_DISCARD)) {
  9711. + spin_unlock_atom(atom);
  9712. + return;
  9713. + }
  9714. +
  9715. + blocknr_list_merge(processed_set, &atom->discard.delete_set);
  9716. + spin_unlock_atom(atom);
  9717. +}
  9718. +
  9719. +/* Make Linus happy.
  9720. + Local variables:
  9721. + c-indentation-style: "K&R"
  9722. + mode-name: "LC"
  9723. + c-basic-offset: 8
  9724. + tab-width: 8
  9725. + fill-column: 120
  9726. + scroll-step: 1
  9727. + End:
  9728. +*/
  9729. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/discard.h linux-4.14.2/fs/reiser4/discard.h
  9730. --- linux-4.14.2.orig/fs/reiser4/discard.h 1970-01-01 01:00:00.000000000 +0100
  9731. +++ linux-4.14.2/fs/reiser4/discard.h 2017-11-26 22:13:09.000000000 +0100
  9732. @@ -0,0 +1,42 @@
  9733. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  9734. + * reiser4/README */
  9735. +
  9736. +/* TRIM/discard interoperation subsystem for reiser4. */
  9737. +
  9738. +#if !defined(__FS_REISER4_DISCARD_H__)
  9739. +#define __FS_REISER4_DISCARD_H__
  9740. +
  9741. +#include "forward.h"
  9742. +#include "dformat.h"
  9743. +
  9744. +/**
  9745. + * Issue discard requests for all block extents recorded in @atom's delete sets,
  9746. + * if discard is enabled. The extents processed are removed from the @atom's
  9747. + * delete sets and stored in @processed_set.
  9748. + *
  9749. + * @atom must be locked on entry and is unlocked on exit.
  9750. + * @processed_set must be initialized with blocknr_list_init().
  9751. + */
  9752. +extern int discard_atom(txn_atom *atom, struct list_head *processed_set);
  9753. +
  9754. +/**
  9755. + * Splices @processed_set back to @atom's delete set.
  9756. + * Must be called after discard_atom() loop, using the same @processed_set.
  9757. + *
  9758. + * @atom must be locked on entry and is unlocked on exit.
  9759. + * @processed_set must be the same as passed to discard_atom().
  9760. + */
  9761. +extern void discard_atom_post(txn_atom *atom, struct list_head *processed_set);
  9762. +
  9763. +/* __FS_REISER4_DISCARD_H__ */
  9764. +#endif
  9765. +
  9766. +/* Make Linus happy.
  9767. + Local variables:
  9768. + c-indentation-style: "K&R"
  9769. + mode-name: "LC"
  9770. + c-basic-offset: 8
  9771. + tab-width: 8
  9772. + fill-column: 120
  9773. + End:
  9774. +*/
  9775. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/dscale.c linux-4.14.2/fs/reiser4/dscale.c
  9776. --- linux-4.14.2.orig/fs/reiser4/dscale.c 1970-01-01 01:00:00.000000000 +0100
  9777. +++ linux-4.14.2/fs/reiser4/dscale.c 2017-11-26 22:13:09.000000000 +0100
  9778. @@ -0,0 +1,192 @@
  9779. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  9780. + * reiser4/README */
  9781. +
  9782. +/* Scalable on-disk integers */
  9783. +
  9784. +/*
  9785. + * Various on-disk structures contain integer-like structures. Stat-data
  9786. + * contain [yes, "data" is plural, check the dictionary] file size, link
  9787. + * count; extent unit contains extent width etc. To accommodate for general
  9788. + * case enough space is reserved to keep largest possible value. 64 bits in
  9789. + * all cases above. But in overwhelming majority of cases numbers actually
  9790. + * stored in these fields will be comparatively small and reserving 8 bytes is
  9791. + * a waste of precious disk bandwidth.
  9792. + *
  9793. + * Scalable integers are one way to solve this problem. dscale_write()
  9794. + * function stores __u64 value in the given area consuming from 1 to 9 bytes,
  9795. + * depending on the magnitude of the value supplied. dscale_read() reads value
  9796. + * previously stored by dscale_write().
  9797. + *
  9798. + * dscale_write() produces format not completely unlike of UTF: two highest
  9799. + * bits of the first byte are used to store "tag". One of 4 possible tag
  9800. + * values is chosen depending on the number being encoded:
  9801. + *
  9802. + * 0 ... 0x3f => 0 [table 1]
  9803. + * 0x40 ... 0x3fff => 1
  9804. + * 0x4000 ... 0x3fffffff => 2
  9805. + * 0x40000000 ... 0xffffffffffffffff => 3
  9806. + *
  9807. + * (see dscale_range() function)
  9808. + *
  9809. + * Values in the range 0x40000000 ... 0xffffffffffffffff require 8 full bytes
  9810. + * to be stored, so in this case there is no place in the first byte to store
  9811. + * tag. For such values tag is stored in an extra 9th byte.
  9812. + *
  9813. + * As _highest_ bits are used for the test (which is natural) scaled integers
  9814. + * are stored in BIG-ENDIAN format in contrast with the rest of reiser4 which
  9815. + * uses LITTLE-ENDIAN.
  9816. + *
  9817. + */
  9818. +
  9819. +#include "debug.h"
  9820. +#include "dscale.h"
  9821. +
  9822. +/* return tag of scaled integer stored at @address */
  9823. +static int gettag(const unsigned char *address)
  9824. +{
  9825. + /* tag is stored in two highest bits */
  9826. + return (*address) >> 6;
  9827. +}
  9828. +
  9829. +/* clear tag from value. Clear tag embedded into @value. */
  9830. +static void cleartag(__u64 *value, int tag)
  9831. +{
  9832. + /*
  9833. + * W-w-what ?!
  9834. + *
  9835. + * Actually, this is rather simple: @value passed here was read by
  9836. + * dscale_read(), converted from BIG-ENDIAN, and padded to __u64 by
  9837. + * zeroes. Tag is still stored in the highest (arithmetically)
  9838. + * non-zero bits of @value, but relative position of tag within __u64
  9839. + * depends on @tag.
  9840. + *
  9841. + * For example if @tag is 0, it's stored in the 2 highest bits of the lowest
  9842. + * byte, and its offset (counting from lowest bit) is 8 - 2 == 6 bits.
  9843. + *
  9844. + * If tag is 1, it's stored in two highest bits of 2nd lowest byte,
  9845. + * and its offset is (2 * 8) - 2 == 14 bits.
  9846. + *
  9847. + * See table 1 above for details.
  9848. + *
  9849. + * All these cases are captured by the formula:
  9850. + */
  9851. + *value &= ~(3 << (((1 << tag) << 3) - 2));
  9852. + /*
  9853. + * That is, clear two (3 == 0t11) bits at the offset
  9854. + *
  9855. + * 8 * (2 ^ tag) - 2,
  9856. + *
  9857. + * that is, two highest bits of (2 ^ tag)-th byte of @value.
  9858. + */
  9859. +}
  9860. +
  9861. +/* return tag for @value. See table 1 above for details. */
  9862. +static int dscale_range(__u64 value)
  9863. +{
  9864. + if (value > 0x3fffffff)
  9865. + return 3;
  9866. + if (value > 0x3fff)
  9867. + return 2;
  9868. + if (value > 0x3f)
  9869. + return 1;
  9870. + return 0;
  9871. +}
  9872. +
  9873. +/* restore value stored at @address by dscale_write() and return number of
  9874. + * bytes consumed */
  9875. +int dscale_read(unsigned char *address, __u64 *value)
  9876. +{
  9877. + int tag;
  9878. +
  9879. + /* read tag */
  9880. + tag = gettag(address);
  9881. + switch (tag) {
  9882. + case 3:
  9883. + /* In this case tag is stored in an extra byte, skip this byte
  9884. + * and decode value stored in the next 8 bytes.*/
  9885. + *value = __be64_to_cpu(get_unaligned((__be64 *)(address + 1)));
  9886. + /* worst case: 8 bytes for value itself plus one byte for
  9887. + * tag. */
  9888. + return 9;
  9889. + case 0:
  9890. + *value = get_unaligned(address);
  9891. + break;
  9892. + case 1:
  9893. + *value = __be16_to_cpu(get_unaligned((__be16 *)address));
  9894. + break;
  9895. + case 2:
  9896. + *value = __be32_to_cpu(get_unaligned((__be32 *)address));
  9897. + break;
  9898. + default:
  9899. + return RETERR(-EIO);
  9900. + }
  9901. + /* clear tag embedded into @value */
  9902. + cleartag(value, tag);
  9903. + /* number of bytes consumed is (2 ^ tag)---see table 1. */
  9904. + return 1 << tag;
  9905. +}
  9906. +
  9907. +/* number of bytes consumed */
  9908. +int dscale_bytes_to_read(unsigned char *address)
  9909. +{
  9910. + int tag;
  9911. +
  9912. + tag = gettag(address);
  9913. + switch (tag) {
  9914. + case 0:
  9915. + case 1:
  9916. + case 2:
  9917. + return 1 << tag;
  9918. + case 3:
  9919. + return 9;
  9920. + default:
  9921. + return RETERR(-EIO);
  9922. + }
  9923. +}
  9924. +
  9925. +/* store @value at @address and return number of bytes consumed */
  9926. +int dscale_write(unsigned char *address, __u64 value)
  9927. +{
  9928. + int tag;
  9929. + int shift;
  9930. + __be64 v;
  9931. + unsigned char *valarr;
  9932. +
  9933. + tag = dscale_range(value);
  9934. + v = __cpu_to_be64(value);
  9935. + valarr = (unsigned char *)&v;
  9936. + shift = (tag == 3) ? 1 : 0;
  9937. + memcpy(address + shift, valarr + sizeof v - (1 << tag), 1 << tag);
  9938. + *address |= (tag << 6);
  9939. + return shift + (1 << tag);
  9940. +}
  9941. +
  9942. +/* number of bytes required to store @value */
  9943. +int dscale_bytes_to_write(__u64 value)
  9944. +{
  9945. + int bytes;
  9946. +
  9947. + bytes = 1 << dscale_range(value);
  9948. + if (bytes == 8)
  9949. + ++bytes;
  9950. + return bytes;
  9951. +}
  9952. +
  9953. +/* returns true if @value and @other require the same number of bytes to be
  9954. + * stored. Used to detect when data structure (like stat-data) has to be
  9955. + * expanded or contracted. */
  9956. +int dscale_fit(__u64 value, __u64 other)
  9957. +{
  9958. + return dscale_range(value) == dscale_range(other);
  9959. +}
  9960. +
  9961. +/* Make Linus happy.
  9962. + Local variables:
  9963. + c-indentation-style: "K&R"
  9964. + mode-name: "LC"
  9965. + c-basic-offset: 8
  9966. + tab-width: 8
  9967. + fill-column: 120
  9968. + scroll-step: 1
  9969. + End:
  9970. +*/
  9971. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/dscale.h linux-4.14.2/fs/reiser4/dscale.h
  9972. --- linux-4.14.2.orig/fs/reiser4/dscale.h 1970-01-01 01:00:00.000000000 +0100
  9973. +++ linux-4.14.2/fs/reiser4/dscale.h 2017-11-26 22:13:09.000000000 +0100
  9974. @@ -0,0 +1,28 @@
  9975. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  9976. + * reiser4/README */
  9977. +
  9978. +/* Scalable on-disk integers. See dscale.c for details. */
  9979. +
  9980. +#if !defined(__FS_REISER4_DSCALE_H__)
  9981. +#define __FS_REISER4_DSCALE_H__
  9982. +
  9983. +#include "dformat.h"
  9984. +
  9985. +extern int dscale_read(unsigned char *address, __u64 *value);
  9986. +extern int dscale_write(unsigned char *address, __u64 value);
  9987. +extern int dscale_bytes_to_read(unsigned char *address);
  9988. +extern int dscale_bytes_to_write(__u64 value);
  9989. +extern int dscale_fit(__u64 value, __u64 other);
  9990. +
  9991. +/* __FS_REISER4_DSCALE_H__ */
  9992. +#endif
  9993. +
  9994. +/* Make Linus happy.
  9995. + Local variables:
  9996. + c-indentation-style: "K&R"
  9997. + mode-name: "LC"
  9998. + c-basic-offset: 8
  9999. + tab-width: 8
  10000. + fill-column: 120
  10001. + End:
  10002. +*/
  10003. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/entd.c linux-4.14.2/fs/reiser4/entd.c
  10004. --- linux-4.14.2.orig/fs/reiser4/entd.c 1970-01-01 01:00:00.000000000 +0100
  10005. +++ linux-4.14.2/fs/reiser4/entd.c 2017-11-26 22:13:09.000000000 +0100
  10006. @@ -0,0 +1,361 @@
  10007. +/* Copyright 2003, 2004 by Hans Reiser, licensing governed by
  10008. + * reiser4/README */
  10009. +
  10010. +/* Ent daemon. */
  10011. +
  10012. +#include "debug.h"
  10013. +#include "txnmgr.h"
  10014. +#include "tree.h"
  10015. +#include "entd.h"
  10016. +#include "super.h"
  10017. +#include "context.h"
  10018. +#include "reiser4.h"
  10019. +#include "vfs_ops.h"
  10020. +#include "page_cache.h"
  10021. +#include "inode.h"
  10022. +
  10023. +#include <linux/sched.h> /* struct task_struct */
  10024. +#include <linux/suspend.h>
  10025. +#include <linux/kernel.h>
  10026. +#include <linux/writeback.h>
  10027. +#include <linux/time.h> /* INITIAL_JIFFIES */
  10028. +#include <linux/backing-dev.h> /* bdi_write_congested */
  10029. +#include <linux/wait.h>
  10030. +#include <linux/kthread.h>
  10031. +#include <linux/freezer.h>
  10032. +
  10033. +#define DEF_PRIORITY 12
  10034. +#define MAX_ENTD_ITERS 10
  10035. +
  10036. +static void entd_flush(struct super_block *, struct wbq *);
  10037. +static int entd(void *arg);
  10038. +
  10039. +/*
  10040. + * set ->comm field of ent thread to make its state visible to the user level
  10041. + */
  10042. +#define entd_set_comm(state) \
  10043. + snprintf(current->comm, sizeof(current->comm), \
  10044. + "ent:%s%s", super->s_id, (state))
  10045. +
  10046. +/**
  10047. + * reiser4_init_entd - initialize entd context and start kernel daemon
  10048. + * @super: super block to start ent thread for
  10049. + *
  10050. + * Creates entd contexts, starts kernel thread and waits until it
  10051. + * initializes.
  10052. + */
  10053. +int reiser4_init_entd(struct super_block *super)
  10054. +{
  10055. + entd_context *ctx;
  10056. +
  10057. + assert("nikita-3104", super != NULL);
  10058. +
  10059. + ctx = get_entd_context(super);
  10060. +
  10061. + memset(ctx, 0, sizeof *ctx);
  10062. + spin_lock_init(&ctx->guard);
  10063. + init_waitqueue_head(&ctx->wait);
  10064. +#if REISER4_DEBUG
  10065. + INIT_LIST_HEAD(&ctx->flushers_list);
  10066. +#endif
  10067. + /* lists of writepage requests */
  10068. + INIT_LIST_HEAD(&ctx->todo_list);
  10069. + INIT_LIST_HEAD(&ctx->done_list);
  10070. + /* start entd */
  10071. + ctx->tsk = kthread_run(entd, super, "ent:%s", super->s_id);
  10072. + if (IS_ERR(ctx->tsk))
  10073. + return PTR_ERR(ctx->tsk);
  10074. + return 0;
  10075. +}
  10076. +
  10077. +static void put_wbq(struct wbq *rq)
  10078. +{
  10079. + iput(rq->mapping->host);
  10080. + complete(&rq->completion);
  10081. +}
  10082. +
  10083. +/* ent should be locked */
  10084. +static struct wbq *__get_wbq(entd_context * ent)
  10085. +{
  10086. + struct wbq *wbq;
  10087. +
  10088. + if (list_empty(&ent->todo_list))
  10089. + return NULL;
  10090. +
  10091. + ent->nr_todo_reqs--;
  10092. + wbq = list_entry(ent->todo_list.next, struct wbq, link);
  10093. + list_del_init(&wbq->link);
  10094. + return wbq;
  10095. +}
  10096. +
  10097. +/* ent thread function */
  10098. +static int entd(void *arg)
  10099. +{
  10100. + struct super_block *super;
  10101. + entd_context *ent;
  10102. + int done = 0;
  10103. +
  10104. + super = arg;
  10105. + /* do_fork() just copies task_struct into the new
  10106. + thread. ->fs_context shouldn't be copied of course. This shouldn't
  10107. + be a problem for the rest of the code though.
  10108. + */
  10109. + current->journal_info = NULL;
  10110. +
  10111. + ent = get_entd_context(super);
  10112. +
  10113. + while (!done) {
  10114. + try_to_freeze();
  10115. +
  10116. + spin_lock(&ent->guard);
  10117. + while (ent->nr_todo_reqs != 0) {
  10118. + struct wbq *rq;
  10119. +
  10120. + assert("", list_empty(&ent->done_list));
  10121. +
  10122. + /* take request from the queue head */
  10123. + rq = __get_wbq(ent);
  10124. + assert("", rq != NULL);
  10125. + ent->cur_request = rq;
  10126. + spin_unlock(&ent->guard);
  10127. +
  10128. + entd_set_comm("!");
  10129. + entd_flush(super, rq);
  10130. +
  10131. + put_wbq(rq);
  10132. +
  10133. + /*
  10134. + * wakeup all requestors and iput their inodes
  10135. + */
  10136. + spin_lock(&ent->guard);
  10137. + while (!list_empty(&ent->done_list)) {
  10138. + rq = list_entry(ent->done_list.next, struct wbq, link);
  10139. + list_del_init(&rq->link);
  10140. + ent->nr_done_reqs--;
  10141. + spin_unlock(&ent->guard);
  10142. + assert("", rq->written == 1);
  10143. + put_wbq(rq);
  10144. + spin_lock(&ent->guard);
  10145. + }
  10146. + }
  10147. + spin_unlock(&ent->guard);
  10148. +
  10149. + entd_set_comm(".");
  10150. +
  10151. + {
  10152. + DEFINE_WAIT(__wait);
  10153. +
  10154. + do {
  10155. + prepare_to_wait(&ent->wait, &__wait, TASK_INTERRUPTIBLE);
  10156. + if (kthread_should_stop()) {
  10157. + done = 1;
  10158. + break;
  10159. + }
  10160. + if (ent->nr_todo_reqs != 0)
  10161. + break;
  10162. + schedule();
  10163. + } while (0);
  10164. + finish_wait(&ent->wait, &__wait);
  10165. + }
  10166. + }
  10167. + BUG_ON(ent->nr_todo_reqs != 0);
  10168. + return 0;
  10169. +}
  10170. +
  10171. +/**
  10172. + * reiser4_done_entd - stop entd kernel thread
  10173. + * @super: super block to stop ent thread for
  10174. + *
  10175. + * It is called on umount. Sends stop signal to entd and wait until it handles
  10176. + * it.
  10177. + */
  10178. +void reiser4_done_entd(struct super_block *super)
  10179. +{
  10180. + entd_context *ent;
  10181. +
  10182. + assert("nikita-3103", super != NULL);
  10183. +
  10184. + ent = get_entd_context(super);
  10185. + assert("zam-1055", ent->tsk != NULL);
  10186. + kthread_stop(ent->tsk);
  10187. +}
  10188. +
  10189. +/* called at the beginning of jnode_flush to register flusher thread with ent
  10190. + * daemon */
  10191. +void reiser4_enter_flush(struct super_block *super)
  10192. +{
  10193. + entd_context *ent;
  10194. +
  10195. + assert("zam-1029", super != NULL);
  10196. + ent = get_entd_context(super);
  10197. +
  10198. + assert("zam-1030", ent != NULL);
  10199. +
  10200. + spin_lock(&ent->guard);
  10201. + ent->flushers++;
  10202. +#if REISER4_DEBUG
  10203. + list_add(&get_current_context()->flushers_link, &ent->flushers_list);
  10204. +#endif
  10205. + spin_unlock(&ent->guard);
  10206. +}
  10207. +
  10208. +/* called at the end of jnode_flush */
  10209. +void reiser4_leave_flush(struct super_block *super)
  10210. +{
  10211. + entd_context *ent;
  10212. + int wake_up_ent;
  10213. +
  10214. + assert("zam-1027", super != NULL);
  10215. + ent = get_entd_context(super);
  10216. +
  10217. + assert("zam-1028", ent != NULL);
  10218. +
  10219. + spin_lock(&ent->guard);
  10220. + ent->flushers--;
  10221. + wake_up_ent = (ent->flushers == 0 && ent->nr_todo_reqs != 0);
  10222. +#if REISER4_DEBUG
  10223. + list_del_init(&get_current_context()->flushers_link);
  10224. +#endif
  10225. + spin_unlock(&ent->guard);
  10226. + if (wake_up_ent)
  10227. + wake_up_process(ent->tsk);
  10228. +}
  10229. +
  10230. +#define ENTD_CAPTURE_APAGE_BURST SWAP_CLUSTER_MAX
  10231. +
  10232. +static void entd_flush(struct super_block *super, struct wbq *rq)
  10233. +{
  10234. + reiser4_context ctx;
  10235. +
  10236. + init_stack_context(&ctx, super);
  10237. + ctx.entd = 1;
  10238. + ctx.gfp_mask = GFP_NOFS;
  10239. +
  10240. + rq->wbc->range_start = page_offset(rq->page);
  10241. + rq->wbc->range_end = rq->wbc->range_start +
  10242. + (ENTD_CAPTURE_APAGE_BURST << PAGE_SHIFT);
  10243. +
  10244. +
  10245. + rq->mapping->a_ops->writepages(rq->mapping, rq->wbc);
  10246. +
  10247. + if (rq->wbc->nr_to_write > 0) {
  10248. + long result;
  10249. + struct bdi_writeback *wb;
  10250. + struct wb_writeback_work work = {
  10251. + .sb = super,
  10252. + .sync_mode = WB_SYNC_NONE,
  10253. + .nr_pages = LONG_MAX,
  10254. + .range_cyclic = 0,
  10255. + .reason = WB_REASON_VMSCAN,
  10256. + };
  10257. + rq->wbc->sync_mode = work.sync_mode,
  10258. + rq->wbc->range_cyclic = work.range_cyclic,
  10259. + rq->wbc->range_start = 0;
  10260. + rq->wbc->range_end = LLONG_MAX;
  10261. + /*
  10262. + * we don't need to pin superblock for writeback:
  10263. + * this is implicitly pinned by write_page_by_ent
  10264. + * (via igrab), so that shutdown_super() will wait
  10265. + * (on reiser4_put_super) for entd completion.
  10266. + */
  10267. + wb = &inode_to_bdi(rq->mapping->host)->wb;
  10268. +
  10269. + spin_lock(&wb->list_lock);
  10270. + result = generic_writeback_sb_inodes(super,
  10271. + wb,
  10272. + rq->wbc,
  10273. + &work,
  10274. + true);
  10275. + spin_unlock(&wb->list_lock);
  10276. + }
  10277. + rq->wbc->nr_to_write = ENTD_CAPTURE_APAGE_BURST;
  10278. +
  10279. + reiser4_writeout(super, rq->wbc);
  10280. + context_set_commit_async(&ctx);
  10281. + reiser4_exit_context(&ctx);
  10282. +}
  10283. +
  10284. +/**
  10285. + * write_page_by_ent - ask entd thread to flush this page as part of slum
  10286. + * @page: page to be written
  10287. + * @wbc: writeback control passed to reiser4_writepage
  10288. + *
  10289. + * Creates a request, puts it on entd list of requests, wakeups entd if
  10290. + * necessary, waits until entd completes with the request.
  10291. + */
  10292. +int write_page_by_ent(struct page *page, struct writeback_control *wbc)
  10293. +{
  10294. + struct super_block *sb;
  10295. + struct inode *inode;
  10296. + entd_context *ent;
  10297. + struct wbq rq;
  10298. +
  10299. + assert("", PageLocked(page));
  10300. + assert("", page->mapping != NULL);
  10301. +
  10302. + sb = page->mapping->host->i_sb;
  10303. + ent = get_entd_context(sb);
  10304. + assert("", ent && ent->done == 0);
  10305. +
  10306. + /*
  10307. + * we are going to unlock page and ask ent thread to write the
  10308. + * page. Re-dirty page before unlocking so that if ent thread fails to
  10309. + * write it - it will remain dirty
  10310. + */
  10311. + set_page_dirty_notag(page);
  10312. + account_page_redirty(page);
  10313. +
  10314. + /*
  10315. + * pin inode in memory, unlock page, entd_flush will iput. We can not
  10316. + * iput here because we can not allow delete_inode to be called here
  10317. + */
  10318. + inode = igrab(page->mapping->host);
  10319. + unlock_page(page);
  10320. + if (inode == NULL)
  10321. + /* inode is getting freed */
  10322. + return 0;
  10323. +
  10324. + /* init wbq */
  10325. + INIT_LIST_HEAD(&rq.link);
  10326. + rq.magic = WBQ_MAGIC;
  10327. + rq.wbc = wbc;
  10328. + rq.page = page;
  10329. + rq.mapping = inode->i_mapping;
  10330. + rq.node = NULL;
  10331. + rq.written = 0;
  10332. + init_completion(&rq.completion);
  10333. +
  10334. + /* add request to entd's list of writepage requests */
  10335. + spin_lock(&ent->guard);
  10336. + ent->nr_todo_reqs++;
  10337. + list_add_tail(&rq.link, &ent->todo_list);
  10338. + if (ent->nr_todo_reqs == 1)
  10339. + wake_up_process(ent->tsk);
  10340. +
  10341. + spin_unlock(&ent->guard);
  10342. +
  10343. + /* wait until entd finishes */
  10344. + wait_for_completion(&rq.completion);
  10345. +
  10346. + if (rq.written)
  10347. + /* Eventually ENTD has written the page to disk. */
  10348. + return 0;
  10349. + return 0;
  10350. +}
  10351. +
  10352. +int wbq_available(void)
  10353. +{
  10354. + struct super_block *sb = reiser4_get_current_sb();
  10355. + entd_context *ent = get_entd_context(sb);
  10356. + return ent->nr_todo_reqs;
  10357. +}
  10358. +
  10359. +/*
  10360. + * Local variables:
  10361. + * c-indentation-style: "K&R"
  10362. + * mode-name: "LC"
  10363. + * c-basic-offset: 8
  10364. + * tab-width: 8
  10365. + * fill-column: 79
  10366. + * End:
  10367. + */
  10368. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/entd.h linux-4.14.2/fs/reiser4/entd.h
  10369. --- linux-4.14.2.orig/fs/reiser4/entd.h 1970-01-01 01:00:00.000000000 +0100
  10370. +++ linux-4.14.2/fs/reiser4/entd.h 2017-11-26 22:13:09.000000000 +0100
  10371. @@ -0,0 +1,90 @@
  10372. +/* Copyright 2003 by Hans Reiser, licensing governed by reiser4/README */
  10373. +
  10374. +/* Ent daemon. */
  10375. +
  10376. +#ifndef __ENTD_H__
  10377. +#define __ENTD_H__
  10378. +
  10379. +#include "context.h"
  10380. +
  10381. +#include <linux/fs.h>
  10382. +#include <linux/completion.h>
  10383. +#include <linux/wait.h>
  10384. +#include <linux/spinlock.h>
  10385. +#include <linux/sched.h> /* for struct task_struct */
  10386. +
  10387. +#define WBQ_MAGIC 0x7876dc76
  10388. +
  10389. +/* write-back request. */
  10390. +struct wbq {
  10391. + int magic;
  10392. + struct list_head link; /* list head of this list is in entd context */
  10393. + struct writeback_control *wbc;
  10394. + struct page *page;
  10395. + struct address_space *mapping;
  10396. + struct completion completion;
  10397. + jnode *node; /* set if ent thread captured requested page */
  10398. + int written; /* set if ent thread wrote requested page */
  10399. +};
  10400. +
  10401. +/* ent-thread context. This is used to synchronize starting/stopping ent
  10402. + * threads. */
  10403. +typedef struct entd_context {
  10404. + /* wait queue that ent thread waits on for more work. It's
  10405. + * signaled by write_page_by_ent(). */
  10406. + wait_queue_head_t wait;
  10407. + /* spinlock protecting other fields */
  10408. + spinlock_t guard;
  10409. + /* ent thread */
  10410. + struct task_struct *tsk;
  10411. + /* set to indicate that ent thread should leave. */
  10412. + int done;
  10413. + /* counter of active flushers */
  10414. + int flushers;
  10415. + /*
  10416. + * when reiser4_writepage asks entd to write a page - it adds struct
  10417. + * wbq to this list
  10418. + */
  10419. + struct list_head todo_list;
  10420. + /* number of elements on the above list */
  10421. + int nr_todo_reqs;
  10422. +
  10423. + struct wbq *cur_request;
  10424. + /*
  10425. + * when entd writes a page it moves write-back request from todo_list
  10426. + * to done_list. This list is used at the end of entd iteration to
  10427. + * wakeup requestors and iput inodes.
  10428. + */
  10429. + struct list_head done_list;
  10430. + /* number of elements on the above list */
  10431. + int nr_done_reqs;
  10432. +
  10433. +#if REISER4_DEBUG
  10434. + /* list of all active flushers */
  10435. + struct list_head flushers_list;
  10436. +#endif
  10437. +} entd_context;
  10438. +
  10439. +extern int reiser4_init_entd(struct super_block *);
  10440. +extern void reiser4_done_entd(struct super_block *);
  10441. +
  10442. +extern void reiser4_enter_flush(struct super_block *);
  10443. +extern void reiser4_leave_flush(struct super_block *);
  10444. +
  10445. +extern int write_page_by_ent(struct page *, struct writeback_control *);
  10446. +extern int wbq_available(void);
  10447. +extern void ent_writes_page(struct super_block *, struct page *);
  10448. +
  10449. +extern jnode *get_jnode_by_wbq(struct super_block *, struct wbq *);
  10450. +/* __ENTD_H__ */
  10451. +#endif
  10452. +
  10453. +/* Make Linus happy.
  10454. + Local variables:
  10455. + c-indentation-style: "K&R"
  10456. + mode-name: "LC"
  10457. + c-basic-offset: 8
  10458. + tab-width: 8
  10459. + fill-column: 120
  10460. + End:
  10461. +*/
  10462. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/eottl.c linux-4.14.2/fs/reiser4/eottl.c
  10463. --- linux-4.14.2.orig/fs/reiser4/eottl.c 1970-01-01 01:00:00.000000000 +0100
  10464. +++ linux-4.14.2/fs/reiser4/eottl.c 2017-11-26 22:13:09.000000000 +0100
  10465. @@ -0,0 +1,510 @@
  10466. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  10467. + reiser4/README */
  10468. +
  10469. +#include "forward.h"
  10470. +#include "debug.h"
  10471. +#include "key.h"
  10472. +#include "coord.h"
  10473. +#include "plugin/item/item.h"
  10474. +#include "plugin/node/node.h"
  10475. +#include "znode.h"
  10476. +#include "block_alloc.h"
  10477. +#include "tree_walk.h"
  10478. +#include "tree_mod.h"
  10479. +#include "carry.h"
  10480. +#include "tree.h"
  10481. +#include "super.h"
  10482. +
  10483. +#include <linux/types.h> /* for __u?? */
  10484. +
  10485. +/*
  10486. + * Extents on the twig level (EOTTL) handling.
  10487. + *
  10488. + * EOTTL poses some problems to the tree traversal, that are better explained
  10489. + * by example.
  10490. + *
  10491. + * Suppose we have block B1 on the twig level with the following items:
  10492. + *
  10493. + * 0. internal item I0 with key (0:0:0:0) (locality, key-type, object-id,
  10494. + * offset)
  10495. + * 1. extent item E1 with key (1:4:100:0), having 10 blocks of 4k each
  10496. + * 2. internal item I2 with key (10:0:0:0)
  10497. + *
  10498. + * We are trying to insert item with key (5:0:0:0). Lookup finds node B1, and
  10499. + * then intra-node lookup is done. This lookup finished on the E1, because the
  10500. + * key we are looking for is larger than the key of E1 and is smaller than key
  10501. + * the of I2.
  10502. + *
  10503. + * Here search is stuck.
  10504. + *
  10505. + * After some thought it is clear what is wrong here: extents on the twig level
  10506. + * break some basic property of the *search* tree (on the pretext, that they
  10507. + * restore property of balanced tree).
  10508. + *
  10509. + * Said property is the following: if in the internal node of the search tree
  10510. + * we have [ ... Key1 Pointer Key2 ... ] then, all data that are or will be
  10511. + * keyed in the tree with the Key such that Key1 <= Key < Key2 are accessible
  10512. + * through the Pointer.
  10513. + *
  10514. + * This is not true, when Pointer is Extent-Pointer, simply because extent
  10515. + * cannot expand indefinitely to the right to include any item with
  10516. + *
  10517. + * Key1 <= Key <= Key2.
  10518. + *
  10519. + * For example, our E1 extent is only responsible for the data with keys
  10520. + *
  10521. + * (1:4:100:0) <= key <= (1:4:100:0xffffffffffffffff), and
  10522. + *
  10523. + * so, key range
  10524. + *
  10525. + * ( (1:4:100:0xffffffffffffffff), (10:0:0:0) )
  10526. + *
  10527. + * is orphaned: there is no way to get there from the tree root.
  10528. + *
  10529. + * In other words, extent pointers are different than normal child pointers as
  10530. + * far as search tree is concerned, and this creates such problems.
  10531. + *
  10532. + * Possible solution for this problem is to insert our item into node pointed
  10533. + * to by I2. There are some problems through:
  10534. + *
  10535. + * (1) I2 can be in a different node.
  10536. + * (2) E1 can be immediately followed by another extent E2.
  10537. + *
  10538. + * (1) is solved by calling reiser4_get_right_neighbor() and accounting
  10539. + * for locks/coords as necessary.
  10540. + *
  10541. + * (2) is more complex. Solution here is to insert new empty leaf node and
  10542. + * insert internal item between E1 and E2 pointing to said leaf node. This is
  10543. + * further complicated by possibility that E2 is in a different node, etc.
  10544. + *
  10545. + * Problems:
  10546. + *
  10547. + * (1) if there was internal item I2 immediately on the right of an extent E1
  10548. + * and we decided to insert new item S1 into node N2 pointed to by I2, then
  10549. + * key of S1 will be less than smallest key in the N2. Normally, search key
  10550. + * checks that key we are looking for is in the range of keys covered by the
  10551. + * node the key is being looked up in. To work around this situation, while
  10552. + * preserving useful consistency check new flag CBK_TRUST_DK was added to the
  10553. + * cbk flags bitmask. This flag is automatically set on entrance to the
  10554. + * coord_by_key() and is only cleared when we are about to enter situation
  10555. + * described above.
  10556. + *
  10557. + * (2) If extent E1 is immediately followed by another extent E2 and we are
  10558. + * searching for the key that is between E1 and E2 we only have to insert new
  10559. + * empty leaf node when coord_by_key was called for insertion, rather than just
  10560. + * for lookup. To distinguish these cases, new flag CBK_FOR_INSERT was added to
  10561. + * the cbk flags bitmask. This flag is automatically set by coord_by_key calls
  10562. + * performed by insert_by_key() and friends.
  10563. + *
  10564. + * (3) Insertion of new empty leaf node (possibly) requires balancing. In any
  10565. + * case it requires modification of node content which is only possible under
  10566. + * write lock. It may well happen that we only have read lock on the node where
  10567. + * new internal pointer is to be inserted (common case: lookup of non-existent
  10568. + * stat-data that falls between two extents). If only read lock is held, tree
  10569. + * traversal is restarted with lock_level modified so that next time we hit
  10570. + * this problem, write lock will be held. Once we have write lock, balancing
  10571. + * will be performed.
  10572. + */
  10573. +
  10574. +/**
  10575. + * is_next_item_internal - check whether next item is internal
  10576. + * @coord: coordinate of extent item in twig node
  10577. + * @key: search key
  10578. + * @lh: twig node lock handle
  10579. + *
  10580. + * Looks at the unit next to @coord. If it is an internal one - 1 is returned,
  10581. + * @coord is set to that unit. If that unit is in right neighbor, @lh is moved
  10582. + * to that node, @coord is set to its first unit. If next item is not internal
  10583. + * or does not exist then 0 is returned, @coord and @lh are left unchanged. 2
  10584. + * is returned if search restart has to be done.
  10585. + */
  10586. +static int
  10587. +is_next_item_internal(coord_t *coord, const reiser4_key * key,
  10588. + lock_handle * lh)
  10589. +{
  10590. + coord_t next;
  10591. + lock_handle rn;
  10592. + int result;
  10593. +
  10594. + coord_dup(&next, coord);
  10595. + if (coord_next_unit(&next) == 0) {
  10596. + /* next unit is in this node */
  10597. + if (item_is_internal(&next)) {
  10598. + coord_dup(coord, &next);
  10599. + return 1;
  10600. + }
  10601. + assert("vs-3", item_is_extent(&next));
  10602. + return 0;
  10603. + }
  10604. +
  10605. + /*
  10606. + * next unit either does not exist or is in right neighbor. If it is in
  10607. + * right neighbor we have to check right delimiting key because
  10608. + * concurrent thread could get there first and insert item with a key
  10609. + * smaller than @key
  10610. + */
  10611. + read_lock_dk(current_tree);
  10612. + result = keycmp(key, znode_get_rd_key(coord->node));
  10613. + read_unlock_dk(current_tree);
  10614. + assert("vs-6", result != EQUAL_TO);
  10615. + if (result == GREATER_THAN)
  10616. + return 2;
  10617. +
  10618. + /* lock right neighbor */
  10619. + init_lh(&rn);
  10620. + result = reiser4_get_right_neighbor(&rn, coord->node,
  10621. + znode_is_wlocked(coord->node) ?
  10622. + ZNODE_WRITE_LOCK : ZNODE_READ_LOCK,
  10623. + GN_CAN_USE_UPPER_LEVELS);
  10624. + if (result == -E_NO_NEIGHBOR) {
  10625. + /* we are on the rightmost edge of the tree */
  10626. + done_lh(&rn);
  10627. + return 0;
  10628. + }
  10629. +
  10630. + if (result) {
  10631. + assert("vs-4", result < 0);
  10632. + done_lh(&rn);
  10633. + return result;
  10634. + }
  10635. +
  10636. + /*
  10637. + * check whether concurrent thread managed to insert item with a key
  10638. + * smaller than @key
  10639. + */
  10640. + read_lock_dk(current_tree);
  10641. + result = keycmp(key, znode_get_ld_key(rn.node));
  10642. + read_unlock_dk(current_tree);
  10643. + assert("vs-6", result != EQUAL_TO);
  10644. + if (result == GREATER_THAN) {
  10645. + done_lh(&rn);
  10646. + return 2;
  10647. + }
  10648. +
  10649. + result = zload(rn.node);
  10650. + if (result) {
  10651. + assert("vs-5", result < 0);
  10652. + done_lh(&rn);
  10653. + return result;
  10654. + }
  10655. +
  10656. + coord_init_first_unit(&next, rn.node);
  10657. + if (item_is_internal(&next)) {
  10658. + /*
  10659. + * next unit is in right neighbor and it is an unit of internal
  10660. + * item. Unlock coord->node. Move @lh to right neighbor. @coord
  10661. + * is set to the first unit of right neighbor.
  10662. + */
  10663. + coord_dup(coord, &next);
  10664. + zrelse(rn.node);
  10665. + done_lh(lh);
  10666. + move_lh(lh, &rn);
  10667. + return 1;
  10668. + }
  10669. +
  10670. + /*
  10671. + * next unit is unit of extent item. Return without changing @lh and
  10672. + * @coord.
  10673. + */
  10674. + assert("vs-6", item_is_extent(&next));
  10675. + zrelse(rn.node);
  10676. + done_lh(&rn);
  10677. + return 0;
  10678. +}
  10679. +
  10680. +/**
  10681. + * rd_key - calculate key of an item next to the given one
  10682. + * @coord: position in a node
  10683. + * @key: storage for result key
  10684. + *
  10685. + * @coord is set between items or after the last item in a node. Calculate key
  10686. + * of item to the right of @coord.
  10687. + */
  10688. +static reiser4_key *rd_key(const coord_t *coord, reiser4_key *key)
  10689. +{
  10690. + coord_t dup;
  10691. +
  10692. + assert("nikita-2281", coord_is_between_items(coord));
  10693. + coord_dup(&dup, coord);
  10694. +
  10695. + if (coord_set_to_right(&dup) == 0)
  10696. + /* next item is in this node. Return its key. */
  10697. + unit_key_by_coord(&dup, key);
  10698. + else {
  10699. + /*
  10700. + * next item either does not exist or is in right
  10701. + * neighbor. Return znode's right delimiting key.
  10702. + */
  10703. + read_lock_dk(current_tree);
  10704. + *key = *znode_get_rd_key(coord->node);
  10705. + read_unlock_dk(current_tree);
  10706. + }
  10707. + return key;
  10708. +}
  10709. +
  10710. +/**
  10711. + * add_empty_leaf - insert empty leaf between two extents
  10712. + * @insert_coord: position in twig node between two extents
  10713. + * @lh: twig node lock handle
  10714. + * @key: left delimiting key of new node
  10715. + * @rdkey: right delimiting key of new node
  10716. + *
  10717. + * Inserts empty leaf node between two extent items. It is necessary when we
  10718. + * have to insert an item on leaf level between two extents (items on the twig
  10719. + * level).
  10720. + */
  10721. +static int
  10722. +add_empty_leaf(coord_t *insert_coord, lock_handle *lh,
  10723. + const reiser4_key *key, const reiser4_key *rdkey)
  10724. +{
  10725. + int result;
  10726. + carry_pool *pool;
  10727. + carry_level *todo;
  10728. + reiser4_item_data *item;
  10729. + carry_insert_data *cdata;
  10730. + carry_op *op;
  10731. + znode *node;
  10732. + reiser4_tree *tree;
  10733. +
  10734. + assert("vs-49827", znode_contains_key_lock(insert_coord->node, key));
  10735. + tree = znode_get_tree(insert_coord->node);
  10736. + node = reiser4_new_node(insert_coord->node, LEAF_LEVEL);
  10737. + if (IS_ERR(node))
  10738. + return PTR_ERR(node);
  10739. +
  10740. + /* setup delimiting keys for node being inserted */
  10741. + write_lock_dk(tree);
  10742. + znode_set_ld_key(node, key);
  10743. + znode_set_rd_key(node, rdkey);
  10744. + ON_DEBUG(node->creator = current);
  10745. + ON_DEBUG(node->first_key = *key);
  10746. + write_unlock_dk(tree);
  10747. +
  10748. + ZF_SET(node, JNODE_ORPHAN);
  10749. +
  10750. + /*
  10751. + * allocate carry_pool, 3 carry_level-s, reiser4_item_data and
  10752. + * carry_insert_data
  10753. + */
  10754. + pool = init_carry_pool(sizeof(*pool) + 3 * sizeof(*todo) +
  10755. + sizeof(*item) + sizeof(*cdata));
  10756. + if (IS_ERR(pool))
  10757. + return PTR_ERR(pool);
  10758. + todo = (carry_level *) (pool + 1);
  10759. + init_carry_level(todo, pool);
  10760. +
  10761. + item = (reiser4_item_data *) (todo + 3);
  10762. + cdata = (carry_insert_data *) (item + 1);
  10763. +
  10764. + op = reiser4_post_carry(todo, COP_INSERT, insert_coord->node, 0);
  10765. + if (!IS_ERR(op)) {
  10766. + cdata->coord = insert_coord;
  10767. + cdata->key = key;
  10768. + cdata->data = item;
  10769. + op->u.insert.d = cdata;
  10770. + op->u.insert.type = COPT_ITEM_DATA;
  10771. + build_child_ptr_data(node, item);
  10772. + item->arg = NULL;
  10773. + /* have @insert_coord to be set at inserted item after
  10774. + insertion is done */
  10775. + todo->track_type = CARRY_TRACK_CHANGE;
  10776. + todo->tracked = lh;
  10777. +
  10778. + result = reiser4_carry(todo, NULL);
  10779. + if (result == 0) {
  10780. + /*
  10781. + * pin node in memory. This is necessary for
  10782. + * znode_make_dirty() below.
  10783. + */
  10784. + result = zload(node);
  10785. + if (result == 0) {
  10786. + lock_handle local_lh;
  10787. +
  10788. + /*
  10789. + * if we inserted new child into tree we have
  10790. + * to mark it dirty so that flush will be able
  10791. + * to process it.
  10792. + */
  10793. + init_lh(&local_lh);
  10794. + result = longterm_lock_znode(&local_lh, node,
  10795. + ZNODE_WRITE_LOCK,
  10796. + ZNODE_LOCK_LOPRI);
  10797. + if (result == 0) {
  10798. + znode_make_dirty(node);
  10799. +
  10800. + /*
  10801. + * when internal item pointing to @node
  10802. + * was inserted into twig node
  10803. + * create_hook_internal did not connect
  10804. + * it properly because its right
  10805. + * neighbor was not known. Do it
  10806. + * here
  10807. + */
  10808. + write_lock_tree(tree);
  10809. + assert("nikita-3312",
  10810. + znode_is_right_connected(node));
  10811. + assert("nikita-2984",
  10812. + node->right == NULL);
  10813. + ZF_CLR(node, JNODE_RIGHT_CONNECTED);
  10814. + write_unlock_tree(tree);
  10815. + result =
  10816. + connect_znode(insert_coord, node);
  10817. + ON_DEBUG(if (result == 0) check_dkeys(node););
  10818. +
  10819. + done_lh(lh);
  10820. + move_lh(lh, &local_lh);
  10821. + assert("vs-1676", node_is_empty(node));
  10822. + coord_init_first_unit(insert_coord,
  10823. + node);
  10824. + } else {
  10825. + warning("nikita-3136",
  10826. + "Cannot lock child");
  10827. + }
  10828. + done_lh(&local_lh);
  10829. + zrelse(node);
  10830. + }
  10831. + }
  10832. + } else
  10833. + result = PTR_ERR(op);
  10834. + zput(node);
  10835. + done_carry_pool(pool);
  10836. + return result;
  10837. +}
  10838. +
  10839. +/**
  10840. + * handle_eottl - handle extent-on-the-twig-level cases in tree traversal
  10841. + * @h: search handle
  10842. + * @outcome: flag saying whether search has to restart or is done
  10843. + *
  10844. + * Handles search on twig level. If this function completes search itself then
  10845. + * it returns 1. If search has to go one level down then 0 is returned. If
  10846. + * error happens then LOOKUP_DONE is returned via @outcome and error code is
  10847. + * saved in @h->result.
  10848. + */
  10849. +int handle_eottl(cbk_handle *h, int *outcome)
  10850. +{
  10851. + int result;
  10852. + reiser4_key key;
  10853. + coord_t *coord;
  10854. +
  10855. + coord = h->coord;
  10856. +
  10857. + if (h->level != TWIG_LEVEL ||
  10858. + (coord_is_existing_item(coord) && item_is_internal(coord))) {
  10859. + /* Continue to traverse tree downward. */
  10860. + return 0;
  10861. + }
  10862. +
  10863. + /*
  10864. + * make sure that @h->coord is set to twig node and that it is either
  10865. + * set to extent item or after extent item
  10866. + */
  10867. + assert("vs-356", h->level == TWIG_LEVEL);
  10868. + assert("vs-357", ({
  10869. + coord_t lcoord;
  10870. + coord_dup(&lcoord, coord);
  10871. + check_me("vs-733", coord_set_to_left(&lcoord) == 0);
  10872. + item_is_extent(&lcoord);
  10873. + }
  10874. + ));
  10875. +
  10876. + if (*outcome == NS_FOUND) {
  10877. + /* we have found desired key on twig level in extent item */
  10878. + h->result = CBK_COORD_FOUND;
  10879. + *outcome = LOOKUP_DONE;
  10880. + return 1;
  10881. + }
  10882. +
  10883. + if (!(h->flags & CBK_FOR_INSERT)) {
  10884. + /* tree traversal is not for insertion. Just return
  10885. + CBK_COORD_NOTFOUND. */
  10886. + h->result = CBK_COORD_NOTFOUND;
  10887. + *outcome = LOOKUP_DONE;
  10888. + return 1;
  10889. + }
  10890. +
  10891. + /* take a look at the item to the right of h -> coord */
  10892. + result = is_next_item_internal(coord, h->key, h->active_lh);
  10893. + if (unlikely(result < 0)) {
  10894. + h->error = "get_right_neighbor failed";
  10895. + h->result = result;
  10896. + *outcome = LOOKUP_DONE;
  10897. + return 1;
  10898. + }
  10899. + if (result == 0) {
  10900. + /*
  10901. + * item to the right is also an extent one. Allocate a new node
  10902. + * and insert pointer to it after item h -> coord.
  10903. + *
  10904. + * This is a result of extents being located at the twig
  10905. + * level. For explanation, see comment just above
  10906. + * is_next_item_internal().
  10907. + */
  10908. + znode *loaded;
  10909. +
  10910. + if (cbk_lock_mode(h->level, h) != ZNODE_WRITE_LOCK) {
  10911. + /*
  10912. + * we got node read locked, restart coord_by_key to
  10913. + * have write lock on twig level
  10914. + */
  10915. + h->lock_level = TWIG_LEVEL;
  10916. + h->lock_mode = ZNODE_WRITE_LOCK;
  10917. + *outcome = LOOKUP_REST;
  10918. + return 1;
  10919. + }
  10920. +
  10921. + loaded = coord->node;
  10922. + result =
  10923. + add_empty_leaf(coord, h->active_lh, h->key,
  10924. + rd_key(coord, &key));
  10925. + if (result) {
  10926. + h->error = "could not add empty leaf";
  10927. + h->result = result;
  10928. + *outcome = LOOKUP_DONE;
  10929. + return 1;
  10930. + }
  10931. + /* added empty leaf is locked (h->active_lh), its parent node
  10932. + is unlocked, h->coord is set as EMPTY */
  10933. + assert("vs-13", coord->between == EMPTY_NODE);
  10934. + assert("vs-14", znode_is_write_locked(coord->node));
  10935. + assert("vs-15",
  10936. + WITH_DATA(coord->node, node_is_empty(coord->node)));
  10937. + assert("vs-16", jnode_is_leaf(ZJNODE(coord->node)));
  10938. + assert("vs-17", coord->node == h->active_lh->node);
  10939. + *outcome = LOOKUP_DONE;
  10940. + h->result = CBK_COORD_NOTFOUND;
  10941. + return 1;
  10942. + } else if (result == 1) {
  10943. + /*
  10944. + * this is special case mentioned in the comment on
  10945. + * tree.h:cbk_flags. We have found internal item immediately on
  10946. + * the right of extent, and we are going to insert new item
  10947. + * there. Key of item we are going to insert is smaller than
  10948. + * leftmost key in the node pointed to by said internal item
  10949. + * (otherwise search wouldn't come to the extent in the first
  10950. + * place).
  10951. + *
  10952. + * This is a result of extents being located at the twig
  10953. + * level. For explanation, see comment just above
  10954. + * is_next_item_internal().
  10955. + */
  10956. + h->flags &= ~CBK_TRUST_DK;
  10957. + } else {
  10958. + assert("vs-8", result == 2);
  10959. + *outcome = LOOKUP_REST;
  10960. + return 1;
  10961. + }
  10962. + assert("vs-362", WITH_DATA(coord->node, item_is_internal(coord)));
  10963. + return 0;
  10964. +}
  10965. +
  10966. +/*
  10967. + * Local variables:
  10968. + * c-indentation-style: "K&R"
  10969. + * mode-name: "LC"
  10970. + * c-basic-offset: 8
  10971. + * tab-width: 8
  10972. + * fill-column: 120
  10973. + * scroll-step: 1
  10974. + * End:
  10975. + */
  10976. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/estimate.c linux-4.14.2/fs/reiser4/estimate.c
  10977. --- linux-4.14.2.orig/fs/reiser4/estimate.c 1970-01-01 01:00:00.000000000 +0100
  10978. +++ linux-4.14.2/fs/reiser4/estimate.c 2017-11-26 22:13:09.000000000 +0100
  10979. @@ -0,0 +1,129 @@
  10980. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  10981. + reiser4/README */
  10982. +
  10983. +#include "debug.h"
  10984. +#include "dformat.h"
  10985. +#include "tree.h"
  10986. +#include "carry.h"
  10987. +#include "inode.h"
  10988. +#include "plugin/cluster.h"
  10989. +#include "plugin/item/ctail.h"
  10990. +
  10991. +/* This returns how many nodes might get dirty and added nodes if @children
  10992. + nodes are dirtied
  10993. +
  10994. + Amount of internals which will get dirty or get allocated we estimate as 5%
  10995. + of the children + 1 balancing. 1 balancing is 2 neighbours, 2 new blocks and
  10996. + the current block on the leaf level, 2 neighbour nodes + the current (or 1
  10997. + neighbour and 1 new and the current) on twig level, 2 neighbour nodes on
  10998. + upper levels and 1 for a new root. So 5 for leaf level, 3 for twig level,
  10999. + 2 on upper + 1 for root.
  11000. +
  11001. + Do not calculate the current node of the lowest level here - this is overhead
  11002. + only.
  11003. +
  11004. + children is almost always 1 here. Exception is flow insertion
  11005. +*/
  11006. +static reiser4_block_nr
  11007. +max_balance_overhead(reiser4_block_nr childen, tree_level tree_height)
  11008. +{
  11009. + reiser4_block_nr ten_percent;
  11010. +
  11011. + ten_percent = ((103 * childen) >> 10);
  11012. +
  11013. + /* If we have too many balancings at the same time, tree height can rise by
  11014. + more than 1. Assume that if tree_height is 5, it can rise by 1 only.
  11015. + */
  11016. + return ((tree_height < 5 ? 5 : tree_height) * 2 + (4 + ten_percent));
  11017. +}
  11018. +
  11019. +/* this returns maximal possible number of nodes which can be modified plus
  11020. + number of new nodes which can be required to perform insertion of one item
  11021. + into the tree */
  11022. +/* it is only called when tree height changes, or gets initialized */
  11023. +reiser4_block_nr calc_estimate_one_insert(tree_level height)
  11024. +{
  11025. + return 1 + max_balance_overhead(1, height);
  11026. +}
  11027. +
  11028. +reiser4_block_nr estimate_one_insert_item(reiser4_tree * tree)
  11029. +{
  11030. + return tree->estimate_one_insert;
  11031. +}
  11032. +
  11033. +/* this returns maximal possible number of nodes which can be modified plus
  11034. + number of new nodes which can be required to perform insertion of one unit
  11035. + into an item in the tree */
  11036. +reiser4_block_nr estimate_one_insert_into_item(reiser4_tree * tree)
  11037. +{
  11038. + /* estimate insert into item just like item insertion */
  11039. + return tree->estimate_one_insert;
  11040. +}
  11041. +
  11042. +reiser4_block_nr estimate_one_item_removal(reiser4_tree * tree)
  11043. +{
  11044. + /* on item removal reiser4 does not try to pack nodes more compactly, so,
  11045. + only one node may be dirtied on leaf level */
  11046. + return tree->estimate_one_insert;
  11047. +}
  11048. +
  11049. +/* on leaf level insert_flow may add CARRY_FLOW_NEW_NODES_LIMIT new nodes and
  11050. + dirty 3 existing nodes (insert point and both its neighbors).
  11051. + Max_balance_overhead should estimate number of blocks which may change/get
  11052. + added on internal levels */
  11053. +reiser4_block_nr estimate_insert_flow(tree_level height)
  11054. +{
  11055. + return 3 + CARRY_FLOW_NEW_NODES_LIMIT + max_balance_overhead(3 +
  11056. + CARRY_FLOW_NEW_NODES_LIMIT,
  11057. + height);
  11058. +}
  11059. +
  11060. +/* returns max number of nodes that can be occupied by a disk cluster */
  11061. +static reiser4_block_nr estimate_cluster(struct inode *inode, int unprepped)
  11062. +{
  11063. + int per_cluster;
  11064. + per_cluster = (unprepped ? 1 : cluster_nrpages(inode));
  11065. + return 3 + per_cluster +
  11066. + max_balance_overhead(3 + per_cluster,
  11067. + REISER4_MAX_ZTREE_HEIGHT);
  11068. +}
  11069. +
  11070. +/* how many nodes might get dirty and added
  11071. + during insertion of a disk cluster */
  11072. +reiser4_block_nr estimate_insert_cluster(struct inode *inode)
  11073. +{
  11074. + return estimate_cluster(inode, 1); /* 24 */
  11075. +}
  11076. +
  11077. +/* how many nodes might get dirty and added
  11078. + during update of a (prepped or unprepped) disk cluster */
  11079. +reiser4_block_nr estimate_update_cluster(struct inode *inode)
  11080. +{
  11081. + return estimate_cluster(inode, 0); /* 44, for 64K-cluster */
  11082. +}
  11083. +
  11084. +/* How many nodes occupied by a disk cluster might get dirty.
  11085. + Note that this estimation is not precise (i.e. disk cluster
  11086. + can occupy more nodes).
  11087. + Q: Why we don't use precise estimation?
  11088. + A: 1.Because precise estimation is fairly bad: 65536 nodes
  11089. + for 64K logical cluster, it means 256M of dead space on
  11090. + a partition
  11091. + 2.It is a very rare case when disk cluster occupies more
  11092. + nodes than this estimation returns.
  11093. +*/
  11094. +reiser4_block_nr estimate_dirty_cluster(struct inode *inode)
  11095. +{
  11096. + return cluster_nrpages(inode) + 4;
  11097. +}
  11098. +
  11099. +/* Make Linus happy.
  11100. + Local variables:
  11101. + c-indentation-style: "K&R"
  11102. + mode-name: "LC"
  11103. + c-basic-offset: 8
  11104. + tab-width: 8
  11105. + fill-column: 120
  11106. + scroll-step: 1
  11107. + End:
  11108. +*/
  11109. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/export_ops.c linux-4.14.2/fs/reiser4/export_ops.c
  11110. --- linux-4.14.2.orig/fs/reiser4/export_ops.c 1970-01-01 01:00:00.000000000 +0100
  11111. +++ linux-4.14.2/fs/reiser4/export_ops.c 2017-11-26 22:13:09.000000000 +0100
  11112. @@ -0,0 +1,325 @@
  11113. +/* Copyright 2005 by Hans Reiser, licensing governed by
  11114. + * reiser4/README */
  11115. +
  11116. +#include "inode.h"
  11117. +#include "plugin/plugin.h"
  11118. +
  11119. +/*
  11120. + * Supported file-handle types
  11121. + */
  11122. +typedef enum {
  11123. + FH_WITH_PARENT = 0x10, /* file handle with parent */
  11124. + FH_WITHOUT_PARENT = 0x11 /* file handle without parent */
  11125. +} reiser4_fhtype;
  11126. +
  11127. +#define NFSERROR (255)
  11128. +
  11129. +/* initialize place-holder for object */
  11130. +static void object_on_wire_init(reiser4_object_on_wire *o)
  11131. +{
  11132. + o->plugin = NULL;
  11133. +}
  11134. +
  11135. +/* finish with @o */
  11136. +static void object_on_wire_done(reiser4_object_on_wire *o)
  11137. +{
  11138. + if (o->plugin != NULL)
  11139. + o->plugin->wire.done(o);
  11140. +}
  11141. +
  11142. +/*
  11143. + * read serialized object identity from @addr and store information about
  11144. + * object in @obj. This is dual to encode_inode().
  11145. + */
  11146. +static char *decode_inode(struct super_block *s, char *addr,
  11147. + reiser4_object_on_wire * obj)
  11148. +{
  11149. + file_plugin *fplug;
  11150. +
  11151. + /* identifier of object plugin is stored in the first two bytes,
  11152. + * followed by... */
  11153. + fplug = file_plugin_by_disk_id(reiser4_get_tree(s), (d16 *) addr);
  11154. + if (fplug != NULL) {
  11155. + addr += sizeof(d16);
  11156. + obj->plugin = fplug;
  11157. + assert("nikita-3520", fplug->wire.read != NULL);
  11158. + /* plugin specific encoding of object identity. */
  11159. + addr = fplug->wire.read(addr, obj);
  11160. + } else
  11161. + addr = ERR_PTR(RETERR(-EINVAL));
  11162. + return addr;
  11163. +}
  11164. +
  11165. +static struct dentry *reiser4_get_dentry(struct super_block *super,
  11166. + void *data);
  11167. +/**
  11168. + * reiser4_decode_fh: decode on-wire object - helper function
  11169. + * for fh_to_dentry, fh_to_parent export operations;
  11170. + * @super: super block;
  11171. + * @addr: onwire object to be decoded;
  11172. + *
  11173. + * Returns dentry referring to the object being decoded.
  11174. + */
  11175. +static struct dentry *reiser4_decode_fh(struct super_block * super,
  11176. + char * addr)
  11177. +{
  11178. + reiser4_object_on_wire object;
  11179. +
  11180. + object_on_wire_init(&object);
  11181. +
  11182. + addr = decode_inode(super, addr, &object);
  11183. + if (!IS_ERR(addr)) {
  11184. + struct dentry *d;
  11185. + d = reiser4_get_dentry(super, &object);
  11186. + if (d != NULL && !IS_ERR(d))
  11187. + /* FIXME check for -ENOMEM */
  11188. + reiser4_get_dentry_fsdata(d)->stateless = 1;
  11189. + addr = (char *)d;
  11190. + }
  11191. + object_on_wire_done(&object);
  11192. + return (void *)addr;
  11193. +}
  11194. +
  11195. +static struct dentry *reiser4_fh_to_dentry(struct super_block *sb,
  11196. + struct fid *fid,
  11197. + int fh_len, int fh_type)
  11198. +{
  11199. + reiser4_context *ctx;
  11200. + struct dentry *d;
  11201. +
  11202. + assert("edward-1536",
  11203. + fh_type == FH_WITH_PARENT || fh_type == FH_WITHOUT_PARENT);
  11204. +
  11205. + ctx = reiser4_init_context(sb);
  11206. + if (IS_ERR(ctx))
  11207. + return (struct dentry *)ctx;
  11208. +
  11209. + d = reiser4_decode_fh(sb, (char *)fid->raw);
  11210. +
  11211. + reiser4_exit_context(ctx);
  11212. + return d;
  11213. +}
  11214. +
  11215. +static struct dentry *reiser4_fh_to_parent(struct super_block *sb,
  11216. + struct fid *fid,
  11217. + int fh_len, int fh_type)
  11218. +{
  11219. + char * addr;
  11220. + struct dentry * d;
  11221. + reiser4_context *ctx;
  11222. + file_plugin *fplug;
  11223. +
  11224. + if (fh_type == FH_WITHOUT_PARENT)
  11225. + return NULL;
  11226. + assert("edward-1537", fh_type == FH_WITH_PARENT);
  11227. +
  11228. + ctx = reiser4_init_context(sb);
  11229. + if (IS_ERR(ctx))
  11230. + return (struct dentry *)ctx;
  11231. + addr = (char *)fid->raw;
  11232. + /* extract 2-bytes file plugin id */
  11233. + fplug = file_plugin_by_disk_id(reiser4_get_tree(sb), (d16 *)addr);
  11234. + if (fplug == NULL) {
  11235. + d = ERR_PTR(RETERR(-EINVAL));
  11236. + goto exit;
  11237. + }
  11238. + addr += sizeof(d16);
  11239. + /* skip previously encoded object */
  11240. + addr = fplug->wire.read(addr, NULL /* skip */);
  11241. + if (IS_ERR(addr)) {
  11242. + d = (struct dentry *)addr;
  11243. + goto exit;
  11244. + }
  11245. + /* extract and decode parent object */
  11246. + d = reiser4_decode_fh(sb, addr);
  11247. + exit:
  11248. + reiser4_exit_context(ctx);
  11249. + return d;
  11250. +}
  11251. +
  11252. +/*
  11253. + * Object serialization support.
  11254. + *
  11255. + * To support knfsd file system provides export_operations that are used to
  11256. + * construct and interpret NFS file handles. As a generalization of this,
  11257. + * reiser4 object plugins have serialization support: it provides methods to
  11258. + * create on-wire representation of identity of reiser4 object, and
  11259. + * re-create/locate object given its on-wire identity.
  11260. + *
  11261. + */
  11262. +
  11263. +/*
  11264. + * return number of bytes that on-wire representation of @inode's identity
  11265. + * consumes.
  11266. + */
  11267. +static int encode_inode_size(struct inode *inode)
  11268. +{
  11269. + assert("nikita-3514", inode != NULL);
  11270. + assert("nikita-3515", inode_file_plugin(inode) != NULL);
  11271. + assert("nikita-3516", inode_file_plugin(inode)->wire.size != NULL);
  11272. +
  11273. + return inode_file_plugin(inode)->wire.size(inode) + sizeof(d16);
  11274. +}
  11275. +
  11276. +/*
  11277. + * store on-wire representation of @inode's identity at the area beginning at
  11278. + * @start.
  11279. + */
  11280. +static char *encode_inode(struct inode *inode, char *start)
  11281. +{
  11282. + assert("nikita-3517", inode != NULL);
  11283. + assert("nikita-3518", inode_file_plugin(inode) != NULL);
  11284. + assert("nikita-3519", inode_file_plugin(inode)->wire.write != NULL);
  11285. +
  11286. + /*
  11287. + * first, store two-byte identifier of object plugin, then
  11288. + */
  11289. + save_plugin_id(file_plugin_to_plugin(inode_file_plugin(inode)),
  11290. + (d16 *) start);
  11291. + start += sizeof(d16);
  11292. + /*
  11293. + * call plugin to serialize object's identity
  11294. + */
  11295. + return inode_file_plugin(inode)->wire.write(inode, start);
  11296. +}
  11297. +
  11298. +/* this returns number of 32 bit long numbers encoded in @lenp. 255 is
  11299. + * returned if file handle can not be stored */
  11300. +/**
  11301. + * reiser4_encode_fh - encode_fh of export operations
  11302. + * @inode:
  11303. + * @fh:
  11304. + * @lenp:
  11305. + * @parent:
  11306. + *
  11307. + */
  11308. +static int
  11309. +reiser4_encode_fh(struct inode *inode, __u32 *fh, int *lenp,
  11310. + struct inode *parent)
  11311. +{
  11312. + char *addr;
  11313. + int need;
  11314. + int delta;
  11315. + int result;
  11316. + bool need_parent;
  11317. + reiser4_context *ctx;
  11318. +
  11319. + /*
  11320. + * knfsd asks as to serialize @inode, and, optionally its
  11321. + * parent @parent (if it is non-NULL).
  11322. + *
  11323. + * encode_inode() and encode_inode_size() is used to build
  11324. + * representation of object and its parent. All hard work is done by
  11325. + * object plugins.
  11326. + */
  11327. + need_parent = (parent != NULL);
  11328. + addr = (char *)fh;
  11329. +
  11330. + need = encode_inode_size(inode);
  11331. + if (need < 0)
  11332. + return NFSERROR;
  11333. + if (need_parent) {
  11334. + delta = encode_inode_size(parent);
  11335. + if (delta < 0)
  11336. + return NFSERROR;
  11337. + need += delta;
  11338. + }
  11339. +
  11340. + ctx = reiser4_init_context(inode->i_sb);
  11341. + if (IS_ERR(ctx))
  11342. + return PTR_ERR(ctx);
  11343. +
  11344. + if (need <= sizeof(__u32) * (*lenp)) {
  11345. + addr = encode_inode(inode, addr);
  11346. + if (need_parent)
  11347. + addr = encode_inode(parent, addr);
  11348. +
  11349. + /* store in lenp number of 32bit words required for file
  11350. + * handle. */
  11351. + *lenp = (need + sizeof(__u32) - 1) >> 2;
  11352. + result = need_parent ? FH_WITH_PARENT : FH_WITHOUT_PARENT;
  11353. + } else
  11354. + /* not enough space in file handle */
  11355. + result = NFSERROR;
  11356. + reiser4_exit_context(ctx);
  11357. + return result;
  11358. +}
  11359. +
  11360. +/**
  11361. + * reiser4_get_dentry_parent - get_parent of export operations
  11362. + * @child:
  11363. + *
  11364. + */
  11365. +static struct dentry *reiser4_get_dentry_parent(struct dentry *child)
  11366. +{
  11367. + struct inode *dir;
  11368. + dir_plugin *dplug;
  11369. + struct dentry *result;
  11370. + reiser4_context *ctx;
  11371. +
  11372. + assert("nikita-3527", child != NULL);
  11373. +
  11374. + dir = child->d_inode;
  11375. + assert("nikita-3529", dir != NULL);
  11376. +
  11377. + ctx = reiser4_init_context(dir->i_sb);
  11378. + if (IS_ERR(ctx))
  11379. + return (void *)ctx;
  11380. +
  11381. + dplug = inode_dir_plugin(dir);
  11382. + assert("nikita-3531", ergo(dplug != NULL, dplug->get_parent != NULL));
  11383. +
  11384. + if (unlikely(dplug == NULL)) {
  11385. + reiser4_exit_context(ctx);
  11386. + return ERR_PTR(RETERR(-ENOTDIR));
  11387. + }
  11388. + result = dplug->get_parent(dir);
  11389. + reiser4_exit_context(ctx);
  11390. + return result;
  11391. +}
  11392. +
  11393. +/**
  11394. + * reiser4_get_dentry - get_dentry of export operations
  11395. + * @super:
  11396. + * @data:
  11397. + *
  11398. + *
  11399. + */
  11400. +static struct dentry *reiser4_get_dentry(struct super_block *super, void *data)
  11401. +{
  11402. + reiser4_object_on_wire *o;
  11403. +
  11404. + assert("nikita-3522", super != NULL);
  11405. + assert("nikita-3523", data != NULL);
  11406. + /*
  11407. + * this is only supposed to be called by
  11408. + *
  11409. + * reiser4_decode_fh->find_exported_dentry
  11410. + *
  11411. + * so, reiser4_context should be here already.
  11412. + */
  11413. + assert("nikita-3526", is_in_reiser4_context());
  11414. +
  11415. + o = (reiser4_object_on_wire *)data;
  11416. + assert("nikita-3524", o->plugin != NULL);
  11417. + assert("nikita-3525", o->plugin->wire.get != NULL);
  11418. +
  11419. + return o->plugin->wire.get(super, o);
  11420. +}
  11421. +
  11422. +struct export_operations reiser4_export_operations = {
  11423. + .encode_fh = reiser4_encode_fh,
  11424. + .fh_to_dentry = reiser4_fh_to_dentry,
  11425. + .fh_to_parent = reiser4_fh_to_parent,
  11426. + .get_parent = reiser4_get_dentry_parent,
  11427. +};
  11428. +
  11429. +/*
  11430. + * Local variables:
  11431. + * c-indentation-style: "K&R"
  11432. + * mode-name: "LC"
  11433. + * c-basic-offset: 8
  11434. + * tab-width: 8
  11435. + * fill-column: 79
  11436. + * End:
  11437. + */
  11438. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/flush.c linux-4.14.2/fs/reiser4/flush.c
  11439. --- linux-4.14.2.orig/fs/reiser4/flush.c 1970-01-01 01:00:00.000000000 +0100
  11440. +++ linux-4.14.2/fs/reiser4/flush.c 2017-11-26 22:13:09.000000000 +0100
  11441. @@ -0,0 +1,3522 @@
  11442. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  11443. + reiser4/README */
  11444. +
  11445. +/* The design document for this file is at http://www.namesys.com/v4/v4.html. */
  11446. +
  11447. +#include "forward.h"
  11448. +#include "debug.h"
  11449. +#include "dformat.h"
  11450. +#include "key.h"
  11451. +#include "coord.h"
  11452. +#include "plugin/item/item.h"
  11453. +#include "plugin/plugin.h"
  11454. +#include "plugin/object.h"
  11455. +#include "txnmgr.h"
  11456. +#include "jnode.h"
  11457. +#include "znode.h"
  11458. +#include "block_alloc.h"
  11459. +#include "tree_walk.h"
  11460. +#include "carry.h"
  11461. +#include "tree.h"
  11462. +#include "vfs_ops.h"
  11463. +#include "inode.h"
  11464. +#include "page_cache.h"
  11465. +#include "wander.h"
  11466. +#include "super.h"
  11467. +#include "entd.h"
  11468. +#include "reiser4.h"
  11469. +#include "flush.h"
  11470. +#include "writeout.h"
  11471. +
  11472. +#include <asm/atomic.h>
  11473. +#include <linux/fs.h> /* for struct super_block */
  11474. +#include <linux/mm.h> /* for struct page */
  11475. +#include <linux/bio.h> /* for struct bio */
  11476. +#include <linux/pagemap.h>
  11477. +#include <linux/blkdev.h>
  11478. +
  11479. +/* IMPLEMENTATION NOTES */
  11480. +
  11481. +/* PARENT-FIRST: Some terminology: A parent-first traversal is a way of
  11482. + assigning a total order to the nodes of the tree in which the parent is
  11483. + placed before its children, which are ordered (recursively) in left-to-right
  11484. + order. When we speak of a "parent-first preceder", it describes the node that
  11485. + "came before in forward parent-first order". When we speak of a "parent-first
  11486. + follower", it describes the node that "comes next in parent-first order"
  11487. + (alternatively the node that "came before in reverse parent-first order").
  11488. +
  11489. + The following pseudo-code prints the nodes of a tree in forward parent-first
  11490. + order:
  11491. +
  11492. + void parent_first (node)
  11493. + {
  11494. + print_node (node);
  11495. + if (node->level > leaf) {
  11496. + for (i = 0; i < num_children; i += 1) {
  11497. + parent_first (node->child[i]);
  11498. + }
  11499. + }
  11500. + }
  11501. +*/
  11502. +
  11503. +/* JUST WHAT ARE WE TRYING TO OPTIMIZE, HERE? The idea is to optimize block
  11504. + allocation so that a left-to-right scan of the tree's data (i.e., the leaves
  11505. + in left-to-right order) can be accomplished with sequential reads, which
  11506. + results in reading nodes in their parent-first order. This is a
  11507. + read-optimization aspect of the flush algorithm, and there is also a
  11508. + write-optimization aspect, which is that we wish to make large sequential
  11509. + writes to the disk by allocating or reallocating blocks so that they can be
  11510. + written in sequence. Sometimes the read-optimization and write-optimization
  11511. + goals conflict with each other, as we discuss in more detail below.
  11512. +*/
  11513. +
  11514. +/* STATE BITS: The flush code revolves around the state of the jnodes it covers.
  11515. + Here are the relevant jnode->state bits and their relevance to flush:
  11516. +
  11517. + JNODE_DIRTY: If a node is dirty, it must be flushed. But in order to be
  11518. + written it must be allocated first. In order to be considered allocated,
  11519. + the jnode must have exactly one of { JNODE_OVRWR, JNODE_RELOC } set. These
  11520. + two bits are exclusive, and all dirtied jnodes eventually have one of these
  11521. + bits set during each transaction.
  11522. +
  11523. + JNODE_CREATED: The node was freshly created in its transaction and has no
  11524. + previous block address, so it is unconditionally assigned to be relocated,
  11525. + although this is mainly for code-convenience. It is not being 'relocated'
  11526. + from anything, but in almost every regard it is treated as part of the
  11527. + relocate set. The JNODE_CREATED bit remains set even after JNODE_RELOC is
  11528. + set, so the actual relocate can be distinguished from the
  11529. + created-and-allocated set easily: relocate-set members (belonging to the
  11530. + preserve-set) have (JNODE_RELOC) set and created-set members which have no
  11531. + previous location to preserve have (JNODE_RELOC | JNODE_CREATED) set.
  11532. +
  11533. + JNODE_OVRWR: The node belongs to atom's overwrite set. The flush algorithm
  11534. + made the decision to maintain the pre-existing location for this node and
  11535. + it will be written to the wandered-log.
  11536. +
  11537. + JNODE_RELOC: The flush algorithm made the decision to relocate this block
  11538. + (if it was not created, see note above). A block with JNODE_RELOC set is
  11539. + eligible for early-flushing and may be submitted during flush_empty_queues.
  11540. + When the JNODE_RELOC bit is set on a znode, the parent node's internal item
  11541. + is modified and the znode is rehashed.
  11542. +
  11543. + JNODE_SQUEEZABLE: Before shifting everything left, the flush algorithm
  11544. + scans the node and calls plugin->f.squeeze() method for its items. By this
  11545. + technology we update disk clusters of cryptcompress objects. Also if
  11546. + leftmost point that was found by flush scan has this flag (races with
  11547. + write(), rare case) the flush algorithm makes the decision to pass it to
  11548. + squalloc() in spite of its flushprepped status for squeezing, not for
  11549. + repeated allocation.
  11550. +
  11551. + JNODE_FLUSH_QUEUED: This bit is set when a call to flush enters the jnode
  11552. + into its flush queue. This means the jnode is not on any clean or dirty
  11553. + list, instead it is moved to one of the flush queue (see flush_queue.h)
  11554. + object private list. This prevents multiple concurrent flushes from
  11555. + attempting to start flushing from the same node.
  11556. +
  11557. + (DEAD STATE BIT) JNODE_FLUSH_BUSY: This bit was set during the bottom-up
  11558. + squeeze-and-allocate on a node while its children are actively being
  11559. + squeezed and allocated. This flag was created to avoid submitting a write
  11560. + request for a node while its children are still being allocated and
  11561. + squeezed. Then flush queue was re-implemented to allow unlimited number of
  11562. + nodes be queued. This flag support was commented out in source code because
  11563. + we decided that there was no reason to submit queued nodes before
  11564. + jnode_flush() finishes. However, current code calls fq_write() during a
  11565. + slum traversal and may submit "busy nodes" to disk. Probably we can
  11566. + re-enable the JNODE_FLUSH_BUSY bit support in future.
  11567. +
  11568. + With these state bits, we describe a test used frequently in the code below,
  11569. + jnode_is_flushprepped() (and the spin-lock-taking jnode_check_flushprepped()).
  11570. + The test for "flushprepped" returns true if any of the following are true:
  11571. +
  11572. + - The node is not dirty
  11573. + - The node has JNODE_RELOC set
  11574. + - The node has JNODE_OVRWR set
  11575. +
  11576. + If either the node is not dirty or it has already been processed by flush
  11577. + (and assigned JNODE_OVRWR or JNODE_RELOC), then it is prepped. If
  11578. + jnode_is_flushprepped() returns false then flush has work to do on that node.
  11579. +*/
  11580. +
  11581. +/* FLUSH_PREP_ONCE_PER_TRANSACTION: Within a single transaction a node is never
  11582. + flushprepped twice (unless an explicit call to flush_unprep is made as
  11583. + described in detail below). For example a node is dirtied, allocated, and
  11584. + then early-flushed to disk and set clean. Before the transaction commits, the
  11585. + page is dirtied again and, due to memory pressure, the node is flushed again.
  11586. + The flush algorithm will not relocate the node to a new disk location, it
  11587. + will simply write it to the same, previously relocated position again.
  11588. +*/
  11589. +
  11590. +/* THE BOTTOM-UP VS. TOP-DOWN ISSUE: This code implements a bottom-up algorithm
  11591. + where we start at a leaf node and allocate in parent-first order by iterating
  11592. + to the right. At each step of the iteration, we check for the right neighbor.
  11593. + Before advancing to the right neighbor, we check if the current position and
  11594. + the right neighbor share the same parent. If they do not share the same
  11595. + parent, the parent is allocated before the right neighbor.
  11596. +
  11597. + This process goes recursively up the tree and squeezes nodes level by level as
  11598. + long as the right neighbor and the current position have different parents,
  11599. + then it allocates the right-neighbors-with-different-parents on the way back
  11600. + down. This process is described in more detail in
  11601. + flush_squalloc_changed_ancestor and the recursive function
  11602. + squalloc_one_changed_ancestor. But the purpose here is not to discuss the
  11603. + specifics of the bottom-up approach as it is to contrast the bottom-up and
  11604. + top-down approaches.
  11605. +
  11606. + The top-down algorithm was implemented earlier (April-May 2002). In the
  11607. + top-down approach, we find a starting point by scanning left along each level
  11608. + past dirty nodes, then going up and repeating the process until the left node
  11609. + and the parent node are clean. We then perform a parent-first traversal from
  11610. + the starting point, which makes allocating in parent-first order trivial.
  11611. + After one subtree has been allocated in this manner, we move to the right,
  11612. + try moving upward, then repeat the parent-first traversal.
  11613. +
  11614. + Both approaches have problems that need to be addressed. Both are
  11615. + approximately the same amount of code, but the bottom-up approach has
  11616. + advantages in the order it acquires locks which, at the very least, make it
  11617. + the better approach. At first glance each one makes the other one look
  11618. + simpler, so it is important to remember a few of the problems with each one.
  11619. +
  11620. + Main problem with the top-down approach: When you encounter a clean child
  11621. + during the parent-first traversal, what do you do? You would like to avoid
  11622. + searching through a large tree of nodes just to find a few dirty leaves at
  11623. + the bottom, and there is not an obvious solution. One of the advantages of
  11624. + the top-down approach is that during the parent-first traversal you check
  11625. + every child of a parent to see if it is dirty. In this way, the top-down
  11626. + approach easily handles the main problem of the bottom-up approach:
  11627. + unallocated children.
  11628. +
  11629. + The unallocated children problem is that before writing a node to disk we
  11630. + must make sure that all of its children are allocated. Otherwise, writing
  11631. + the node means extra I/O because the node will have to be written again when
  11632. + the child is finally allocated.
  11633. +
  11634. + WE HAVE NOT YET ELIMINATED THE UNALLOCATED CHILDREN PROBLEM. Except for bugs,
  11635. + this should not cause any file system corruption, it only degrades I/O
  11636. + performance because a node may be written when it is sure to be written at
  11637. + least one more time in the same transaction when the remaining children are
  11638. + allocated. What follows is a description of how we will solve the problem.
  11639. +*/
  11640. +
  11641. +/* HANDLING UNALLOCATED CHILDREN: During flush we may allocate a parent node,
  11642. + then proceeding in parent first order, allocate some of its left-children,
  11643. + then encounter a clean child in the middle of the parent. We do not allocate
  11644. + the clean child, but there may remain unallocated (dirty) children to the
  11645. + right of the clean child. If we were to stop flushing at this moment and
  11646. + write everything to disk, the parent might still contain unallocated
  11647. + children.
  11648. +
  11649. + We could try to allocate all the descendants of every node that we allocate,
  11650. + but this is not necessary. Doing so could result in allocating the entire
  11651. + tree: if the root node is allocated then every unallocated node would have to
  11652. + be allocated before flushing. Actually, we do not have to write a node just
  11653. + because we allocate it. It is possible to allocate but not write a node
  11654. + during flush, when it still has unallocated children. However, this approach
  11655. + is probably not optimal for the following reason.
  11656. +
  11657. + The flush algorithm is designed to allocate nodes in parent-first order in an
  11658. + attempt to optimize reads that occur in the same order. Thus we are
  11659. + read-optimizing for a left-to-right scan through all the leaves in the
  11660. + system, and we are hoping to write-optimize at the same time because those
  11661. + nodes will be written together in batch. What happens, however, if we assign
  11662. + a block number to a node in its read-optimized order but then avoid writing
  11663. + it because it has unallocated children? In that situation, we lose out on the
  11664. + write-optimization aspect because a node will have to be written again to
  11665. + its location on the device, later, which likely means seeking back to that
  11666. + location.
  11667. +
  11668. + So there are tradeoffs. We can choose either:
  11669. +
  11670. + A. Allocate all unallocated children to preserve both write-optimization and
  11671. + read-optimization, but this is not always desirable because it may mean
  11672. + having to allocate and flush very many nodes at once.
  11673. +
  11674. + B. Defer writing nodes with unallocated children, keep their read-optimized
  11675. + locations, but sacrifice write-optimization because those nodes will be
  11676. + written again.
  11677. +
  11678. + C. Defer writing nodes with unallocated children, but do not keep their
  11679. + read-optimized locations. Instead, choose to write-optimize them later, when
  11680. + they are written. To facilitate this, we "undo" the read-optimized allocation
  11681. + that was given to the node so that later it can be write-optimized, thus
  11682. + "unpreparing" the flush decision. This is a case where we disturb the
  11683. + FLUSH_PREP_ONCE_PER_TRANSACTION rule described above. By a call to
  11684. + flush_unprep() we will: if the node was wandered, unset the JNODE_OVRWR bit;
  11685. + if the node was relocated, unset the JNODE_RELOC bit, non-deferred-deallocate
  11686. + its block location, and set the JNODE_CREATED bit, effectively setting the
  11687. + node back to an unallocated state.
  11688. +
  11689. + We will take the following approach in v4.0: for twig nodes we will always
  11690. + finish allocating unallocated children (A). For nodes with (level > TWIG)
  11691. + we will defer writing and choose write-optimization (C).
  11692. +
  11693. + To summarize, there are several parts to a solution that avoids the problem
  11694. + with unallocated children:
  11695. +
  11696. + FIXME-ZAM: Still no one approach is implemented to eliminate the
  11697. + "UNALLOCATED CHILDREN" problem because an experiment that was done
  11698. + showed that we have 1-2 nodes with unallocated children for thousands of
  11699. + written nodes. The experiment was as simple as copying/deletion of linux kernel
  11700. + sources. However the problem can arise in more complex tests. I think we have
  11701. + jnode_io_hook to insert a check for unallocated children and see what kind of
  11702. + problem we have.
  11703. +
  11704. + 1. When flush reaches a stopping point (e.g. a clean node) it should continue
  11705. + calling squeeze-and-allocate on any remaining unallocated children.
  11706. + FIXME: Difficulty to implement: should be simple -- amounts to adding a while
  11707. + loop to jnode_flush, see comments in that function.
  11708. +
  11709. + 2. When flush reaches flush_empty_queue(), some of the (level > TWIG) nodes
  11710. + may still have unallocated children. If the twig level has unallocated
  11711. + children it is an assertion failure. If a higher-level node has unallocated
  11712. + children, then it should be explicitly de-allocated by a call to
  11713. + flush_unprep().
  11714. + FIXME: Difficulty to implement: should be simple.
  11715. +
  11716. + 3. (CPU-Optimization) Checking whether a node has unallocated children may
  11717. + consume more CPU cycles than we would like, and it is possible (but medium
  11718. + complexity) to optimize this somewhat in the case where large sub-trees are
  11719. + flushed. The following observation helps: if both the left- and
  11720. + right-neighbor of a node are processed by the flush algorithm then the node
  11721. + itself is guaranteed to have all of its children allocated. However, the cost
  11722. + of this check may not be so expensive after all: it is not needed for leaves
  11723. + and flush can guarantee this property for twigs. That leaves only (level >
  11724. + TWIG) nodes that have to be checked, so this optimization only helps if at
  11725. + least three (level > TWIG) nodes are flushed in one pass, and the savings
  11726. + will be very small unless there are many more (level > TWIG) nodes. But if
  11727. + there are many (level > TWIG) nodes then the number of blocks being written
  11728. + will be very large, so the savings may be insignificant. That said, the idea
  11729. + is to maintain both the left and right edges of nodes that are processed in
  11730. + flush. When flush_empty_queue() is called, a relatively simple test will
  11731. + tell whether the (level > TWIG) node is on the edge. If it is on the edge,
  11732. + the slow check is necessary, but if it is in the interior then it can be
  11733. + assumed to have all of its children allocated. FIXME: medium complexity to
  11734. + implement, but simple to verify given that we must have a slow check anyway.
  11735. +
  11736. + 4. (Optional) This part is optional, not for v4.0--flush should work
  11737. + independently of whether this option is used or not. Called RAPID_SCAN, the
  11738. + idea is to amend the left-scan operation to take unallocated children into
  11739. + account. Normally, the left-scan operation goes left as long as adjacent
  11740. + nodes are dirty up until some large maximum value (FLUSH_SCAN_MAXNODES) at
  11741. + which point it stops and begins flushing. But scan-left may stop at a
  11742. + position where there are unallocated children to the left with the same
  11743. + parent. When RAPID_SCAN is enabled, the ordinary scan-left operation stops
  11744. + after FLUSH_RELOCATE_THRESHOLD, which is much smaller than
  11745. + FLUSH_SCAN_MAXNODES, then proceeds with a rapid scan. The rapid scan skips
  11746. + all the interior children of a node--if the leftmost child of a twig is
  11747. + dirty, check its left neighbor (the rightmost child of the twig to the left).
  11748. + If the left neighbor of the leftmost child is also dirty, then continue the
  11749. + scan at the left twig and repeat. This option will cause flush to allocate
  11750. + more twigs in a single pass, but it also has the potential to write many more
  11751. + nodes than would otherwise be written without the RAPID_SCAN option.
  11752. + RAPID_SCAN was partially implemented, code removed August 12, 2002 by JMACD.
  11753. +*/
  11754. +
  11755. +/* FLUSH CALLED ON NON-LEAF LEVEL. Most of our design considerations assume that
  11756. + the starting point for flush is a leaf node, but actually the flush code
  11757. + cares very little about whether or not this is true. It is possible that all
  11758. + the leaf nodes are flushed and dirty parent nodes still remain, in which case
  11759. + jnode_flush() is called on a non-leaf argument. Flush doesn't care--it treats
  11760. + the argument node as if it were a leaf, even when it is not. This is a simple
  11761. + approach, and there may be a more optimal policy but until a problem with
  11762. + this approach is discovered, simplest is probably best.
  11763. +
  11764. + NOTE: In this case, the ordering produced by flush is parent-first only if
  11765. + you ignore the leaves. This is done as a matter of simplicity and there is
  11766. + only one (shaky) justification. When an atom commits, it flushes all leaf
  11767. + level nodes first, followed by twigs, and so on. With flushing done in this
  11768. + order, if flush is eventually called on a non-leaf node it means that
  11769. + (somehow) we reached a point where all leaves are clean and only internal
  11770. + nodes need to be flushed. If that is the case, then it means there were no
  11771. + leaves that were the parent-first preceder/follower of the parent. This is
  11772. + expected to be a rare case, which is why we do nothing special about it.
  11773. + However, memory pressure may pass an internal node to flush when there are
  11774. + still dirty leaf nodes that need to be flushed, which could prove our
  11775. + original assumptions "inoperative". If this needs to be fixed, then
  11776. + scan_left/right should have special checks for the non-leaf levels. For
  11777. + example, instead of passing from a node to the left neighbor, it should pass
  11778. + from the node to the left neighbor's rightmost descendant (if dirty).
  11779. +
  11780. +*/
  11781. +
  11782. +/* UNIMPLEMENTED AS YET: REPACKING AND RESIZING. We walk the tree in 4MB-16MB
  11783. + chunks, dirtying everything and putting it into a transaction. We tell the
  11784. + allocator to allocate the blocks as far as possible towards one end of the
  11785. + logical device--the left (starting) end of the device if we are walking from
  11786. + left to right, the right end of the device if we are walking from right to
  11787. + left. We then make passes in alternating directions, and as we do this the
  11788. + device becomes sorted such that tree order and block number order fully
  11789. + correlate.
  11790. +
  11791. + Resizing is done by shifting everything either all the way to the left or all
  11792. + the way to the right, and then reporting the last block.
  11793. +*/
  11794. +
  11795. +/* RELOCATE DECISIONS: The code makes a decision to relocate in several places.
  11796. + This describes the policy from the highest level:
  11797. +
  11798. + The FLUSH_RELOCATE_THRESHOLD parameter: If we count this many consecutive
  11799. + nodes on the leaf level during flush-scan (right, left), then we
  11800. + unconditionally decide to relocate leaf nodes.
  11801. +
  11802. + Otherwise, there are two contexts in which we make a decision to relocate:
  11803. +
  11804. + 1. The REVERSE PARENT-FIRST context: Implemented in reverse_allocate
  11805. + During the initial stages of flush, after scan-right completes, we want to
  11806. + ask the question: should we relocate this leaf node and thus dirty the parent
  11807. + node. Then if the node is a leftmost child its parent is its own parent-first
  11808. + preceder, thus we repeat the question at the next level up, and so on. In
  11809. + these cases we are moving in the reverse-parent first direction.
  11810. +
  11811. + There is another case which is considered the reverse direction, which comes
  11812. + at the end of a twig in reverse_relocate_end_of_twig(). As we finish
  11813. + processing a twig we may reach a point where there is a clean twig to the
  11814. + right with a dirty leftmost child. In this case, we may wish to relocate the
  11815. + child by testing if it should be relocated relative to its parent.
  11816. +
  11817. + 2. The FORWARD PARENT-FIRST context: Testing for forward relocation is done
  11818. + in allocate_znode. What distinguishes the forward parent-first case from the
  11819. + reverse-parent first case is that the preceder has already been allocated in
  11820. + the forward case, whereas in the reverse case we don't know what the preceder
  11821. + is until we finish "going in reverse". That simplifies the forward case
  11822. + considerably, and there we actually use the block allocator to determine
  11823. + whether, e.g., a block closer to the preceder is available.
  11824. +*/
  11825. +
  11826. +/* SQUEEZE_LEFT_EDGE: Unimplemented idea for future consideration. The idea is,
  11827. + once we finish scan-left and find a starting point, if the parent's left
  11828. + neighbor is dirty then squeeze the parent's left neighbor and the parent.
  11829. + This may change the flush-starting-node's parent. Repeat until the child's
  11830. + parent is stable. If the child is a leftmost child, repeat this left-edge
  11831. + squeezing operation at the next level up. Note that we cannot allocate
  11832. + extents during this or they will be out of parent-first order. There are also
  11833. + some difficult coordinate maintenance issues. We can't do a tree search to
  11834. + find coordinates again (because we hold locks), we have to determine them
  11835. + from the two nodes being squeezed. Looks difficult, but has potential to
  11836. + increase space utilization. */
  11837. +
  11838. +/* Flush-scan helper functions. */
  11839. +static void scan_init(flush_scan * scan);
  11840. +static void scan_done(flush_scan * scan);
  11841. +
  11842. +/* Flush-scan algorithm. */
  11843. +static int scan_left(flush_scan * scan, flush_scan * right, jnode * node,
  11844. + unsigned limit);
  11845. +static int scan_right(flush_scan * scan, jnode * node, unsigned limit);
  11846. +static int scan_common(flush_scan * scan, flush_scan * other);
  11847. +static int scan_formatted(flush_scan * scan);
  11848. +static int scan_unformatted(flush_scan * scan, flush_scan * other);
  11849. +static int scan_by_coord(flush_scan * scan);
  11850. +
  11851. +/* Initial flush-point ancestor allocation. */
  11852. +static int alloc_pos_and_ancestors(flush_pos_t *pos);
  11853. +static int alloc_one_ancestor(const coord_t *coord, flush_pos_t *pos);
  11854. +static int set_preceder(const coord_t *coord_in, flush_pos_t *pos);
  11855. +
  11856. +/* Main flush algorithm.
  11857. + Note on abbreviation: "squeeze and allocate" == "squalloc". */
  11858. +static int squalloc(flush_pos_t *pos);
  11859. +
  11860. +/* Flush squeeze implementation. */
  11861. +static int squeeze_right_non_twig(znode * left, znode * right);
  11862. +static int shift_one_internal_unit(znode * left, znode * right);
  11863. +
  11864. +/* Flush reverse parent-first relocation routines. */
  11865. +static int reverse_allocate_parent(jnode * node,
  11866. + const coord_t *parent_coord,
  11867. + flush_pos_t *pos);
  11868. +
  11869. +/* Flush allocate write-queueing functions: */
  11870. +static int allocate_znode(znode * node, const coord_t *parent_coord,
  11871. + flush_pos_t *pos);
  11872. +static int lock_parent_and_allocate_znode(znode *, flush_pos_t *);
  11873. +
  11874. +/* Flush helper functions: */
  11875. +static int jnode_lock_parent_coord(jnode * node,
  11876. + coord_t *coord,
  11877. + lock_handle * parent_lh,
  11878. + load_count * parent_zh,
  11879. + znode_lock_mode mode, int try);
  11880. +static int neighbor_in_slum(znode * node, lock_handle * right_lock, sideof side,
  11881. + znode_lock_mode mode, int check_dirty, int expected);
  11882. +static int znode_same_parents(znode * a, znode * b);
  11883. +
  11884. +static int znode_check_flushprepped(znode * node)
  11885. +{
  11886. + return jnode_check_flushprepped(ZJNODE(node));
  11887. +}
  11888. +static void update_znode_dkeys(znode * left, znode * right);
  11889. +
  11890. +/* Flush position functions */
  11891. +static void pos_init(flush_pos_t *pos);
  11892. +static int pos_valid(flush_pos_t *pos);
  11893. +static void pos_done(flush_pos_t *pos);
  11894. +static int pos_stop(flush_pos_t *pos);
  11895. +
  11896. +/* check that @org is first jnode extent unit, if extent is unallocated,
  11897. + * because all jnodes of unallocated extent are dirty and of the same atom. */
  11898. +#define checkchild(scan) \
  11899. +assert("nikita-3435", \
  11900. + ergo(scan->direction == LEFT_SIDE && \
  11901. + (scan->parent_coord.node->level == TWIG_LEVEL) && \
  11902. + jnode_is_unformatted(scan->node) && \
  11903. + extent_is_unallocated(&scan->parent_coord), \
  11904. + extent_unit_index(&scan->parent_coord) == index_jnode(scan->node)))
  11905. +
  11906. +/* This flush_cnt variable is used to track the number of concurrent flush
  11907. + operations, useful for debugging. It is initialized in txnmgr.c out of
  11908. + laziness (because flush has no static initializer function...) */
  11909. +ON_DEBUG(atomic_t flush_cnt;)
  11910. +
  11911. +/* check fs backing device for write congestion */
  11912. +static int check_write_congestion(void)
  11913. +{
  11914. + struct super_block *sb;
  11915. + struct backing_dev_info *bdi;
  11916. +
  11917. + sb = reiser4_get_current_sb();
  11918. + bdi = inode_to_bdi(reiser4_get_super_fake(sb));
  11919. + return bdi_write_congested(bdi);
  11920. +}
  11921. +
  11922. +/* conditionally write flush queue */
  11923. +static int write_prepped_nodes(flush_pos_t *pos)
  11924. +{
  11925. + int ret;
  11926. +
  11927. + assert("zam-831", pos);
  11928. + assert("zam-832", pos->fq);
  11929. +
  11930. + if (!(pos->flags & JNODE_FLUSH_WRITE_BLOCKS))
  11931. + return 0;
  11932. +
  11933. + if (check_write_congestion())
  11934. + return 0;
  11935. +
  11936. + ret = reiser4_write_fq(pos->fq, pos->nr_written,
  11937. + WRITEOUT_SINGLE_STREAM | WRITEOUT_FOR_PAGE_RECLAIM);
  11938. + return ret;
  11939. +}
  11940. +
  11941. +/* Properly release all flush pos. resources, then move flush position to new
  11942. +   locked node */
  11943. +static void move_flush_pos(flush_pos_t *pos, lock_handle * new_lock,
  11944. + load_count * new_load, const coord_t *new_coord)
  11945. +{
  11946. + assert("zam-857", new_lock->node == new_load->node);
  11947. +
  11948. + if (new_coord) {
  11949. + assert("zam-858", new_coord->node == new_lock->node);
  11950. + coord_dup(&pos->coord, new_coord);
  11951. + } else {
  11952. + coord_init_first_unit(&pos->coord, new_lock->node);
  11953. + }
  11954. +
  11955. + if (pos->child) {
  11956. + jput(pos->child);
  11957. + pos->child = NULL;
  11958. + }
  11959. +
  11960. + move_load_count(&pos->load, new_load);
  11961. + done_lh(&pos->lock);
  11962. + move_lh(&pos->lock, new_lock);
  11963. +}
  11964. +
  11965. +/* delete empty node which link from the parent still exists. */
  11966. +static int delete_empty_node(znode * node)
  11967. +{
  11968. + reiser4_key smallest_removed;
  11969. +
  11970. + assert("zam-1019", node != NULL);
  11971. + assert("zam-1020", node_is_empty(node));
  11972. + assert("zam-1023", znode_is_wlocked(node));
  11973. +
  11974. + return reiser4_delete_node(node, &smallest_removed, NULL, 1);
  11975. +}
  11976. +
  11977. +/* Prepare flush position for alloc_pos_and_ancestors() and squalloc() */
  11978. +static int prepare_flush_pos(flush_pos_t *pos, jnode * org)
  11979. +{
  11980. + int ret;
  11981. + load_count load;
  11982. + lock_handle lock;
  11983. +
  11984. + init_lh(&lock);
  11985. + init_load_count(&load);
  11986. +
  11987. + if (jnode_is_znode(org)) {
  11988. + ret = longterm_lock_znode(&lock, JZNODE(org),
  11989. + ZNODE_WRITE_LOCK, ZNODE_LOCK_HIPRI);
  11990. + if (ret)
  11991. + return ret;
  11992. +
  11993. + ret = incr_load_count_znode(&load, JZNODE(org));
  11994. + if (ret)
  11995. + return ret;
  11996. +
  11997. + pos->state =
  11998. + (jnode_get_level(org) ==
  11999. + LEAF_LEVEL) ? POS_ON_LEAF : POS_ON_INTERNAL;
  12000. + move_flush_pos(pos, &lock, &load, NULL);
  12001. + } else {
  12002. + coord_t parent_coord;
  12003. + ret = jnode_lock_parent_coord(org, &parent_coord, &lock,
  12004. + &load, ZNODE_WRITE_LOCK, 0);
  12005. + if (ret)
  12006. + goto done;
  12007. + if (!item_is_extent(&parent_coord)) {
  12008. + /* file was converted to tail, org became HB, we found
  12009. + internal item */
  12010. + ret = -EAGAIN;
  12011. + goto done;
  12012. + }
  12013. +
  12014. + pos->state = POS_ON_EPOINT;
  12015. + move_flush_pos(pos, &lock, &load, &parent_coord);
  12016. + pos->child = jref(org);
  12017. + if (extent_is_unallocated(&parent_coord)
  12018. + && extent_unit_index(&parent_coord) != index_jnode(org)) {
  12019. + /* @org is not first child of its parent unit. This may
  12020. + happen because longterm lock of its parent node was
  12021. + released between scan_left and scan_right. For now
  12022. + work around this having flush to repeat */
  12023. + ret = -EAGAIN;
  12024. + }
  12025. + }
  12026. +
  12027. +done:
  12028. + done_load_count(&load);
  12029. + done_lh(&lock);
  12030. + return ret;
  12031. +}
  12032. +
  12033. +static txmod_plugin *get_txmod_plugin(void)
  12034. +{
  12035. + struct super_block *sb = reiser4_get_current_sb();
  12036. + return txmod_plugin_by_id(get_super_private(sb)->txmod);
  12037. +}
  12038. +
  12039. +/* TODO LIST (no particular order): */
  12040. +/* I have labelled most of the legitimate FIXME comments in this file with
  12041. + letters to indicate which issue they relate to. There are a few miscellaneous
  12042. + FIXMEs with specific names mentioned instead that need to be
  12043. + inspected/resolved. */
  12044. +/* B. There is an issue described in reverse_allocate having to do with an
  12045. + imprecise is_preceder? check having to do with partially-dirty extents. The
  12046. + code that sets preceder hints and computes the preceder is basically
  12047. + untested. Careful testing needs to be done that preceder calculations are
  12048. + done correctly, since if it doesn't affect correctness we will not catch this
  12049. + stuff during regular testing. */
  12050. +/* C. EINVAL, E_DEADLOCK, E_NO_NEIGHBOR, ENOENT handling. It is unclear which of
  12051. + these are considered expected but unlikely conditions. Flush currently
  12052. + returns 0 (i.e., success but no progress, i.e., restart) whenever it receives
  12053. + any of these in jnode_flush(). Many of the calls that may produce one of
  12054. + these return values (i.e., longterm_lock_znode, reiser4_get_parent,
  12055. + reiser4_get_neighbor, ...) check some of these values themselves and, for
  12056. + instance, stop flushing instead of resulting in a restart. If any of these
  12057. + results are true error conditions then flush will go into a busy-loop, as we
  12058. + noticed during testing when a corrupt tree caused find_child_ptr to return
  12059. + ENOENT. It needs careful thought and testing of corner conditions.
  12060. +*/
  12061. +/* D. Atomicity of flush_prep against deletion and flush concurrency. Suppose a
  12062. + created block is assigned a block number then early-flushed to disk. It is
  12063. + dirtied again and flush is called again. Concurrently, that block is deleted,
  12064. + and the de-allocation of its block number does not need to be deferred, since
  12065. + it is not part of the preserve set (i.e., it didn't exist before the
  12066. + transaction). I think there may be a race condition where flush writes the
  12067. + dirty, created block after the non-deferred deallocated block number is
  12068. + re-allocated, making it possible to write deleted data on top of non-deleted
  12069. + data. Its just a theory, but it needs to be thought out. */
  12070. +/* F. bio_alloc() failure is not handled gracefully. */
  12071. +/* G. Unallocated children. */
  12072. +/* H. Add a WANDERED_LIST to the atom to clarify the placement of wandered
  12073. + blocks. */
  12074. +/* I. Rename flush-scan to scan-point, (flush-pos to flush-point?) */
  12075. +
  12076. +/* JNODE_FLUSH: MAIN ENTRY POINT */
  12077. +/* This is the main entry point for flushing a jnode and its dirty neighborhood
  12078. + (dirty neighborhood is named "slum"). Jnode_flush() is called if reiser4 has
  12079. + to write dirty blocks to disk, it happens when Linux VM decides to reduce
  12080. + number of dirty pages or as a part of transaction commit.
  12081. +
  12082. + Our objective here is to prep and flush the slum the jnode belongs to. We
  12083. + want to squish the slum together, and allocate the nodes in it as we squish
  12084. + because allocation of children affects squishing of parents.
  12085. +
  12086. + The "argument" @node tells flush where to start. From there, flush finds the
  12087. + left edge of the slum, and calls squalloc (in which nodes are squeezed and
  12088. + allocated). To find a "better place" to start squalloc first we perform a
  12089. + flush_scan.
  12090. +
  12091. + Flush-scanning may be performed in both left and right directions, but for
  12092. + different purposes. When scanning to the left, we are searching for a node
  12093. + that precedes a sequence of parent-first-ordered nodes which we will then
  12094. + flush in parent-first order. During flush-scanning, we also take the
  12095. + opportunity to count the number of consecutive leaf nodes. If this number is
  12096. + past some threshold (FLUSH_RELOCATE_THRESHOLD), then we make a decision to
  12097. + reallocate leaf nodes (thus favoring write-optimization).
  12098. +
  12099. + Since the flush argument node can be anywhere in a sequence of dirty leaves,
  12100. + there may also be dirty nodes to the right of the argument. If the scan-left
  12101. + operation does not count at least FLUSH_RELOCATE_THRESHOLD nodes then we
  12102. + follow it with a right-scan operation to see whether there are, in fact,
  12103. + enough nodes to meet the relocate threshold. Each right- and left-scan
  12104. + operation uses a single flush_scan object.
  12105. +
  12106. + After left-scan and possibly right-scan, we prepare a flush_position object
  12107. + with the starting flush point or parent coordinate, which was determined
  12108. + using scan-left.
  12109. +
  12110. + Next we call the main flush routine, squalloc, which iterates along the leaf
  12111. + level, squeezing and allocating nodes (and placing them into the flush
  12112. + queue).
  12113. +
  12114. + After squalloc returns we take extra steps to ensure that all the children
  12115. + of the final twig node are allocated--this involves repeating squalloc
  12116. + until we finish at a twig with no unallocated children.
  12117. +
  12118. + Finally, we call flush_empty_queue to submit write-requests to disk. If we
  12119. + encounter any above-twig nodes during flush_empty_queue that still have
  12120. + unallocated children, we flush_unprep them.
  12121. +
  12122. + Flush treats several "failure" cases as non-failures, essentially causing
  12123. + them to start over. E_DEADLOCK is one example.
  12124. + FIXME:(C) EINVAL, E_NO_NEIGHBOR, ENOENT: these should probably be handled
  12125. + properly rather than restarting, but there are a bunch of cases to audit.
  12126. +*/
  12127. +
  12128. +static int
  12129. +jnode_flush(jnode * node, long nr_to_write, long *nr_written,
  12130. + flush_queue_t *fq, int flags)
  12131. +{
  12132. + long ret = 0;
  12133. + flush_scan *right_scan;
  12134. + flush_scan *left_scan;
  12135. + flush_pos_t *flush_pos;
  12136. + int todo;
  12137. + struct super_block *sb;
  12138. + reiser4_super_info_data *sbinfo;
  12139. + jnode *leftmost_in_slum = NULL;
  12140. +
  12141. + assert("jmacd-76619", lock_stack_isclean(get_current_lock_stack()));
  12142. + assert("nikita-3022", reiser4_schedulable());
  12143. +
  12144. + assert("nikita-3185",
  12145. + get_current_super_private()->delete_mutex_owner != current);
  12146. +
  12147. + /* allocate right_scan, left_scan and flush_pos */
  12148. + right_scan =
  12149. + kmalloc(2 * sizeof(*right_scan) + sizeof(*flush_pos),
  12150. + reiser4_ctx_gfp_mask_get());
  12151. + if (right_scan == NULL)
  12152. + return RETERR(-ENOMEM);
  12153. + left_scan = right_scan + 1;
  12154. + flush_pos = (flush_pos_t *) (left_scan + 1);
  12155. +
  12156. + sb = reiser4_get_current_sb();
  12157. + sbinfo = get_super_private(sb);
  12158. +
  12159. + /* Flush-concurrency debug code */
  12160. +#if REISER4_DEBUG
  12161. + atomic_inc(&flush_cnt);
  12162. +#endif
  12163. +
  12164. + reiser4_enter_flush(sb);
  12165. +
  12166. + /* Initialize a flush position. */
  12167. + pos_init(flush_pos);
  12168. +
  12169. + flush_pos->nr_written = nr_written;
  12170. + flush_pos->fq = fq;
  12171. + flush_pos->flags = flags;
  12172. + flush_pos->nr_to_write = nr_to_write;
  12173. +
  12174. + scan_init(right_scan);
  12175. + scan_init(left_scan);
  12176. +
  12177. + /* First scan left and remember the leftmost scan position. If the
  12178. + leftmost position is unformatted we remember its parent_coord. We
  12179. + scan until counting FLUSH_SCAN_MAXNODES.
  12180. +
  12181. + If starting @node is unformatted, at the beginning of left scan its
  12182. + parent (twig level node, containing extent item) will be long term
  12183. + locked and lock handle will be stored in the
  12184. + @right_scan->parent_lock. This lock is used to start the rightward
  12185. + scan without redoing the tree traversal (necessary to find parent)
  12186. + and, hence, is kept during leftward scan. As a result, we have to
  12187. + use try-lock when taking long term locks during the leftward scan.
  12188. + */
  12189. + ret = scan_left(left_scan, right_scan,
  12190. + node, sbinfo->flush.scan_maxnodes);
  12191. + if (ret != 0)
  12192. + goto failed;
  12193. +
  12194. + leftmost_in_slum = jref(left_scan->node);
  12195. + scan_done(left_scan);
  12196. +
  12197. + /* Then possibly go right to decide if we will use a policy of
  12198. + relocating leaves. This is only done if we did not scan past (and
  12199. + count) enough nodes during the leftward scan. If we do scan right,
  12200. + we only care to go far enough to establish that at least
  12201. + FLUSH_RELOCATE_THRESHOLD number of nodes are being flushed. The scan
  12202. + limit is the difference between left_scan.count and the threshold. */
  12203. +
  12204. + todo = sbinfo->flush.relocate_threshold - left_scan->count;
  12205. + /* scan right is inherently deadlock prone, because we are
  12206. + * (potentially) holding a lock on the twig node at this moment.
  12207. + * FIXME: this is incorrect comment: lock is not held */
  12208. + if (todo > 0) {
  12209. + ret = scan_right(right_scan, node, (unsigned)todo);
  12210. + if (ret != 0)
  12211. + goto failed;
  12212. + }
  12213. +
  12214. + /* Only the right-scan count is needed, release any rightward locks
  12215. + right away. */
  12216. + scan_done(right_scan);
  12217. +
  12218. + /* ... and the answer is: we should relocate leaf nodes if at least
  12219. + FLUSH_RELOCATE_THRESHOLD nodes were found. */
  12220. + flush_pos->leaf_relocate = JF_ISSET(node, JNODE_REPACK) ||
  12221. + (left_scan->count + right_scan->count >=
  12222. + sbinfo->flush.relocate_threshold);
  12223. +
  12224. + /* Funny business here. We set the 'point' in the flush_position
  12225. + prior to starting squalloc regardless of whether the first point is
  12226. + formatted or unformatted. Without this there would be an invariant,
  12227. + in the rest of the code, that if the flush_position is unformatted
  12228. + then flush_position->point is NULL and
  12229. + flush_position->parent_{lock,coord} is set, and if the flush_position
  12230. + is formatted then flush_position->point is non-NULL and no parent
  12231. + info is set.
  12232. +
  12233. + This seems lazy, but it makes the initial calls to
  12234. + reverse_allocate (which ask "is it the pos->point the leftmost
  12235. + child of its parent") much easier because we know the first child
  12236. + already. Nothing is broken by this, but the reasoning is subtle.
  12237. + Holding an extra reference on a jnode during flush can cause us to
  12238. + see nodes with HEARD_BANSHEE during squalloc, because nodes are not
  12239. + removed from sibling lists until they have zero reference count.
  12240. + Flush would never observe a HEARD_BANSHEE node on the left-edge of
  12241. + flush, nodes are only deleted to the right. So if nothing is broken,
  12242. + why fix it?
  12243. +
  12244. + NOTE-NIKITA actually, flush can meet HEARD_BANSHEE node at any
  12245. + point and in any moment, because of the concurrent file system
  12246. + activity (for example, truncate). */
  12247. +
  12248. + /* Check jnode state after flush_scan completed. Having a lock on this
  12249. + node or its parent (in case of unformatted) helps us in case of
  12250. + concurrent flushing. */
  12251. + if (jnode_check_flushprepped(leftmost_in_slum)
  12252. + && !jnode_convertible(leftmost_in_slum)) {
  12253. + ret = 0;
  12254. + goto failed;
  12255. + }
  12256. +
  12257. + /* Now setup flush_pos using scan_left's endpoint. */
  12258. + ret = prepare_flush_pos(flush_pos, leftmost_in_slum);
  12259. + if (ret)
  12260. + goto failed;
  12261. +
  12262. + if (znode_get_level(flush_pos->coord.node) == LEAF_LEVEL
  12263. + && node_is_empty(flush_pos->coord.node)) {
  12264. + znode *empty = flush_pos->coord.node;
  12265. +
  12266. + assert("zam-1022", !ZF_ISSET(empty, JNODE_HEARD_BANSHEE));
  12267. + ret = delete_empty_node(empty);
  12268. + goto failed;
  12269. + }
  12270. +
  12271. + if (jnode_check_flushprepped(leftmost_in_slum)
  12272. + && !jnode_convertible(leftmost_in_slum)) {
  12273. + ret = 0;
  12274. + goto failed;
  12275. + }
  12276. +
  12277. + /* Set pos->preceder and (re)allocate pos and its ancestors if it is
  12278. + needed */
  12279. + ret = alloc_pos_and_ancestors(flush_pos);
  12280. + if (ret)
  12281. + goto failed;
  12282. +
  12283. + /* Do the main rightward-bottom-up squeeze and allocate loop. */
  12284. + ret = squalloc(flush_pos);
  12285. + pos_stop(flush_pos);
  12286. + if (ret)
  12287. + goto failed;
  12288. +
  12289. + /* FIXME_NFQUCMPD: Here, handle the twig-special case for unallocated
  12290. + children. First, the pos_stop() and pos_valid() routines should be
  12291. + modified so that pos_stop() sets a flush_position->stop flag to 1
  12292. + without releasing the current position immediately--instead release
  12293. + it in pos_done(). This is a better implementation than the current
  12294. + one anyway.
  12295. +
  12296. + It is not clear that all fields of the flush_position should not be
  12297. + released, but at the very least the parent_lock, parent_coord, and
  12298. + parent_load should remain held because they hold the last twig
  12299. + when pos_stop() is called.
  12300. +
  12301. + When we reach this point in the code, if the parent_coord is set to
  12302. + after the last item then we know that flush reached the end of a twig
  12303. + (and according to the new flush queueing design, we will return now).
  12304. + If parent_coord is not past the last item, we should check if the
  12305. + current twig has any unallocated children to the right (we are not
  12306. + concerned with unallocated children to the left--in that case the
  12307. + twig itself should not have been allocated). If the twig has
  12308. + unallocated children to the right, set the parent_coord to that
  12309. + position and then repeat the call to squalloc.
  12310. +
  12311. + Testing for unallocated children may be defined in two ways: if any
  12312. + internal item has a fake block number, it is unallocated; if any
  12313. + extent item is unallocated then all of its children are unallocated.
  12314. + But there is a more aggressive approach: if there are any dirty
  12315. + children of the twig to the right of the current position, we may
  12316. + wish to relocate those nodes now. Checking for potential relocation
  12317. + is more expensive as it requires knowing whether there are any dirty
  12318. + children that are not unallocated. The extent_needs_allocation should
  12319. + be used after setting the correct preceder.
  12320. +
  12321. + When we reach the end of a twig at this point in the code, if the
  12322. + flush can continue (when the queue is ready) it will need some
  12323. + information on the future starting point. That should be stored away
  12324. + in the flush_handle using a seal, I believe. Holding a jref() on the
  12325. + future starting point may break other code that deletes that node.
  12326. + */
  12327. +
  12328. + /* FIXME_NFQUCMPD: Also, we don't want to do any flushing when flush is
  12329. + called above the twig level. If the VM calls flush above the twig
  12330. + level, do nothing and return (but figure out why this happens). The
  12331. + txnmgr should be modified to only flush its leaf-level dirty list.
  12332. + This will do all the necessary squeeze and allocate steps but leave
  12333. + unallocated branches and possibly unallocated twigs (when the twig's
  12334. + leftmost child is not dirty). After flushing the leaf level, the
  12335. + remaining unallocated nodes should be given write-optimized
  12336. + locations. (Possibly, the remaining unallocated twigs should be
  12337. + allocated just before their leftmost child.)
  12338. + */
  12339. +
  12340. + /* Any failure reaches this point. */
  12341. +failed:
  12342. +
  12343. + switch (ret) {
  12344. + case -E_REPEAT:
  12345. + case -EINVAL:
  12346. + case -E_DEADLOCK:
  12347. + case -E_NO_NEIGHBOR:
  12348. + case -ENOENT:
  12349. + /* FIXME(C): Except for E_DEADLOCK, these should probably be
  12350. + handled properly in each case. They already are handled in
  12351. + many cases. */
  12352. + /* Something bad happened, but difficult to avoid... Try again!
  12353. + */
  12354. + ret = 0;
  12355. + }
  12356. +
  12357. + if (leftmost_in_slum)
  12358. + jput(leftmost_in_slum);
  12359. +
  12360. + pos_done(flush_pos);
  12361. + scan_done(left_scan);
  12362. + scan_done(right_scan);
  12363. + kfree(right_scan);
  12364. +
  12365. + ON_DEBUG(atomic_dec(&flush_cnt));
  12366. +
  12367. + reiser4_leave_flush(sb);
  12368. +
  12369. + return ret;
  12370. +}
  12371. +
  12372. +/* The reiser4 flush subsystem can be turned into "rapid flush mode" means that
  12373. + * flusher should submit all prepped nodes immediately without keeping them in
  12374. + * flush queues for long time. The reason for rapid flush mode is to free
  12375. + * memory as fast as possible. */
  12376. +
  12377. +#if REISER4_USE_RAPID_FLUSH
  12378. +
  12379. +/**
  12380. + * submit all prepped nodes if rapid flush mode is set,
  12381. + * turn rapid flush mode off.
  12382. + */
  12383. +
  12384. +static int rapid_flush(flush_pos_t *pos)
  12385. +{
  12386. + if (!wbq_available())
  12387. + return 0;
  12388. +
  12389. + return write_prepped_nodes(pos);
  12390. +}
  12391. +
  12392. +#else
  12393. +
  12394. +#define rapid_flush(pos) (0)
  12395. +
  12396. +#endif /* REISER4_USE_RAPID_FLUSH */
  12397. +
  12398. +static jnode *find_flush_start_jnode(jnode *start, txn_atom * atom,
  12399. + flush_queue_t *fq, int *nr_queued,
  12400. + int flags)
  12401. +{
  12402. + jnode * node;
  12403. +
  12404. + if (start != NULL) {
  12405. + spin_lock_jnode(start);
  12406. + if (!jnode_is_flushprepped(start)) {
  12407. + assert("zam-1056", start->atom == atom);
  12408. + node = start;
  12409. + goto enter;
  12410. + }
  12411. + spin_unlock_jnode(start);
  12412. + }
  12413. + /*
  12414. + * In this loop we process all already prepped (RELOC or OVRWR) and
  12415. + * dirtied again nodes. The atom spin lock is not released until all
  12416. + * dirty nodes processed or not prepped node found in the atom dirty
  12417. + * lists.
  12418. + */
  12419. + while ((node = find_first_dirty_jnode(atom, flags))) {
  12420. + spin_lock_jnode(node);
  12421. +enter:
  12422. + assert("zam-881", JF_ISSET(node, JNODE_DIRTY));
  12423. + assert("zam-898", !JF_ISSET(node, JNODE_OVRWR));
  12424. +
  12425. + if (JF_ISSET(node, JNODE_WRITEBACK)) {
  12426. + /* move node to the end of atom's writeback list */
  12427. + list_move_tail(&node->capture_link, ATOM_WB_LIST(atom));
  12428. +
  12429. + /*
  12430. + * jnode is not necessarily on dirty list: if it was
  12431. + * dirtied when it was on flush queue - it does not get
  12432. + * moved to dirty list
  12433. + */
  12434. + ON_DEBUG(count_jnode(atom, node, NODE_LIST(node),
  12435. + WB_LIST, 1));
  12436. +
  12437. + } else if (jnode_is_znode(node)
  12438. + && znode_above_root(JZNODE(node))) {
  12439. + /*
  12440. + * A special case for znode-above-root. The above-root
  12441. + * (fake) znode is captured and dirtied when the tree
  12442. + * height changes or when the root node is relocated.
  12443. + * This causes atoms to fuse so that changes at the root
  12444. + * are serialized. However, this node is never flushed.
  12445. + * This special case used to be in lock.c to prevent the
  12446. + * above-root node from ever being captured, but now
  12447. + * that it is captured we simply prevent it from
  12448. + * flushing. The log-writer code relies on this to
  12449. + * properly log superblock modifications of the tree
  12450. + * height.
  12451. + */
  12452. + jnode_make_wander_nolock(node);
  12453. + } else if (JF_ISSET(node, JNODE_RELOC)) {
  12454. + queue_jnode(fq, node);
  12455. + ++(*nr_queued);
  12456. + } else
  12457. + break;
  12458. +
  12459. + spin_unlock_jnode(node);
  12460. + }
  12461. + return node;
  12462. +}
  12463. +
  12464. +/* Flush some nodes of current atom, usually slum, return -E_REPEAT if there are
  12465. + * more nodes to flush, return 0 if atom's dirty lists empty and keep current
  12466. + * atom locked, return other errors as they are. */
  12467. +int
  12468. +flush_current_atom(int flags, long nr_to_write, long *nr_submitted,
  12469. + txn_atom ** atom, jnode *start)
  12470. +{
  12471. + reiser4_super_info_data *sinfo = get_current_super_private();
  12472. + flush_queue_t *fq = NULL;
  12473. + jnode *node;
  12474. + int nr_queued;
  12475. + int ret;
  12476. +
  12477. + assert("zam-889", atom != NULL && *atom != NULL);
  12478. + assert_spin_locked(&((*atom)->alock));
  12479. + assert("zam-892", get_current_context()->trans->atom == *atom);
  12480. +
  12481. + BUG_ON(rofs_super(get_current_context()->super));
  12482. +
  12483. + nr_to_write = LONG_MAX;
  12484. + while (1) {
  12485. + ret = reiser4_fq_by_atom(*atom, &fq);
  12486. + if (ret != -E_REPEAT)
  12487. + break;
  12488. + *atom = get_current_atom_locked();
  12489. + }
  12490. + if (ret)
  12491. + return ret;
  12492. +
  12493. + assert_spin_locked(&((*atom)->alock));
  12494. +
  12495. + /* parallel flushers limit */
  12496. + if (sinfo->tmgr.atom_max_flushers != 0) {
  12497. + while ((*atom)->nr_flushers >= sinfo->tmgr.atom_max_flushers) {
  12498. + /* A reiser4_atom_send_event() call is inside
  12499. + reiser4_fq_put_nolock() which is called when flush is
  12500. + finished and nr_flushers is decremented. */
  12501. + reiser4_atom_wait_event(*atom);
  12502. + *atom = get_current_atom_locked();
  12503. + }
  12504. + }
  12505. +
  12506. + /* count ourself as a flusher */
  12507. + (*atom)->nr_flushers++;
  12508. +
  12509. + writeout_mode_enable();
  12510. +
  12511. + nr_queued = 0;
  12512. + node = find_flush_start_jnode(start, *atom, fq, &nr_queued, flags);
  12513. +
  12514. + if (node == NULL) {
  12515. + if (nr_queued == 0) {
  12516. + (*atom)->nr_flushers--;
  12517. + reiser4_fq_put_nolock(fq);
  12518. + reiser4_atom_send_event(*atom);
  12519. + /* current atom remains locked */
  12520. + writeout_mode_disable();
  12521. + return 0;
  12522. + }
  12523. + spin_unlock_atom(*atom);
  12524. + } else {
  12525. + jref(node);
  12526. + BUG_ON((*atom)->super != node->tree->super);
  12527. + spin_unlock_atom(*atom);
  12528. + spin_unlock_jnode(node);
  12529. + BUG_ON(nr_to_write == 0);
  12530. + ret = jnode_flush(node, nr_to_write, nr_submitted, fq, flags);
  12531. + jput(node);
  12532. + }
  12533. +
  12534. + ret =
  12535. + reiser4_write_fq(fq, nr_submitted,
  12536. + WRITEOUT_SINGLE_STREAM | WRITEOUT_FOR_PAGE_RECLAIM);
  12537. +
  12538. + *atom = get_current_atom_locked();
  12539. + (*atom)->nr_flushers--;
  12540. + reiser4_fq_put_nolock(fq);
  12541. + reiser4_atom_send_event(*atom);
  12542. + spin_unlock_atom(*atom);
  12543. +
  12544. + writeout_mode_disable();
  12545. +
  12546. + if (ret == 0)
  12547. + ret = -E_REPEAT;
  12548. +
  12549. + return ret;
  12550. +}
  12551. +
  12552. +/**
  12553. + * This function calls txmod->reverse_alloc_formatted() to make a
  12554. + * reverse-parent-first relocation decision and then, if yes, it marks
  12555. + * the parent dirty.
  12556. + */
  12557. +static int reverse_allocate_parent(jnode * node,
  12558. + const coord_t *parent_coord,
  12559. + flush_pos_t *pos)
  12560. +{
  12561. + int ret;
  12562. +
  12563. + if (!JF_ISSET(ZJNODE(parent_coord->node), JNODE_DIRTY)) {
  12564. + txmod_plugin *txmod_plug = get_txmod_plugin();
  12565. +
  12566. + if (!txmod_plug->reverse_alloc_formatted)
  12567. + return 0;
  12568. + ret = txmod_plug->reverse_alloc_formatted(node,
  12569. + parent_coord, pos);
  12570. + if (ret < 0)
  12571. + return ret;
  12572. + /*
  12573. + * FIXME-ZAM: if parent is already relocated -
  12574. + * we do not want to grab space, right?
  12575. + */
  12576. + if (ret == 1) {
  12577. + int grabbed;
  12578. +
  12579. + grabbed = get_current_context()->grabbed_blocks;
  12580. + if (reiser4_grab_space_force((__u64) 1, BA_RESERVED) !=
  12581. + 0)
  12582. + reiser4_panic("umka-1250",
  12583. + "No space left during flush.");
  12584. +
  12585. + assert("jmacd-18923",
  12586. + znode_is_write_locked(parent_coord->node));
  12587. + znode_make_dirty(parent_coord->node);
  12588. + grabbed2free_mark(grabbed);
  12589. + }
  12590. + }
  12591. + return 0;
  12592. +}
  12593. +
  12594. +/* INITIAL ALLOCATE ANCESTORS STEP (REVERSE PARENT-FIRST ALLOCATION BEFORE
  12595. + FORWARD PARENT-FIRST LOOP BEGINS) */
  12596. +
  12597. +/* Get the leftmost child for given coord. */
  12598. +static int get_leftmost_child_of_unit(const coord_t *coord, jnode ** child)
  12599. +{
  12600. + int ret;
  12601. +
  12602. + ret = item_utmost_child(coord, LEFT_SIDE, child);
  12603. +
  12604. + if (ret)
  12605. + return ret;
  12606. +
  12607. + if (IS_ERR(*child))
  12608. + return PTR_ERR(*child);
  12609. +
  12610. + return 0;
  12611. +}
  12612. +
  12613. +/* This step occurs after the left- and right-scans are completed, before
  12614. + starting the forward parent-first traversal. Here we attempt to allocate
  12615. + ancestors of the starting flush point, which means continuing in the reverse
  12616. + parent-first direction to the parent, grandparent, and so on (as long as the
  12617. + child is a leftmost child). This routine calls a recursive process,
  12618. + alloc_one_ancestor, which does the real work, except there is special-case
  12619. + handling here for the first ancestor, which may be a twig. At each level
  12620. + (here and alloc_one_ancestor), we check for relocation and then, if the child
  12621. + is a leftmost child, repeat at the next level. On the way back down (the
  12622. + recursion), we allocate the ancestors in parent-first order. */
  12623. +static int alloc_pos_and_ancestors(flush_pos_t *pos)
  12624. +{
  12625. + int ret = 0;
  12626. + lock_handle plock;
  12627. + load_count pload;
  12628. + coord_t pcoord;
  12629. +
  12630. + if (znode_check_flushprepped(pos->lock.node))
  12631. + return 0;
  12632. +
  12633. + coord_init_invalid(&pcoord, NULL);
  12634. + init_lh(&plock);
  12635. + init_load_count(&pload);
  12636. +
  12637. + if (pos->state == POS_ON_EPOINT) {
  12638. + /* a special case for pos on twig level, where we already have
  12639. + a lock on parent node. */
  12640. + /* The parent may not be dirty, in which case we should decide
  12641. + whether to relocate the child now. If decision is made to
  12642. + relocate the child, the parent is marked dirty. */
  12643. + ret = reverse_allocate_parent(pos->child, &pos->coord, pos);
  12644. + if (ret)
  12645. + goto exit;
  12646. +
  12647. + /* FIXME_NFQUCMPD: We only need to allocate the twig (if child
  12648. + is leftmost) and the leaf/child, so recursion is not needed.
  12649. + Levels above the twig will be allocated for
  12650. + write-optimization before the transaction commits. */
  12651. +
  12652. + /* Do the recursive step, allocating zero or more of our
  12653. + * ancestors. */
  12654. + ret = alloc_one_ancestor(&pos->coord, pos);
  12655. +
  12656. + } else {
  12657. + if (!znode_is_root(pos->lock.node)) {
  12658. + /* all formatted nodes except tree root */
  12659. + ret =
  12660. + reiser4_get_parent(&plock, pos->lock.node,
  12661. + ZNODE_WRITE_LOCK);
  12662. + if (ret)
  12663. + goto exit;
  12664. +
  12665. + ret = incr_load_count_znode(&pload, plock.node);
  12666. + if (ret)
  12667. + goto exit;
  12668. +
  12669. + ret =
  12670. + find_child_ptr(plock.node, pos->lock.node, &pcoord);
  12671. + if (ret)
  12672. + goto exit;
  12673. +
  12674. + ret = reverse_allocate_parent(ZJNODE(pos->lock.node),
  12675. + &pcoord,
  12676. + pos);
  12677. + if (ret)
  12678. + goto exit;
  12679. +
  12680. + ret = alloc_one_ancestor(&pcoord, pos);
  12681. + if (ret)
  12682. + goto exit;
  12683. + }
  12684. +
  12685. + ret = allocate_znode(pos->lock.node, &pcoord, pos);
  12686. + }
  12687. +exit:
  12688. + done_load_count(&pload);
  12689. + done_lh(&plock);
  12690. + return ret;
  12691. +}
  12692. +
  12693. +/* This is the recursive step described in alloc_pos_and_ancestors, above.
  12694. + Ignoring the call to set_preceder, which is the next function described, this
  12695. + checks if the child is a leftmost child and returns if it is not. If the
  12696. + child is a leftmost child it checks for relocation, possibly dirtying the
  12697. + parent. Then it performs the recursive step. */
  12698. +static int alloc_one_ancestor(const coord_t *coord, flush_pos_t *pos)
  12699. +{
  12700. + int ret = 0;
  12701. + lock_handle alock;
  12702. + load_count aload;
  12703. + coord_t acoord;
  12704. +
  12705. + /* As we ascend at the left-edge of the region to flush, take this
  12706. + opportunity at the twig level to find our parent-first preceder
  12707. + unless we have already set it. */
  12708. + if (pos->preceder.blk == 0) {
  12709. + ret = set_preceder(coord, pos);
  12710. + if (ret != 0)
  12711. + return ret;
  12712. + }
  12713. +
  12714. + /* If the ancestor is clean or already allocated, or if the child is not
  12715. + a leftmost child, stop going up, even leaving coord->node not
  12716. + flushprepped. */
  12717. + if (znode_check_flushprepped(coord->node)
  12718. + || !coord_is_leftmost_unit(coord))
  12719. + return 0;
  12720. +
  12721. + init_lh(&alock);
  12722. + init_load_count(&aload);
  12723. + coord_init_invalid(&acoord, NULL);
  12724. +
  12725. + /* Only ascend to the next level if it is a leftmost child, but
  12726. + write-lock the parent in case we will relocate the child. */
  12727. + if (!znode_is_root(coord->node)) {
  12728. +
  12729. + ret =
  12730. + jnode_lock_parent_coord(ZJNODE(coord->node), &acoord,
  12731. + &alock, &aload, ZNODE_WRITE_LOCK,
  12732. + 0);
  12733. + if (ret != 0) {
  12734. + /* FIXME(C): check EINVAL, E_DEADLOCK */
  12735. + goto exit;
  12736. + }
  12737. +
  12738. + ret = reverse_allocate_parent(ZJNODE(coord->node),
  12739. + &acoord, pos);
  12740. + if (ret != 0)
  12741. + goto exit;
  12742. +
  12743. + /* Recursive call. */
  12744. + if (!znode_check_flushprepped(acoord.node)) {
  12745. + ret = alloc_one_ancestor(&acoord, pos);
  12746. + if (ret)
  12747. + goto exit;
  12748. + }
  12749. + }
  12750. +
  12751. + /* Note: we call allocate with the parent write-locked (except at the
  12752. + root) in case we relocate the child, in which case it will modify the
  12753. + parent during this call. */
  12754. + ret = allocate_znode(coord->node, &acoord, pos);
  12755. +
  12756. +exit:
  12757. + done_load_count(&aload);
  12758. + done_lh(&alock);
  12759. + return ret;
  12760. +}
  12761. +
  12762. +/* During the reverse parent-first alloc_pos_and_ancestors process described
  12763. + above there is a call to this function at the twig level. During
  12764. + alloc_pos_and_ancestors we may ask: should this node be relocated (in reverse
  12765. + parent-first context)? We repeat this process as long as the child is the
  12766. + leftmost child, eventually reaching an ancestor of the flush point that is
  12767. + not a leftmost child. The preceder of that ancestor, which is not a leftmost
  12768. + child, is actually on the leaf level. The preceder of that block is the
  12769. + left-neighbor of the flush point. The preceder of that block is the rightmost
  12770. + child of the twig on the left. So, when alloc_pos_and_ancestors passes upward
  12771. + through the twig level, it stops momentarily to remember the block of the
  12772. + rightmost child of the twig on the left and sets it to the flush_position's
  12773. + preceder_hint.
  12774. +
  12775. + There is one other place where we may set the flush_position's preceder hint,
  12776. + which is during scan-left.
  12777. +*/
  12778. +static int set_preceder(const coord_t *coord_in, flush_pos_t *pos)
  12779. +{
  12780. + int ret;
  12781. + coord_t coord;
  12782. + lock_handle left_lock;
  12783. + load_count left_load;
  12784. +
  12785. + coord_dup(&coord, coord_in);
  12786. +
  12787. + init_lh(&left_lock);
  12788. + init_load_count(&left_load);
  12789. +
  12790. + /* FIXME(B): Same FIXME as in "Find the preceder" in
  12791. + reverse_allocate. coord_is_leftmost_unit is not the right test
  12792. + if the unformatted child is in the middle of the first extent unit.*/
  12793. + if (!coord_is_leftmost_unit(&coord)) {
  12794. + coord_prev_unit(&coord);
  12795. + } else {
  12796. + ret =
  12797. + reiser4_get_left_neighbor(&left_lock, coord.node,
  12798. + ZNODE_READ_LOCK, GN_SAME_ATOM);
  12799. + if (ret) {
  12800. + /* If we fail for any reason it doesn't matter because
  12801. + the preceder is only a hint. We are low-priority at
  12802. + this point, so this must be the case. */
  12803. + if (ret == -E_REPEAT || ret == -E_NO_NEIGHBOR ||
  12804. + ret == -ENOENT || ret == -EINVAL
  12805. + || ret == -E_DEADLOCK)
  12806. + ret = 0;
  12807. + goto exit;
  12808. + }
  12809. +
  12810. + ret = incr_load_count_znode(&left_load, left_lock.node);
  12811. + if (ret)
  12812. + goto exit;
  12813. +
  12814. + coord_init_last_unit(&coord, left_lock.node);
  12815. + }
  12816. +
  12817. + ret =
  12818. + item_utmost_child_real_block(&coord, RIGHT_SIDE,
  12819. + &pos->preceder.blk);
  12820. +exit:
  12821. + check_preceder(pos->preceder.blk);
  12822. + done_load_count(&left_load);
  12823. + done_lh(&left_lock);
  12824. + return ret;
  12825. +}
  12826. +
  12827. +/* MAIN SQUEEZE AND ALLOCATE LOOP (THREE BIG FUNCTIONS) */
  12828. +
  12829. +/* This procedure implements the outer loop of the flush algorithm. To put this
  12830. + in context, here is the general list of steps taken by the flush routine as a
  12831. + whole:
  12832. +
  12833. + 1. Scan-left
  12834. + 2. Scan-right (maybe)
  12835. + 3. Allocate initial flush position and its ancestors
  12836. + 4. <handle extents>
  12837. + 5. <squeeze and next position and its ancestors to-the-right,
  12838. + then update position to-the-right>
  12839. + 6. <repeat from #4 until flush is stopped>
  12840. +
  12841. + This procedure implements the loop in steps 4 through 6 in the above listing.
  12842. +
  12843. + Step 4: if the current flush position is an extent item (position on the twig
  12844. + level), it allocates the extent (allocate_extent_item_in_place) then shifts
  12845. + to the next coordinate. If the next coordinate's leftmost child needs
  12846. + flushprep, we will continue. If the next coordinate is an internal item, we
  12847. + descend back to the leaf level, otherwise we repeat step #4 (labeled
  12848. + ALLOC_EXTENTS below). If the "next coordinate" brings us past the end of the
  12849. + twig level, then we call reverse_relocate_end_of_twig to possibly dirty the
  12850. + next (right) twig, prior to step #5 which moves to the right.
  12851. +
  12852. + Step 5: calls squalloc_changed_ancestors, which initiates a recursive call up
  12853. + the tree to allocate any ancestors of the next-right flush position that are
  12854. + not also ancestors of the current position. Those ancestors (in top-down
  12855. + order) are the next in parent-first order. We squeeze adjacent nodes on the
  12856. + way up until the right node and current node share the same parent, then
  12857. + allocate on the way back down. Finally, this step sets the flush position to
  12858. + the next-right node. Then repeat steps 4 and 5.
  12859. +*/
  12860. +
  12861. +/* SQUEEZE CODE */
  12862. +
  12863. +/* squalloc_right_twig helper function, cut a range of extent items from
  12864. + cut node to->node from the beginning up to coord @to. */
  12865. +static int squalloc_right_twig_cut(coord_t *to, reiser4_key * to_key,
  12866. + znode * left)
  12867. +{
  12868. + coord_t from;
  12869. + reiser4_key from_key;
  12870. +
  12871. + coord_init_first_unit(&from, to->node);
  12872. + item_key_by_coord(&from, &from_key);
  12873. +
  12874. + return cut_node_content(&from, to, &from_key, to_key, NULL);
  12875. +}
  12876. +
  12877. +/* Copy as much of the leading extents from @right to @left, allocating
  12878. + unallocated extents as they are copied. Returns SQUEEZE_TARGET_FULL or
  12879. + SQUEEZE_SOURCE_EMPTY when no more can be shifted. If the next item is an
  12880. + internal item it calls shift_one_internal_unit and may then return
  12881. + SUBTREE_MOVED. */
/*
 * Squeeze @right (a twig) into @left: copy leading extent items from
 * @right to @left, allocating unallocated extents as they are copied,
 * then (if an internal item is reached) shift one internal unit.
 *
 * Returns SQUEEZE_SOURCE_EMPTY when all of @right was moved,
 * SQUEEZE_TARGET_FULL when @left cannot take more, SUBTREE_MOVED when an
 * internal unit was shifted, or a negative error code.
 */
static int squeeze_right_twig(znode * left, znode * right, flush_pos_t *pos)
{
	int ret = SUBTREE_MOVED;
	coord_t coord;		/* used to iterate over items */
	reiser4_key stop_key;
	reiser4_tree *tree;
	txmod_plugin *txmod_plug = get_txmod_plugin();

	assert("jmacd-2008", !node_is_empty(right));
	coord_init_first_unit(&coord, right);

	/* FIXME: can be optimized to cut once */
	while (!node_is_empty(coord.node) && item_is_extent(&coord)) {
		ON_DEBUG(void *vp);

		assert("vs-1468", coord_is_leftmost_unit(&coord));
		ON_DEBUG(vp = shift_check_prepare(left, coord.node));

		/* stop_key is used to find what was copied and what to cut */
		stop_key = *reiser4_min_key();
		/* copy/allocate extent units into @left; the transaction
		 * model plugin advances stop_key past what it consumed */
		ret = txmod_plug->squeeze_alloc_unformatted(left,
							    &coord, pos,
							    &stop_key);
		if (ret != SQUEEZE_CONTINUE) {
			ON_DEBUG(kfree(vp));
			break;
		}
		assert("vs-1465", !keyeq(&stop_key, reiser4_min_key()));

		/* Helper function to do the cutting. */
		/* stop_key is exclusive here, hence the -1 on the offset */
		set_key_offset(&stop_key, get_key_offset(&stop_key) - 1);
		check_me("vs-1466",
			 squalloc_right_twig_cut(&coord, &stop_key, left) == 0);

		ON_DEBUG(shift_check(vp, left, coord.node));
	}
	/*
	 * @left and @right nodes participated in the
	 * implicit shift, determined by the pair of
	 * functions:
	 * . squalloc_extent() - append units to the @left
	 * . squalloc_right_twig_cut() - cut the units from @right
	 * so update their delimiting keys
	 */
	tree = znode_get_tree(left);
	write_lock_dk(tree);
	update_znode_dkeys(left, right);
	write_unlock_dk(tree);

	if (node_is_empty(coord.node))
		ret = SQUEEZE_SOURCE_EMPTY;

	if (ret == SQUEEZE_TARGET_FULL)
		goto out;

	if (node_is_empty(right)) {
		/* The whole right node was copied into @left. */
		assert("vs-464", ret == SQUEEZE_SOURCE_EMPTY);
		goto out;
	}

	coord_init_first_unit(&coord, right);

	if (!item_is_internal(&coord)) {
		/* we do not want to squeeze anything else to left neighbor
		   because "slum" is over */
		ret = SQUEEZE_TARGET_FULL;
		goto out;
	}
	assert("jmacd-433", item_is_internal(&coord));

	/* Shift an internal unit. The child must be allocated before shifting
	   any more extents, so we stop here. */
	ret = shift_one_internal_unit(left, right);

out:
	assert("jmacd-8612", ret < 0 || ret == SQUEEZE_TARGET_FULL
	       || ret == SUBTREE_MOVED || ret == SQUEEZE_SOURCE_EMPTY);

	if (ret == SQUEEZE_TARGET_FULL) {
		/* We submit prepped nodes here and expect that this @left twig
		 * will not be modified again during this jnode_flush() call. */
		int ret1;

		/* NOTE: seems like io is done under long term locks. */
		ret1 = write_prepped_nodes(pos);
		if (ret1 < 0)
			return ret1;
	}

	return ret;
}
  12974. +
#if REISER4_DEBUG
/* Debug-only sanity check for the item-conversion state carried in @pos:
 * when conversion data is attached, the cached item plugin must match the
 * item under pos->coord and must actually implement ->convert(); when no
 * data is attached, no child may be pinned. Compiled away otherwise. */
static void item_convert_invariant(flush_pos_t *pos)
{
	assert("edward-1225", coord_is_existing_item(&pos->coord));
	if (convert_data_attached(pos)) {
		item_plugin *iplug = item_convert_plug(pos);

		assert("edward-1000",
		       iplug == item_plugin_by_coord(&pos->coord));
		assert("edward-1001", iplug->f.convert != NULL);
	} else
		assert("edward-1226", pos->child == NULL);
}
#else

#define item_convert_invariant(pos) noop

#endif
  12993. +
  12994. +/*
  12995. + * Scan all node's items and apply for each one
  12996. + * its ->convert() method. This method may:
  12997. + * . resize the item;
  12998. + * . kill the item;
  12999. + * . insert a group of items/nodes on the right,
  13000. + * which possess the following properties:
  13001. + * . all new nodes are dirty and not convertible;
  13002. + * . for all new items ->convert() method is a noop.
  13003. + *
  13004. + * NOTE: this function makes the tree unbalanced!
  13005. + * This intended to be used by flush squalloc() in a
  13006. + * combination with squeeze procedure.
  13007. + *
  13008. + * GLOSSARY
  13009. + *
  13010. + * Chained nodes and items.
  13011. + * Two neighboring nodes @left and @right are chained,
  13012. + * iff the last item of @left and the first item of @right
  13013. + * belong to the same item cluster. In this case those
  13014. + * items are called chained.
  13015. + */
/*
 * Walk all items of @node (leaf level only) and run each item's
 * ->convert() method, maintaining the "chaining" state in @pos so that
 * disk clusters spanning item/node boundaries are processed as a whole.
 * On success the node is marked non-convertible and dirty.
 */
static int convert_node(flush_pos_t *pos, znode * node)
{
	int ret = 0;
	item_plugin *iplug;
	assert("edward-304", pos != NULL);
	assert("edward-305", pos->child == NULL);
	assert("edward-475", znode_convertible(node));
	assert("edward-669", znode_is_wlocked(node));
	assert("edward-1210", !node_is_empty(node));

	if (znode_get_level(node) != LEAF_LEVEL)
		/* unsupported */
		goto exit;

	coord_init_first_unit(&pos->coord, node);

	while (1) {
		ret = 0;
		coord_set_to_left(&pos->coord);
		item_convert_invariant(pos);

		iplug = item_plugin_by_coord(&pos->coord);
		assert("edward-844", iplug != NULL);

		if (iplug->f.convert) {
			/* ->convert() may resize/kill the item or insert
			 * new items/nodes to the right (see header comment
			 * above this function) */
			ret = iplug->f.convert(pos);
			if (ret)
				goto exit;
		}
		assert("edward-307", pos->child == NULL);

		if (coord_next_item(&pos->coord)) {
			/*
			 * node is over
			 */
			if (convert_data_attached(pos))
				/*
				 * the last item was convertible and
				 * there still is an unprocessed flow
				 */
				if (next_node_is_chained(pos)) {
					/*
					 * next node contains items of
					 * the same disk cluster,
					 * so finish with this node
					 */
					update_chaining_state(pos, 0/* move
								       to next
								       node */);
					break;
				}
				else {
					/*
					 * perform one more iteration
					 * for the same item and the
					 * rest of flow
					 */
					update_chaining_state(pos, 1/* this
								       node */);
				}
			else
				/*
				 * the last item wasn't convertible, or
				 * convert data was detached in the last
				 * iteration,
				 * go to next node
				 */
				break;
		} else {
			/*
			 * Node is not over, item position got decremented.
			 * NOTE(review): coord_next_item() just advanced the
			 * coord; "decremented" here presumably refers to the
			 * roll-back below — confirm against coord API.
			 */
			if (convert_data_attached(pos)) {
				/*
				 * disk cluster should be increased, so roll
				 * one item position back and perform the
				 * iteration with the previous item and the
				 * rest of attached data
				 */
				if (iplug != item_plugin_by_coord(&pos->coord))
					set_item_convert_count(pos, 0);

				ret = coord_prev_item(&pos->coord);
				assert("edward-1003", !ret);

				update_chaining_state(pos, 1/* this node */);
			}
			else
				/*
				 * previous item wasn't convertible, or
				 * convert data was detached in the last
				 * iteration, go to next item
				 */
				;
		}
	}
	/* conversion finished: drop convertible flag, node content changed */
	JF_CLR(ZJNODE(node), JNODE_CONVERTIBLE);
	znode_make_dirty(node);
exit:
	/* NOTE(review): this assert fires on any error path reached via
	 * "goto exit" with ret != 0 in debug builds — verify intent */
	assert("edward-1004", !ret);
	return ret;
}
  13118. +
  13119. +/* Squeeze and allocate the right neighbor. This is called after @left and
  13120. + its current children have been squeezed and allocated already. This
  13121. + procedure's job is to squeeze items from @right to @left.
  13122. +
  13123. + If at the leaf level, use the shift_everything_left memcpy-optimized
  13124. + version of shifting (squeeze_right_leaf).
  13125. +
  13126. + If at the twig level, extents are allocated as they are shifted from @right
  13127. + to @left (squalloc_right_twig).
  13128. +
  13129. + At any other level, shift one internal item and return to the caller
  13130. + (squalloc_parent_first) so that the shifted-subtree can be processed in
  13131. + parent-first order.
  13132. +
  13133. + When unit of internal item is moved, squeezing stops and SUBTREE_MOVED is
  13134. + returned. When all content of @right is squeezed, SQUEEZE_SOURCE_EMPTY is
  13135. + returned. If nothing can be moved into @left anymore, SQUEEZE_TARGET_FULL
  13136. + is returned.
  13137. +*/
  13138. +
  13139. +static int squeeze_right_neighbor(flush_pos_t *pos, znode * left,
  13140. + znode * right)
  13141. +{
  13142. + int ret;
  13143. +
  13144. + /* FIXME it is possible to see empty hasn't-heard-banshee node in a
  13145. + * tree owing to error (for example, ENOSPC) in write */
  13146. + /* assert("jmacd-9321", !node_is_empty(left)); */
  13147. + assert("jmacd-9322", !node_is_empty(right));
  13148. + assert("jmacd-9323", znode_get_level(left) == znode_get_level(right));
  13149. +
  13150. + switch (znode_get_level(left)) {
  13151. + case TWIG_LEVEL:
  13152. + /* Shift with extent allocating until either an internal item
  13153. + is encountered or everything is shifted or no free space
  13154. + left in @left */
  13155. + ret = squeeze_right_twig(left, right, pos);
  13156. + break;
  13157. +
  13158. + default:
  13159. + /* All other levels can use shift_everything until we implement
  13160. + per-item flush plugins. */
  13161. + ret = squeeze_right_non_twig(left, right);
  13162. + break;
  13163. + }
  13164. +
  13165. + assert("jmacd-2011", (ret < 0 ||
  13166. + ret == SQUEEZE_SOURCE_EMPTY
  13167. + || ret == SQUEEZE_TARGET_FULL
  13168. + || ret == SUBTREE_MOVED));
  13169. + return ret;
  13170. +}
  13171. +
  13172. +static int squeeze_right_twig_and_advance_coord(flush_pos_t *pos,
  13173. + znode * right)
  13174. +{
  13175. + int ret;
  13176. +
  13177. + ret = squeeze_right_twig(pos->lock.node, right, pos);
  13178. + if (ret < 0)
  13179. + return ret;
  13180. + if (ret > 0) {
  13181. + coord_init_after_last_item(&pos->coord, pos->lock.node);
  13182. + return ret;
  13183. + }
  13184. +
  13185. + coord_init_last_unit(&pos->coord, pos->lock.node);
  13186. + return 0;
  13187. +}
  13188. +
  13189. +/* forward declaration */
  13190. +static int squalloc_upper_levels(flush_pos_t *, znode *, znode *);
  13191. +
  13192. +/* do a fast check for "same parents" condition before calling
  13193. + * squalloc_upper_levels() */
  13194. +static inline int check_parents_and_squalloc_upper_levels(flush_pos_t *pos,
  13195. + znode * left,
  13196. + znode * right)
  13197. +{
  13198. + if (znode_same_parents(left, right))
  13199. + return 0;
  13200. +
  13201. + return squalloc_upper_levels(pos, left, right);
  13202. +}
  13203. +
  13204. + /* Check whether the parent of given @right node needs to be processed
  13205. + ((re)allocated) prior to processing of the child. If @left and @right do not
  13206. + share a parent, the parent of the @right is after the @left but before the
  13207. + @right in parent-first order, so we have to (re)allocate it before the @right
  13208. + gets (re)allocated. */
/*
 * Process (squeeze/allocate) the ancestors of @right that fall between
 * @left and @right in parent-first order. Locks both parents, squeezes
 * parent(@right) into parent(@left) when possible, recurses upward, and
 * finally allocates parent(@right) on the way back down. All locks and
 * data references are released via the goto-out cleanup chain.
 */
static int squalloc_upper_levels(flush_pos_t *pos, znode * left, znode * right)
{
	int ret;

	lock_handle left_parent_lock;
	lock_handle right_parent_lock;

	load_count left_parent_load;
	load_count right_parent_load;

	init_lh(&left_parent_lock);
	init_lh(&right_parent_lock);

	init_load_count(&left_parent_load);
	init_load_count(&right_parent_load);

	ret = reiser4_get_parent(&left_parent_lock, left, ZNODE_WRITE_LOCK);
	if (ret)
		goto out;

	ret = reiser4_get_parent(&right_parent_lock, right, ZNODE_WRITE_LOCK);
	if (ret)
		goto out;

	/* Check for same parents */
	if (left_parent_lock.node == right_parent_lock.node)
		goto out;

	if (znode_check_flushprepped(right_parent_lock.node)) {
		/* Keep parent-first order. In the order, the right parent node
		   stands before the @right node. If it is already allocated,
		   we set the preceder (next block search start point) to its
		   block number, @right node should be allocated after it.

		   However, preceder is set only if the right parent is on twig
		   level. The explanation is the following: new branch nodes are
		   allocated over already allocated children while the tree
		   grows, it is difficult to keep tree ordered, we assume that
		   only leaves and twigs are correctly allocated. So, only
		   twigs are used as a preceder for allocating of the rest of
		   the slum. */
		if (znode_get_level(right_parent_lock.node) == TWIG_LEVEL) {
			pos->preceder.blk =
			    *znode_get_block(right_parent_lock.node);
			check_preceder(pos->preceder.blk);
		}
		goto out;
	}

	ret = incr_load_count_znode(&left_parent_load, left_parent_lock.node);
	if (ret)
		goto out;

	ret = incr_load_count_znode(&right_parent_load, right_parent_lock.node);
	if (ret)
		goto out;

	ret =
	    squeeze_right_neighbor(pos, left_parent_lock.node,
				   right_parent_lock.node);
	/* We stop if error. We stop if some items/units were shifted (ret == 0)
	 * and thus @right changed its parent. It means we have not processed
	 * right_parent node prior to processing of @right. Positive return
	 * values say that shifting items did not happen because of "empty
	 * source" or "target full" conditions. */
	if (ret <= 0)
		goto out;

	/* parent(@left) and parent(@right) may have different parents also. We
	 * do a recursive call for checking that. */
	ret =
	    check_parents_and_squalloc_upper_levels(pos, left_parent_lock.node,
						    right_parent_lock.node);
	if (ret)
		goto out;

	/* allocate znode when going down */
	ret = lock_parent_and_allocate_znode(right_parent_lock.node, pos);

out:
	done_load_count(&left_parent_load);
	done_load_count(&right_parent_load);

	done_lh(&left_parent_lock);
	done_lh(&right_parent_lock);

	return ret;
}
  13297. +
  13298. +/* Check the leftmost child "flushprepped" status, also returns true if child
  13299. + * node was not found in cache. */
  13300. +static int leftmost_child_of_unit_check_flushprepped(const coord_t *coord)
  13301. +{
  13302. + int ret;
  13303. + int prepped;
  13304. +
  13305. + jnode *child;
  13306. +
  13307. + ret = get_leftmost_child_of_unit(coord, &child);
  13308. +
  13309. + if (ret)
  13310. + return ret;
  13311. +
  13312. + if (child) {
  13313. + prepped = jnode_check_flushprepped(child);
  13314. + jput(child);
  13315. + } else {
  13316. + /* We consider not existing child as a node which slum
  13317. + processing should not continue to. Not cached node is clean,
  13318. + so it is flushprepped. */
  13319. + prepped = 1;
  13320. + }
  13321. +
  13322. + return prepped;
  13323. +}
  13324. +
  13325. +/* (re)allocate znode with automated getting parent node */
/* (re)allocate znode with automated getting parent node */
/* Takes a write lock on @node's parent, loads it, locates the child
 * pointer coord for @node inside the parent, and hands everything to
 * allocate_znode(). Parent lock and data reference are released on all
 * paths via the goto-out chain. */
static int lock_parent_and_allocate_znode(znode * node, flush_pos_t *pos)
{
	int ret;
	lock_handle parent_lock;
	load_count parent_load;
	coord_t pcoord;

	assert("zam-851", znode_is_write_locked(node));

	init_lh(&parent_lock);
	init_load_count(&parent_load);

	ret = reiser4_get_parent(&parent_lock, node, ZNODE_WRITE_LOCK);
	if (ret)
		goto out;

	ret = incr_load_count_znode(&parent_load, parent_lock.node);
	if (ret)
		goto out;

	/* find the internal item in the parent that points to @node */
	ret = find_child_ptr(parent_lock.node, node, &pcoord);
	if (ret)
		goto out;

	ret = allocate_znode(node, &pcoord, pos);

out:
	done_load_count(&parent_load);
	done_lh(&parent_lock);
	return ret;
}
  13357. +
  13358. +/*
  13359. + * Process nodes on the leaf level until unformatted node or
  13360. + * rightmost node in the slum reached.
  13361. + *
  13362. + * This function is a complicated beast, because it calls a
  13363. + * state machine ->convert_node() for every node, which, in
  13364. + * turn, scans node's items and does something for each of them.
  13365. + */
/*
 * Main formatted-level squalloc loop: convert the current node, then
 * repeatedly grab the right neighbor in the slum, convert it, squeeze it
 * into the current node, process the changed ancestors in parent-first
 * order, allocate it, and advance the flush position rightward. Stops at
 * an unformatted boundary, at a flushprepped neighbor, or on error.
 */
static int handle_pos_on_formatted(flush_pos_t *pos)
{
	int ret;
	lock_handle right_lock;
	load_count right_load;

	init_lh(&right_lock);
	init_load_count(&right_load);

	if (znode_convertible(pos->lock.node)) {
		ret = convert_node(pos, pos->lock.node);
		if (ret)
			return ret;
	}
	while (1) {
		assert("edward-1635",
		       ergo(node_is_empty(pos->lock.node),
			    ZF_ISSET(pos->lock.node, JNODE_HEARD_BANSHEE)));
		/*
		 * First of all, grab a right neighbor
		 */
		if (convert_data(pos) && convert_data(pos)->right_locked) {
			/*
			 * the right neighbor was locked by convert_node()
			 * transfer the lock from the "cache".
			 */
			move_lh(&right_lock, &convert_data(pos)->right_lock);
			done_lh(&convert_data(pos)->right_lock);
			convert_data(pos)->right_locked = 0;
		}
		else {
			ret = neighbor_in_slum(pos->lock.node, &right_lock,
					       RIGHT_SIDE, ZNODE_WRITE_LOCK,
					       1, 0);
			if (ret) {
				/*
				 * There is no right neighbor for some reasons,
				 * so finish with this level.
				 */
				assert("edward-1636",
				       !should_convert_right_neighbor(pos));
				break;
			}
		}
		/*
		 * Check "flushprepped" status of the right neighbor.
		 *
		 * We don't prep(allocate) nodes for flushing twice. This can be
		 * suboptimal, or it can be optimal. For now we choose to live
		 * with the risk that it will be suboptimal because it would be
		 * quite complex to code it to be smarter.
		 */
		if (znode_check_flushprepped(right_lock.node)
		    && !znode_convertible(right_lock.node)) {
			assert("edward-1005",
			       !should_convert_right_neighbor(pos));
			pos_stop(pos);
			break;
		}
		ret = incr_load_count_znode(&right_load, right_lock.node);
		if (ret)
			break;
		if (znode_convertible(right_lock.node)) {
			assert("edward-1643",
			       ergo(convert_data(pos),
				    convert_data(pos)->right_locked == 0));

			ret = convert_node(pos, right_lock.node);
			if (ret)
				break;
		}
		else
			assert("edward-1637",
			       !should_convert_right_neighbor(pos));

		if (node_is_empty(pos->lock.node)) {
			/*
			 * Current node became empty after conversion
			 * and, hence, was removed from the tree;
			 * Advance the current position to the right neighbor.
			 */
			assert("edward-1638",
			       ZF_ISSET(pos->lock.node, JNODE_HEARD_BANSHEE));
			move_flush_pos(pos, &right_lock, &right_load, NULL);
			continue;
		}
		if (node_is_empty(right_lock.node)) {
			assert("edward-1639",
			       ZF_ISSET(right_lock.node, JNODE_HEARD_BANSHEE));
			/*
			 * The right neighbor became empty after
			 * conversion, and hence it was deleted
			 * from the tree - skip this.
			 * Since current node is not empty,
			 * we'll obtain a correct pointer to
			 * the next right neighbor
			 */
			done_load_count(&right_load);
			done_lh(&right_lock);
			continue;
		}
		/*
		 * At this point both, current node and its right
		 * neighbor are converted and not empty.
		 * Squeeze them _before_ going upward.
		 */
		ret = squeeze_right_neighbor(pos, pos->lock.node,
					     right_lock.node);
		if (ret < 0)
			break;
		if (node_is_empty(right_lock.node)) {
			assert("edward-1640",
			       ZF_ISSET(right_lock.node, JNODE_HEARD_BANSHEE));
			/*
			 * right neighbor was squeezed completely,
			 * and hence has been deleted from the tree.
			 * Skip this.
			 */
			done_load_count(&right_load);
			done_lh(&right_lock);
			continue;
		}
		if (znode_check_flushprepped(right_lock.node)) {
			if (should_convert_right_neighbor(pos)) {
				/*
				 * in spite of flushprepped status of the node,
				 * its right slum neighbor should be converted
				 */
				assert("edward-953", convert_data(pos));
				assert("edward-954", item_convert_data(pos));

				move_flush_pos(pos, &right_lock, &right_load, NULL);
				continue;
			} else {
				pos_stop(pos);
				break;
			}
		}
		/*
		 * parent(right_lock.node) has to be processed before
		 * (right_lock.node) due to "parent-first" allocation
		 * order
		 */
		ret = check_parents_and_squalloc_upper_levels(pos,
							      pos->lock.node,
							      right_lock.node);
		if (ret)
			break;
		/*
		 * (re)allocate _after_ going upward
		 */
		ret = lock_parent_and_allocate_znode(right_lock.node, pos);
		if (ret)
			break;
		if (should_terminate_squalloc(pos)) {
			set_item_convert_count(pos, 0);
			break;
		}
		/*
		 * advance the flush position to the right neighbor
		 */
		move_flush_pos(pos, &right_lock, &right_load, NULL);

		ret = rapid_flush(pos);
		if (ret)
			break;
	}
	check_convert_info(pos);
	done_load_count(&right_load);
	done_lh(&right_lock);
	/*
	 * This function indicates via pos whether to stop or go to twig or
	 * continue on current level
	 */
	return ret;

}
  13543. +
  13544. +/* Process nodes on leaf level until unformatted node or rightmost node in the
  13545. + * slum reached. */
  13546. +static int handle_pos_on_leaf(flush_pos_t *pos)
  13547. +{
  13548. + int ret;
  13549. +
  13550. + assert("zam-845", pos->state == POS_ON_LEAF);
  13551. +
  13552. + ret = handle_pos_on_formatted(pos);
  13553. +
  13554. + if (ret == -E_NO_NEIGHBOR) {
  13555. + /* cannot get right neighbor, go process extents. */
  13556. + pos->state = POS_TO_TWIG;
  13557. + return 0;
  13558. + }
  13559. +
  13560. + return ret;
  13561. +}
  13562. +
  13563. +/* Process slum on level > 1 */
  13564. +static int handle_pos_on_internal(flush_pos_t *pos)
  13565. +{
  13566. + assert("zam-850", pos->state == POS_ON_INTERNAL);
  13567. + return handle_pos_on_formatted(pos);
  13568. +}
  13569. +
  13570. +/* check whether squalloc should stop before processing given extent */
/* check whether squalloc should stop before processing given extent */
/* Returns non-zero (the child's flushprepped status) when slum
 * processing should stop at this extent, 0 to continue. As a side
 * effect sets pos->pos_in_unit to the starting child index within the
 * current extent unit and releases/clears pos->child. */
static int squalloc_extent_should_stop(flush_pos_t *pos)
{
	assert("zam-869", item_is_extent(&pos->coord));

	/* pos->child is a jnode handle_pos_on_extent() should start with
	 * instead of the first child of the first extent unit. */
	if (pos->child) {
		int prepped;

		assert("vs-1383", jnode_is_unformatted(pos->child));
		prepped = jnode_check_flushprepped(pos->child);
		/* offset of the starting child inside the current unit */
		pos->pos_in_unit =
		    jnode_get_index(pos->child) -
		    extent_unit_index(&pos->coord);
		assert("vs-1470",
		       pos->pos_in_unit < extent_unit_width(&pos->coord));
		assert("nikita-3434",
		       ergo(extent_is_unallocated(&pos->coord),
			    pos->pos_in_unit == 0));
		jput(pos->child);
		pos->child = NULL;

		return prepped;
	}

	pos->pos_in_unit = 0;
	/* unallocated extents are assumed to contain only nodes that are
	 * not yet flushprepped -- fast path, no child lookup needed */
	if (extent_is_unallocated(&pos->coord))
		return 0;

	return leftmost_child_of_unit_check_flushprepped(&pos->coord);
}
  13602. +
  13603. +/* Handle the case when regular reiser4 tree (znodes connected one to its
  13604. + * neighbors by sibling pointers) is interrupted on leaf level by one or more
  13605. + * unformatted nodes. By having a lock on twig level and use extent code
  13606. + * routines to process unformatted nodes we swim around an irregular part of
  13607. + * reiser4 tree. */
/* Handle the case when regular reiser4 tree (znodes connected one to its
 * neighbors by sibling pointers) is interrupted on leaf level by one or more
 * unformatted nodes. By having a lock on twig level and use extent code
 * routines to process unformatted nodes we swim around an irregular part of
 * reiser4 tree. */
static int handle_pos_on_twig(flush_pos_t *pos)
{
	int ret;
	txmod_plugin *txmod_plug = get_txmod_plugin();

	assert("zam-844", pos->state == POS_ON_EPOINT);
	assert("zam-843", item_is_extent(&pos->coord));

	/* We decide should we continue slum processing with current extent
	   unit: if leftmost child of current extent unit is flushprepped
	   (i.e. clean or already processed by flush) we stop squalloc(). There
	   is a fast check for unallocated extents which we assume contain all
	   not flushprepped nodes. */
	/* FIXME: Here we implement simple check, we are only looking on the
	   leftmost child. */
	ret = squalloc_extent_should_stop(pos);
	if (ret != 0) {
		pos_stop(pos);
		return ret;
	}

	/* allocate every following extent unit via the transaction model
	 * plugin until the item run ends or the position is invalidated */
	while (pos_valid(pos) && coord_is_existing_unit(&pos->coord)
	       && item_is_extent(&pos->coord)) {
		ret = txmod_plug->forward_alloc_unformatted(pos);
		if (ret)
			break;
		coord_next_unit(&pos->coord);
	}

	if (coord_is_after_rightmost(&pos->coord)) {
		/* ran off the end of this twig node */
		pos->state = POS_END_OF_TWIG;
		return 0;
	}
	if (item_is_internal(&pos->coord)) {
		/* formatted children follow: descend back to the leaf level */
		pos->state = POS_TO_LEAF;
		return 0;
	}

	assert("zam-860", item_is_extent(&pos->coord));

	/* "slum" is over */
	pos->state = POS_INVALID;
	return 0;
}
  13652. +
  13653. +/* When we about to return flush position from twig to leaf level we can process
  13654. + * the right twig node or move position to the leaf. This processes right twig
  13655. + * if it is possible and jump to leaf level if not. */
  13656. +static int handle_pos_end_of_twig(flush_pos_t *pos)
  13657. +{
  13658. + int ret;
  13659. + lock_handle right_lock;
  13660. + load_count right_load;
  13661. + coord_t at_right;
  13662. + jnode *child = NULL;
  13663. +
  13664. + assert("zam-848", pos->state == POS_END_OF_TWIG);
  13665. + assert("zam-849", coord_is_after_rightmost(&pos->coord));
  13666. +
  13667. + init_lh(&right_lock);
  13668. + init_load_count(&right_load);
  13669. +
  13670. + /* We get a lock on the right twig node even it is not dirty because
  13671. + * slum continues or discontinues on leaf level not on next twig. This
  13672. + * lock on the right twig is needed for getting its leftmost child. */
  13673. + ret =
  13674. + reiser4_get_right_neighbor(&right_lock, pos->lock.node,
  13675. + ZNODE_WRITE_LOCK, GN_SAME_ATOM);
  13676. + if (ret)
  13677. + goto out;
  13678. +
  13679. + ret = incr_load_count_znode(&right_load, right_lock.node);
  13680. + if (ret)
  13681. + goto out;
  13682. +
  13683. + /* right twig could be not dirty */
  13684. + if (JF_ISSET(ZJNODE(right_lock.node), JNODE_DIRTY)) {
  13685. + /* If right twig node is dirty we always attempt to squeeze it
  13686. + * content to the left... */
  13687. +became_dirty:
  13688. + ret =
  13689. + squeeze_right_twig_and_advance_coord(pos, right_lock.node);
  13690. + if (ret <= 0) {
  13691. + /* pos->coord is on internal item, go to leaf level, or
  13692. + * we have an error which will be caught in squalloc()
  13693. + */
  13694. + pos->state = POS_TO_LEAF;
  13695. + goto out;
  13696. + }
  13697. +
  13698. + /* If right twig was squeezed completely we wave to re-lock
  13699. + * right twig. now it is done through the top-level squalloc
  13700. + * routine. */
  13701. + if (node_is_empty(right_lock.node))
  13702. + goto out;
  13703. +
  13704. + /* ... and prep it if it is not yet prepped */
  13705. + if (!znode_check_flushprepped(right_lock.node)) {
  13706. + /* As usual, process parent before ... */
  13707. + ret =
  13708. + check_parents_and_squalloc_upper_levels(pos,
  13709. + pos->lock.
  13710. + node,
  13711. + right_lock.
  13712. + node);
  13713. + if (ret)
  13714. + goto out;
  13715. +
  13716. + /* ... processing the child */
  13717. + ret =
  13718. + lock_parent_and_allocate_znode(right_lock.node,
  13719. + pos);
  13720. + if (ret)
  13721. + goto out;
  13722. + }
  13723. + } else {
  13724. + coord_init_first_unit(&at_right, right_lock.node);
  13725. +
  13726. + /* check first child of next twig, should we continue there ? */
  13727. + ret = get_leftmost_child_of_unit(&at_right, &child);
  13728. + if (ret || child == NULL || jnode_check_flushprepped(child)) {
  13729. + pos_stop(pos);
  13730. + goto out;
  13731. + }
  13732. +
  13733. + /* check clean twig for possible relocation */
  13734. + if (!znode_check_flushprepped(right_lock.node)) {
  13735. + ret = reverse_allocate_parent(child, &at_right, pos);
  13736. + if (ret)
  13737. + goto out;
  13738. + if (JF_ISSET(ZJNODE(right_lock.node), JNODE_DIRTY))
  13739. + goto became_dirty;
  13740. + }
  13741. + }
  13742. +
  13743. + assert("zam-875", znode_check_flushprepped(right_lock.node));
  13744. +
  13745. + /* Update the preceder by a block number of just processed right twig
  13746. + * node. The code above could miss the preceder updating because
  13747. + * allocate_znode() could not be called for this node. */
  13748. + pos->preceder.blk = *znode_get_block(right_lock.node);
  13749. + check_preceder(pos->preceder.blk);
  13750. +
  13751. + coord_init_first_unit(&at_right, right_lock.node);
  13752. + assert("zam-868", coord_is_existing_unit(&at_right));
  13753. +
  13754. + pos->state = item_is_extent(&at_right) ? POS_ON_EPOINT : POS_TO_LEAF;
  13755. + move_flush_pos(pos, &right_lock, &right_load, &at_right);
  13756. +
  13757. +out:
  13758. + done_load_count(&right_load);
  13759. + done_lh(&right_lock);
  13760. +
  13761. + if (child)
  13762. + jput(child);
  13763. +
  13764. + return ret;
  13765. +}
  13766. +
  13767. +/* Move the pos->lock to leaf node pointed by pos->coord, check should we
  13768. + * continue there. */
  13769. +static int handle_pos_to_leaf(flush_pos_t *pos)
  13770. +{
  13771. + int ret;
  13772. + lock_handle child_lock;
  13773. + load_count child_load;
  13774. + jnode *child;
  13775. +
  13776. + assert("zam-846", pos->state == POS_TO_LEAF);
  13777. + assert("zam-847", item_is_internal(&pos->coord));
  13778. +
  13779. + init_lh(&child_lock);
  13780. + init_load_count(&child_load);
  13781. +
  13782. + ret = get_leftmost_child_of_unit(&pos->coord, &child);
  13783. + if (ret)
  13784. + return ret;
  13785. + if (child == NULL) {
  13786. + pos_stop(pos);
  13787. + return 0;
  13788. + }
  13789. +
  13790. + if (jnode_check_flushprepped(child)) {
  13791. + pos->state = POS_INVALID;
  13792. + goto out;
  13793. + }
  13794. +
  13795. + ret =
  13796. + longterm_lock_znode(&child_lock, JZNODE(child), ZNODE_WRITE_LOCK,
  13797. + ZNODE_LOCK_LOPRI);
  13798. + if (ret)
  13799. + goto out;
  13800. +
  13801. + ret = incr_load_count_znode(&child_load, JZNODE(child));
  13802. + if (ret)
  13803. + goto out;
  13804. +
  13805. + ret = allocate_znode(JZNODE(child), &pos->coord, pos);
  13806. + if (ret)
  13807. + goto out;
  13808. +
  13809. + /* move flush position to leaf level */
  13810. + pos->state = POS_ON_LEAF;
  13811. + move_flush_pos(pos, &child_lock, &child_load, NULL);
  13812. +
  13813. + if (node_is_empty(JZNODE(child))) {
  13814. + ret = delete_empty_node(JZNODE(child));
  13815. + pos->state = POS_INVALID;
  13816. + }
  13817. +out:
  13818. + done_load_count(&child_load);
  13819. + done_lh(&child_lock);
  13820. + jput(child);
  13821. +
  13822. + return ret;
  13823. +}
  13824. +
  13825. +/* move pos from leaf to twig, and move lock from leaf to twig. */
  13826. +/* Move pos->lock to upper (twig) level */
  13827. +static int handle_pos_to_twig(flush_pos_t *pos)
  13828. +{
  13829. + int ret;
  13830. +
  13831. + lock_handle parent_lock;
  13832. + load_count parent_load;
  13833. + coord_t pcoord;
  13834. +
  13835. + assert("zam-852", pos->state == POS_TO_TWIG);
  13836. +
  13837. + init_lh(&parent_lock);
  13838. + init_load_count(&parent_load);
  13839. +
  13840. + ret =
  13841. + reiser4_get_parent(&parent_lock, pos->lock.node, ZNODE_WRITE_LOCK);
  13842. + if (ret)
  13843. + goto out;
  13844. +
  13845. + ret = incr_load_count_znode(&parent_load, parent_lock.node);
  13846. + if (ret)
  13847. + goto out;
  13848. +
  13849. + ret = find_child_ptr(parent_lock.node, pos->lock.node, &pcoord);
  13850. + if (ret)
  13851. + goto out;
  13852. +
  13853. + assert("zam-870", item_is_internal(&pcoord));
  13854. + coord_next_item(&pcoord);
  13855. +
  13856. + if (coord_is_after_rightmost(&pcoord))
  13857. + pos->state = POS_END_OF_TWIG;
  13858. + else if (item_is_extent(&pcoord))
  13859. + pos->state = POS_ON_EPOINT;
  13860. + else {
  13861. + /* Here we understand that getting -E_NO_NEIGHBOR in
  13862. + * handle_pos_on_leaf() was because of just a reaching edge of
  13863. + * slum */
  13864. + pos_stop(pos);
  13865. + goto out;
  13866. + }
  13867. +
  13868. + move_flush_pos(pos, &parent_lock, &parent_load, &pcoord);
  13869. +
  13870. +out:
  13871. + done_load_count(&parent_load);
  13872. + done_lh(&parent_lock);
  13873. +
  13874. + return ret;
  13875. +}
  13876. +
  13877. +typedef int (*pos_state_handle_t) (flush_pos_t *);
  13878. +static pos_state_handle_t flush_pos_handlers[] = {
  13879. + /* process formatted nodes on leaf level, keep lock on a leaf node */
  13880. + [POS_ON_LEAF] = handle_pos_on_leaf,
  13881. + /* process unformatted nodes, keep lock on twig node, pos->coord points
  13882. + * to extent currently being processed */
  13883. + [POS_ON_EPOINT] = handle_pos_on_twig,
  13884. + /* move a lock from leaf node to its parent for further processing of
  13885. + unformatted nodes */
  13886. + [POS_TO_TWIG] = handle_pos_to_twig,
  13887. + /* move a lock from twig to leaf level when a processing of unformatted
  13888. + * nodes finishes, pos->coord points to the leaf node we jump to */
  13889. + [POS_TO_LEAF] = handle_pos_to_leaf,
  13890. + /* after processing last extent in the twig node, attempting to shift
  13891. + * items from the twig's right neighbor and process them while shifting*/
  13892. + [POS_END_OF_TWIG] = handle_pos_end_of_twig,
  13893. + /* process formatted nodes on internal level, keep lock on an internal
  13894. + node */
  13895. + [POS_ON_INTERNAL] = handle_pos_on_internal
  13896. +};
  13897. +
  13898. +/* Advance flush position horizontally, prepare for flushing ((re)allocate,
  13899. + * squeeze, encrypt) nodes and their ancestors in "parent-first" order */
  13900. +static int squalloc(flush_pos_t *pos)
  13901. +{
  13902. + int ret = 0;
  13903. +
  13904. + /* maybe needs to be made a case statement with handle_pos_on_leaf as
  13905. + * first case, for greater CPU efficiency? Measure and see.... -Hans */
  13906. + while (pos_valid(pos)) {
  13907. + ret = flush_pos_handlers[pos->state] (pos);
  13908. + if (ret < 0)
  13909. + break;
  13910. +
  13911. + ret = rapid_flush(pos);
  13912. + if (ret)
  13913. + break;
  13914. + }
  13915. +
  13916. + /* any positive value or -E_NO_NEIGHBOR are legal return codes for
  13917. + handle_pos* routines, -E_NO_NEIGHBOR means that slum edge was
  13918. + reached */
  13919. + if (ret > 0 || ret == -E_NO_NEIGHBOR)
  13920. + ret = 0;
  13921. +
  13922. + return ret;
  13923. +}
  13924. +
  13925. +static void update_ldkey(znode * node)
  13926. +{
  13927. + reiser4_key ldkey;
  13928. +
  13929. + assert_rw_write_locked(&(znode_get_tree(node)->dk_lock));
  13930. + if (node_is_empty(node))
  13931. + return;
  13932. +
  13933. + znode_set_ld_key(node, leftmost_key_in_node(node, &ldkey));
  13934. +}
  13935. +
  13936. +/* this is to be called after calling of shift node's method to shift data from
  13937. + @right to @left. It sets left delimiting keys of @left and @right to keys of
  13938. + first items of @left and @right correspondingly and sets right delimiting key
  13939. + of @left to first key of @right */
  13940. +static void update_znode_dkeys(znode * left, znode * right)
  13941. +{
  13942. + assert_rw_write_locked(&(znode_get_tree(right)->dk_lock));
  13943. + assert("vs-1629", (znode_is_write_locked(left) &&
  13944. + znode_is_write_locked(right)));
  13945. +
  13946. + /* we need to update left delimiting of left if it was empty before
  13947. + shift */
  13948. + update_ldkey(left);
  13949. + update_ldkey(right);
  13950. + if (node_is_empty(right))
  13951. + znode_set_rd_key(left, znode_get_rd_key(right));
  13952. + else
  13953. + znode_set_rd_key(left, znode_get_ld_key(right));
  13954. +}
  13955. +
  13956. +/* try to shift everything from @right to @left. If everything was shifted -
  13957. + @right is removed from the tree. Result is the number of bytes shifted. */
  13958. +static int
  13959. +shift_everything_left(znode * right, znode * left, carry_level * todo)
  13960. +{
  13961. + coord_t from;
  13962. + node_plugin *nplug;
  13963. + carry_plugin_info info;
  13964. +
  13965. + coord_init_after_last_item(&from, right);
  13966. +
  13967. + nplug = node_plugin_by_node(right);
  13968. + info.doing = NULL;
  13969. + info.todo = todo;
  13970. + return nplug->shift(&from, left, SHIFT_LEFT,
  13971. + 1 /* delete @right if it becomes empty */ ,
  13972. + 1
  13973. + /* move coord @from to node @left if everything will
  13974. + be shifted */
  13975. + ,
  13976. + &info);
  13977. +}
  13978. +
  13979. +/* Shift as much as possible from @right to @left using the memcpy-optimized
  13980. + shift_everything_left. @left and @right are formatted neighboring nodes on
  13981. + leaf level. */
  13982. +static int squeeze_right_non_twig(znode * left, znode * right)
  13983. +{
  13984. + int ret;
  13985. + carry_pool *pool;
  13986. + carry_level *todo;
  13987. +
  13988. + assert("nikita-2246", znode_get_level(left) == znode_get_level(right));
  13989. +
  13990. + if (!JF_ISSET(ZJNODE(left), JNODE_DIRTY) ||
  13991. + !JF_ISSET(ZJNODE(right), JNODE_DIRTY))
  13992. + return SQUEEZE_TARGET_FULL;
  13993. +
  13994. + pool = init_carry_pool(sizeof(*pool) + 3 * sizeof(*todo));
  13995. + if (IS_ERR(pool))
  13996. + return PTR_ERR(pool);
  13997. + todo = (carry_level *) (pool + 1);
  13998. + init_carry_level(todo, pool);
  13999. +
  14000. + ret = shift_everything_left(right, left, todo);
  14001. + if (ret > 0) {
  14002. + /* something was shifted */
  14003. + reiser4_tree *tree;
  14004. + __u64 grabbed;
  14005. +
  14006. + znode_make_dirty(left);
  14007. + znode_make_dirty(right);
  14008. +
  14009. + /* update delimiting keys of nodes which participated in
  14010. + shift. FIXME: it would be better to have this in shift
  14011. + node's operation. But it can not be done there. Nobody
  14012. + remembers why, though
  14013. + */
  14014. + tree = znode_get_tree(left);
  14015. + write_lock_dk(tree);
  14016. + update_znode_dkeys(left, right);
  14017. + write_unlock_dk(tree);
  14018. +
  14019. + /* Carry is called to update delimiting key and, maybe, to
  14020. + remove empty node. */
  14021. + grabbed = get_current_context()->grabbed_blocks;
  14022. + ret = reiser4_grab_space_force(tree->height, BA_RESERVED);
  14023. + assert("nikita-3003", ret == 0); /* reserved space is
  14024. + exhausted. Ask Hans. */
  14025. + ret = reiser4_carry(todo, NULL/* previous level */);
  14026. + grabbed2free_mark(grabbed);
  14027. + } else {
  14028. + /* Shifting impossible, we return appropriate result code */
  14029. + ret =
  14030. + node_is_empty(right) ? SQUEEZE_SOURCE_EMPTY :
  14031. + SQUEEZE_TARGET_FULL;
  14032. + }
  14033. +
  14034. + done_carry_pool(pool);
  14035. +
  14036. + return ret;
  14037. +}
  14038. +
  14039. +#if REISER4_DEBUG
  14040. +static int sibling_link_is_ok(const znode *left, const znode *right)
  14041. +{
  14042. + int result;
  14043. +
  14044. + read_lock_tree(znode_get_tree(left));
  14045. + result = (left->right == right && left == right->left);
  14046. + read_unlock_tree(znode_get_tree(left));
  14047. + return result;
  14048. +}
  14049. +#endif
  14050. +
  14051. +/* Shift first unit of first item if it is an internal one. Return
  14052. + SQUEEZE_TARGET_FULL if it fails to shift an item, otherwise return
  14053. + SUBTREE_MOVED. */
  14054. +static int shift_one_internal_unit(znode * left, znode * right)
  14055. +{
  14056. + int ret;
  14057. + carry_pool *pool;
  14058. + carry_level *todo;
  14059. + coord_t *coord;
  14060. + carry_plugin_info *info;
  14061. + int size, moved;
  14062. +
  14063. + assert("nikita-2247", znode_get_level(left) == znode_get_level(right));
  14064. + assert("nikita-2435", znode_is_write_locked(left));
  14065. + assert("nikita-2436", znode_is_write_locked(right));
  14066. + assert("nikita-2434", sibling_link_is_ok(left, right));
  14067. +
  14068. + pool = init_carry_pool(sizeof(*pool) + 3 * sizeof(*todo) +
  14069. + sizeof(*coord) + sizeof(*info)
  14070. +#if REISER4_DEBUG
  14071. + + sizeof(*coord) + 2 * sizeof(reiser4_key)
  14072. +#endif
  14073. + );
  14074. + if (IS_ERR(pool))
  14075. + return PTR_ERR(pool);
  14076. + todo = (carry_level *) (pool + 1);
  14077. + init_carry_level(todo, pool);
  14078. +
  14079. + coord = (coord_t *) (todo + 3);
  14080. + coord_init_first_unit(coord, right);
  14081. + info = (carry_plugin_info *) (coord + 1);
  14082. +
  14083. +#if REISER4_DEBUG
  14084. + if (!node_is_empty(left)) {
  14085. + coord_t *last;
  14086. + reiser4_key *right_key;
  14087. + reiser4_key *left_key;
  14088. +
  14089. + last = (coord_t *) (info + 1);
  14090. + right_key = (reiser4_key *) (last + 1);
  14091. + left_key = right_key + 1;
  14092. + coord_init_last_unit(last, left);
  14093. +
  14094. + assert("nikita-2463",
  14095. + keyle(item_key_by_coord(last, left_key),
  14096. + item_key_by_coord(coord, right_key)));
  14097. + }
  14098. +#endif
  14099. +
  14100. + assert("jmacd-2007", item_is_internal(coord));
  14101. +
  14102. + size = item_length_by_coord(coord);
  14103. + info->todo = todo;
  14104. + info->doing = NULL;
  14105. +
  14106. + ret = node_plugin_by_node(left)->shift(coord, left, SHIFT_LEFT,
  14107. + 1
  14108. + /* delete @right if it becomes
  14109. + empty */
  14110. + ,
  14111. + 0
  14112. + /* do not move coord @coord to
  14113. + node @left */
  14114. + ,
  14115. + info);
  14116. +
  14117. + /* If shift returns positive, then we shifted the item. */
  14118. + assert("vs-423", ret <= 0 || size == ret);
  14119. + moved = (ret > 0);
  14120. +
  14121. + if (moved) {
  14122. + /* something was moved */
  14123. + reiser4_tree *tree;
  14124. + int grabbed;
  14125. +
  14126. + znode_make_dirty(left);
  14127. + znode_make_dirty(right);
  14128. + tree = znode_get_tree(left);
  14129. + write_lock_dk(tree);
  14130. + update_znode_dkeys(left, right);
  14131. + write_unlock_dk(tree);
  14132. +
  14133. + /* reserve space for delimiting keys after shifting */
  14134. + grabbed = get_current_context()->grabbed_blocks;
  14135. + ret = reiser4_grab_space_force(tree->height, BA_RESERVED);
  14136. + assert("nikita-3003", ret == 0); /* reserved space is
  14137. + exhausted. Ask Hans. */
  14138. +
  14139. + ret = reiser4_carry(todo, NULL/* previous level */);
  14140. + grabbed2free_mark(grabbed);
  14141. + }
  14142. +
  14143. + done_carry_pool(pool);
  14144. +
  14145. + if (ret != 0) {
  14146. + /* Shift or carry operation failed. */
  14147. + assert("jmacd-7325", ret < 0);
  14148. + return ret;
  14149. + }
  14150. +
  14151. + return moved ? SUBTREE_MOVED : SQUEEZE_TARGET_FULL;
  14152. +}
  14153. +
  14154. +static int allocate_znode(znode * node,
  14155. + const coord_t *parent_coord, flush_pos_t *pos)
  14156. +{
  14157. + txmod_plugin *plug = get_txmod_plugin();
  14158. + /*
  14159. + * perform znode allocation with znode pinned in memory to avoid races
  14160. + * with asynchronous emergency flush (which plays with
  14161. + * JNODE_FLUSH_RESERVED bit).
  14162. + */
  14163. + return WITH_DATA(node, plug->forward_alloc_formatted(node,
  14164. + parent_coord,
  14165. + pos));
  14166. +}
  14167. +
  14168. +
  14169. +/* JNODE INTERFACE */
  14170. +
  14171. +/* Lock a node (if formatted) and then get its parent locked, set the child's
  14172. + coordinate in the parent. If the child is the root node, the above_root
  14173. + znode is returned but the coord is not set. This function may cause atom
  14174. + fusion, but it is only used for read locks (at this point) and therefore
  14175. + fusion only occurs when the parent is already dirty. */
  14176. +/* Hans adds this note: remember to ask how expensive this operation is vs.
  14177. + storing parent pointer in jnodes. */
  14178. +static int
  14179. +jnode_lock_parent_coord(jnode * node,
  14180. + coord_t *coord,
  14181. + lock_handle * parent_lh,
  14182. + load_count * parent_zh,
  14183. + znode_lock_mode parent_mode, int try)
  14184. +{
  14185. + int ret;
  14186. +
  14187. + assert("edward-53", jnode_is_unformatted(node) || jnode_is_znode(node));
  14188. + assert("edward-54", jnode_is_unformatted(node)
  14189. + || znode_is_any_locked(JZNODE(node)));
  14190. +
  14191. + if (!jnode_is_znode(node)) {
  14192. + reiser4_key key;
  14193. + tree_level stop_level = TWIG_LEVEL;
  14194. + lookup_bias bias = FIND_EXACT;
  14195. +
  14196. + assert("edward-168", !(jnode_get_type(node) == JNODE_BITMAP));
  14197. +
  14198. + /* The case when node is not znode, but can have parent coord
  14199. + (unformatted node, node which represents cluster page,
  14200. + etc..). Generate a key for the appropriate entry, search
  14201. + in the tree using coord_by_key, which handles locking for
  14202. + us. */
  14203. +
  14204. + /*
  14205. + * nothing is locked at this moment, so, nothing prevents
  14206. + * concurrent truncate from removing jnode from inode. To
  14207. + * prevent this spin-lock jnode. jnode can be truncated just
  14208. + * after call to the jnode_build_key(), but this is ok,
  14209. + * because coord_by_key() will just fail to find appropriate
  14210. + * extent.
  14211. + */
  14212. + spin_lock_jnode(node);
  14213. + if (!JF_ISSET(node, JNODE_HEARD_BANSHEE)) {
  14214. + jnode_build_key(node, &key);
  14215. + ret = 0;
  14216. + } else
  14217. + ret = RETERR(-ENOENT);
  14218. + spin_unlock_jnode(node);
  14219. +
  14220. + if (ret != 0)
  14221. + return ret;
  14222. +
  14223. + if (jnode_is_cluster_page(node))
  14224. + stop_level = LEAF_LEVEL;
  14225. +
  14226. + assert("jmacd-1812", coord != NULL);
  14227. +
  14228. + ret = coord_by_key(jnode_get_tree(node), &key, coord, parent_lh,
  14229. + parent_mode, bias, stop_level, stop_level,
  14230. + CBK_UNIQUE, NULL/*ra_info */);
  14231. + switch (ret) {
  14232. + case CBK_COORD_NOTFOUND:
  14233. + assert("edward-1038",
  14234. + ergo(jnode_is_cluster_page(node),
  14235. + JF_ISSET(node, JNODE_HEARD_BANSHEE)));
  14236. + if (!JF_ISSET(node, JNODE_HEARD_BANSHEE))
  14237. + warning("nikita-3177", "Parent not found");
  14238. + return ret;
  14239. + case CBK_COORD_FOUND:
  14240. + if (coord->between != AT_UNIT) {
  14241. + /* FIXME: comment needed */
  14242. + done_lh(parent_lh);
  14243. + if (!JF_ISSET(node, JNODE_HEARD_BANSHEE)) {
  14244. + warning("nikita-3178",
  14245. + "Found but not happy: %i",
  14246. + coord->between);
  14247. + }
  14248. + return RETERR(-ENOENT);
  14249. + }
  14250. + ret = incr_load_count_znode(parent_zh, parent_lh->node);
  14251. + if (ret != 0)
  14252. + return ret;
  14253. + /* if (jnode_is_cluster_page(node)) {
  14254. + races with write() are possible
  14255. + check_child_cluster (parent_lh->node);
  14256. + }
  14257. + */
  14258. + break;
  14259. + default:
  14260. + return ret;
  14261. + }
  14262. +
  14263. + } else {
  14264. + int flags;
  14265. + znode *z;
  14266. +
  14267. + z = JZNODE(node);
  14268. + /* Formatted node case: */
  14269. + assert("jmacd-2061", !znode_is_root(z));
  14270. +
  14271. + flags = GN_ALLOW_NOT_CONNECTED;
  14272. + if (try)
  14273. + flags |= GN_TRY_LOCK;
  14274. +
  14275. + ret =
  14276. + reiser4_get_parent_flags(parent_lh, z, parent_mode, flags);
  14277. + if (ret != 0)
  14278. + /* -E_REPEAT is ok here, it is handled by the caller. */
  14279. + return ret;
  14280. +
  14281. + /* Make the child's position "hint" up-to-date. (Unless above
  14282. + root, which caller must check.) */
  14283. + if (coord != NULL) {
  14284. +
  14285. + ret = incr_load_count_znode(parent_zh, parent_lh->node);
  14286. + if (ret != 0) {
  14287. + warning("jmacd-976812386",
  14288. + "incr_load_count_znode failed: %d",
  14289. + ret);
  14290. + return ret;
  14291. + }
  14292. +
  14293. + ret = find_child_ptr(parent_lh->node, z, coord);
  14294. + if (ret != 0) {
  14295. + warning("jmacd-976812",
  14296. + "find_child_ptr failed: %d", ret);
  14297. + return ret;
  14298. + }
  14299. + }
  14300. + }
  14301. +
  14302. + return 0;
  14303. +}
  14304. +
  14305. +/* Get the (locked) next neighbor of a znode which is dirty and a member of the
  14306. + same atom. If there is no next neighbor or the neighbor is not in memory or
  14307. + if there is a neighbor but it is not dirty or not in the same atom,
  14308. + -E_NO_NEIGHBOR is returned. In some cases the slum may include nodes which
  14309. + are not dirty, if so @check_dirty should be 0 */
  14310. +static int neighbor_in_slum(znode * node, /* starting point */
  14311. + lock_handle * lock, /* lock on starting point */
  14312. + sideof side, /* left or right direction we
  14313. + seek the next node in */
  14314. + znode_lock_mode mode, /* kind of lock we want */
  14315. + int check_dirty, /* true if the neighbor should
  14316. + be dirty */
  14317. + int use_upper_levels /* get neighbor by going though
  14318. + upper levels */)
  14319. +{
  14320. + int ret;
  14321. + int flags;
  14322. +
  14323. + assert("jmacd-6334", znode_is_connected(node));
  14324. +
  14325. + flags = GN_SAME_ATOM | (side == LEFT_SIDE ? GN_GO_LEFT : 0);
  14326. + if (use_upper_levels)
  14327. + flags |= GN_CAN_USE_UPPER_LEVELS;
  14328. +
  14329. + ret = reiser4_get_neighbor(lock, node, mode, flags);
  14330. + if (ret) {
  14331. + /* May return -ENOENT or -E_NO_NEIGHBOR. */
  14332. + /* FIXME(C): check EINVAL, E_DEADLOCK */
  14333. + if (ret == -ENOENT)
  14334. + ret = RETERR(-E_NO_NEIGHBOR);
  14335. + return ret;
  14336. + }
  14337. + if (!check_dirty)
  14338. + return 0;
  14339. + /* Check dirty bit of locked znode, no races here */
  14340. + if (JF_ISSET(ZJNODE(lock->node), JNODE_DIRTY))
  14341. + return 0;
  14342. +
  14343. + done_lh(lock);
  14344. + return RETERR(-E_NO_NEIGHBOR);
  14345. +}
  14346. +
  14347. +/* Return true if two znodes have the same parent. This is called with both
  14348. + nodes write-locked (for squeezing) so no tree lock is needed. */
  14349. +static int znode_same_parents(znode * a, znode * b)
  14350. +{
  14351. + int result;
  14352. +
  14353. + assert("jmacd-7011", znode_is_write_locked(a));
  14354. + assert("jmacd-7012", znode_is_write_locked(b));
  14355. +
  14356. + /* We lock the whole tree for this check.... I really don't like whole
  14357. + * tree locks... -Hans */
  14358. + read_lock_tree(znode_get_tree(a));
  14359. + result = (znode_parent(a) == znode_parent(b));
  14360. + read_unlock_tree(znode_get_tree(a));
  14361. + return result;
  14362. +}
  14363. +
  14364. +/* FLUSH SCAN */
  14365. +
  14366. +/* Initialize the flush_scan data structure. */
  14367. +static void scan_init(flush_scan * scan)
  14368. +{
  14369. + memset(scan, 0, sizeof(*scan));
  14370. + init_lh(&scan->node_lock);
  14371. + init_lh(&scan->parent_lock);
  14372. + init_load_count(&scan->parent_load);
  14373. + init_load_count(&scan->node_load);
  14374. + coord_init_invalid(&scan->parent_coord, NULL);
  14375. +}
  14376. +
  14377. +/* Release any resources held by the flush scan, e.g. release locks,
  14378. + free memory, etc. */
  14379. +static void scan_done(flush_scan * scan)
  14380. +{
  14381. + done_load_count(&scan->node_load);
  14382. + if (scan->node != NULL) {
  14383. + jput(scan->node);
  14384. + scan->node = NULL;
  14385. + }
  14386. + done_load_count(&scan->parent_load);
  14387. + done_lh(&scan->parent_lock);
  14388. + done_lh(&scan->node_lock);
  14389. +}
  14390. +
  14391. +/* Returns true if flush scanning is finished. */
  14392. +int reiser4_scan_finished(flush_scan * scan)
  14393. +{
  14394. + return scan->stop || (scan->direction == RIGHT_SIDE &&
  14395. + scan->count >= scan->max_count);
  14396. +}
  14397. +
  14398. +/* Return true if the scan should continue to the @tonode. True if the node
  14399. + meets the same_slum_check condition. If not, deref the "left" node and stop
  14400. + the scan. */
  14401. +int reiser4_scan_goto(flush_scan * scan, jnode * tonode)
  14402. +{
  14403. + int go = same_slum_check(scan->node, tonode, 1, 0);
  14404. +
  14405. + if (!go) {
  14406. + scan->stop = 1;
  14407. + jput(tonode);
  14408. + }
  14409. +
  14410. + return go;
  14411. +}
  14412. +
  14413. +/* Set the current scan->node, refcount it, increment count by the @add_count
  14414. + (number to count, e.g., skipped unallocated nodes), deref previous current,
  14415. + and copy the current parent coordinate. */
  14416. +int
  14417. +scan_set_current(flush_scan * scan, jnode * node, unsigned add_count,
  14418. + const coord_t *parent)
  14419. +{
  14420. + /* Release the old references, take the new reference. */
  14421. + done_load_count(&scan->node_load);
  14422. +
  14423. + if (scan->node != NULL)
  14424. + jput(scan->node);
  14425. + scan->node = node;
  14426. + scan->count += add_count;
  14427. +
  14428. + /* This next stmt is somewhat inefficient. The reiser4_scan_extent()
  14429. + code could delay this update step until it finishes and update the
  14430. + parent_coord only once. It did that before, but there was a bug and
  14431. + this was the easiest way to make it correct. */
  14432. + if (parent != NULL)
  14433. + coord_dup(&scan->parent_coord, parent);
  14434. +
  14435. + /* Failure may happen at the incr_load_count call, but the caller can
  14436. + assume the reference is safely taken. */
  14437. + return incr_load_count_jnode(&scan->node_load, node);
  14438. +}
  14439. +
  14440. +/* Return true if scanning in the leftward direction. */
  14441. +int reiser4_scanning_left(flush_scan * scan)
  14442. +{
  14443. + return scan->direction == LEFT_SIDE;
  14444. +}
  14445. +
  14446. +/* Performs leftward scanning starting from either kind of node. Counts the
  14447. + starting node. The right-scan object is passed in for the left-scan in order
  14448. + to copy the parent of an unformatted starting position. This way we avoid
  14449. + searching for the unformatted node's parent when scanning in each direction.
  14450. + If we search for the parent once it is set in both scan objects. The limit
  14451. + parameter tells flush-scan when to stop.
  14452. +
  14453. + Rapid scanning is used only during scan_left, where we are interested in
  14454. + finding the 'leftpoint' where we begin flushing. We are interested in
  14455. + stopping at the left child of a twig that does not have a dirty left
  14456. + neighbour. THIS IS A SPECIAL CASE. The problem is finding a way to flush only
  14457. + those nodes without unallocated children, and it is difficult to solve in the
  14458. + bottom-up flushing algorithm we are currently using. The problem can be
  14459. + solved by scanning left at every level as we go upward, but this would
  14460. + basically bring us back to using a top-down allocation strategy, which we
  14461. + already tried (see BK history from May 2002), and has a different set of
  14462. + problems. The top-down strategy makes avoiding unallocated children easier,
  14463. + but makes it difficult to properly flush dirty children with clean parents
  14464. + that would otherwise stop the top-down flush, only later to dirty the parent
  14465. + once the children are flushed. So we solve the problem in the bottom-up
  14466. + algorithm with a special case for twigs and leaves only.
  14467. +
  14468. + The first step in solving the problem is this rapid leftward scan. After we
  14469. + determine that there are at least enough nodes counted to qualify for
  14470. + FLUSH_RELOCATE_THRESHOLD we are no longer interested in the exact count, we
  14471. + are only interested in finding the best place to start the flush.
  14472. +
  14473. + We could choose one of two possibilities:
  14474. +
  14475. + 1. Stop at the leftmost child (of a twig) that does not have a dirty left
  14476. + neighbor. This requires checking one leaf per rapid-scan twig
  14477. +
  14478. + 2. Stop at the leftmost child (of a twig) where there are no dirty children
  14479. + of the twig to the left. This requires checking possibly all of the in-memory
  14480. + children of each twig during the rapid scan.
  14481. +
  14482. + For now we implement the first policy.
  14483. +*/
  14484. +static int
  14485. +scan_left(flush_scan * scan, flush_scan * right, jnode * node, unsigned limit)
  14486. +{
  14487. + int ret = 0;
  14488. +
  14489. + scan->max_count = limit;
  14490. + scan->direction = LEFT_SIDE;
  14491. +
  14492. + ret = scan_set_current(scan, jref(node), 1, NULL);
  14493. + if (ret != 0)
  14494. + return ret;
  14495. +
  14496. + ret = scan_common(scan, right);
  14497. + if (ret != 0)
  14498. + return ret;
  14499. +
  14500. + /* Before rapid scanning, we need a lock on scan->node so that we can
  14501. + get its parent, only if formatted. */
  14502. + if (jnode_is_znode(scan->node)) {
  14503. + ret = longterm_lock_znode(&scan->node_lock, JZNODE(scan->node),
  14504. + ZNODE_WRITE_LOCK, ZNODE_LOCK_LOPRI);
  14505. + }
  14506. +
  14507. + /* Rapid_scan would go here (with limit set to FLUSH_RELOCATE_THRESHOLD)
  14508. + */
  14509. + return ret;
  14510. +}
  14511. +
  14512. +/* Performs rightward scanning... Does not count the starting node. The limit
  14513. + parameter is described in scan_left. If the starting node is unformatted then
  14514. + the parent_coord was already set during scan_left. The rapid_after parameter
  14515. + is not used during right-scanning.
  14516. +
  14517. + scan_right is only called if the scan_left operation does not count at least
  14518. + FLUSH_RELOCATE_THRESHOLD nodes for flushing. Otherwise, the limit parameter
  14519. + is set to the difference between scan-left's count and
  14520. + FLUSH_RELOCATE_THRESHOLD, meaning scan-right counts as high as
  14521. + FLUSH_RELOCATE_THRESHOLD and then stops. */
  14522. +static int scan_right(flush_scan * scan, jnode * node, unsigned limit)
  14523. +{
  14524. + int ret;
  14525. +
  14526. + scan->max_count = limit;
  14527. + scan->direction = RIGHT_SIDE;
  14528. +
  14529. + ret = scan_set_current(scan, jref(node), 0, NULL);
  14530. + if (ret != 0)
  14531. + return ret;
  14532. +
  14533. + return scan_common(scan, NULL);
  14534. +}
  14535. +
  14536. +/* Common code to perform left or right scanning. */
  14537. +static int scan_common(flush_scan * scan, flush_scan * other)
  14538. +{
  14539. + int ret;
  14540. +
  14541. + assert("nikita-2376", scan->node != NULL);
  14542. + assert("edward-54", jnode_is_unformatted(scan->node)
  14543. + || jnode_is_znode(scan->node));
  14544. +
  14545. + /* Special case for starting at an unformatted node. Optimization: we
  14546. + only want to search for the parent (which requires a tree traversal)
  14547. + once. Obviously, we shouldn't have to call it once for the left scan
  14548. + and once for the right scan. For this reason, if we search for the
  14549. + parent during scan-left we then duplicate the coord/lock/load into
  14550. + the scan-right object. */
  14551. + if (jnode_is_unformatted(scan->node)) {
  14552. + ret = scan_unformatted(scan, other);
  14553. + if (ret != 0)
  14554. + return ret;
  14555. + }
  14556. + /* This loop expects to start at a formatted position and performs
  14557. + chaining of formatted regions */
  14558. + while (!reiser4_scan_finished(scan)) {
  14559. +
  14560. + ret = scan_formatted(scan);
  14561. + if (ret != 0)
  14562. + return ret;
  14563. + }
  14564. +
  14565. + return 0;
  14566. +}
  14567. +
/* Establish scan->parent_coord (the coord of scan->node's parent item) if it
   is not already valid, then continue scanning via scan_by_coord().
   @other, when non-NULL, receives a duplicate of the parent
   coord/lock/load so the opposite-direction scan can reuse it without a
   second tree traversal. Returns 0 on success or when the scan is
   cleanly stopped, negative error otherwise. */
static int scan_unformatted(flush_scan * scan, flush_scan * other)
{
	int ret = 0;
	int try = 0;

	/* Parent coord already known (e.g. carried over from the left
	   scan) -- go straight to scanning by coord. */
	if (!coord_is_invalid(&scan->parent_coord))
		goto scan;

	/* Set the parent coord from the current scan position. */
	if (!jnode_is_unformatted(scan->node)) {
		/* formatted position */

		lock_handle lock;
		assert("edward-301", jnode_is_znode(scan->node));
		init_lh(&lock);

		/*
		 * when flush starts from unformatted node, first thing it
		 * does is tree traversal to find formatted parent of starting
		 * node. This parent is then kept locked across scans to the
		 * left and to the right. This means that during scan to the
		 * left we cannot take left-ward lock, because this is
		 * dead-lock prone. So, if we are scanning to the left and
		 * there is already lock held by this thread,
		 * jnode_lock_parent_coord() should use try-lock.
		 */
		try = reiser4_scanning_left(scan)
		    && !lock_stack_isclean(get_current_lock_stack());
		/* Need the node locked to get the parent lock, We have to
		   take write lock since there is at least one call path
		   where this znode is already write-locked by us. */
		ret =
		    longterm_lock_znode(&lock, JZNODE(scan->node),
					ZNODE_WRITE_LOCK,
					reiser4_scanning_left(scan) ?
					ZNODE_LOCK_LOPRI :
					ZNODE_LOCK_HIPRI);
		if (ret != 0)
			/* EINVAL or E_DEADLOCK here mean... try again! At this
			   point we've scanned too far and can't back out, just
			   start over. */
			return ret;

		ret = jnode_lock_parent_coord(scan->node,
					      &scan->parent_coord,
					      &scan->parent_lock,
					      &scan->parent_load,
					      ZNODE_WRITE_LOCK, try);

		/* FIXME(C): check EINVAL, E_DEADLOCK */
		done_lh(&lock);
		if (ret == -E_REPEAT) {
			/* try-lock failed: stop the scan here rather than
			   risking a deadlock; not an error. */
			scan->stop = 1;
			return 0;
		}
		if (ret)
			return ret;

	} else {
		/* unformatted position: look up the parent by tree
		   traversal (try == 0, so this is a blocking lock). */

		ret =
		    jnode_lock_parent_coord(scan->node, &scan->parent_coord,
					    &scan->parent_lock,
					    &scan->parent_load,
					    ZNODE_WRITE_LOCK, try);

		if (IS_CBKERR(ret))
			return ret;

		if (ret == CBK_COORD_NOTFOUND)
			/* FIXME(C): check EINVAL, E_DEADLOCK */
			return ret;

		/* parent was found */
		assert("jmacd-8661", other != NULL);
		/* Duplicate the reference into the other flush_scan so the
		   expensive lookup above happens only once per flush. */
		coord_dup(&other->parent_coord, &scan->parent_coord);
		copy_lh(&other->parent_lock, &scan->parent_lock);
		copy_load_count(&other->parent_load, &scan->parent_load);
	}
scan:
	return scan_by_coord(scan);
}
  14652. +
/* Performs left- or rightward scanning starting from a formatted node. Follow
   left pointers under tree lock as long as:

   - node->left/right is non-NULL
   - node->left/right is connected, dirty
   - node->left/right belongs to the same atom
   - scan has not reached maximum count
*/
static int scan_formatted(flush_scan * scan)
{
	int ret;
	znode *neighbor = NULL;

	assert("jmacd-1401", !reiser4_scan_finished(scan));

	do {
		znode *node = JZNODE(scan->node);

		/* Node should be connected, but if not stop the scan. */
		if (!znode_is_connected(node)) {
			scan->stop = 1;
			break;
		}

		/* Lock the tree, check-for and reference the next sibling. */
		read_lock_tree(znode_get_tree(node));

		/* It may be that a node is inserted or removed between a node
		   and its left sibling while the tree lock is released, but the
		   flush-scan count does not need to be precise. Thus, we
		   release the tree lock as soon as we get the neighboring node.
		 */
		neighbor =
		    reiser4_scanning_left(scan) ? node->left : node->right;
		if (neighbor != NULL)
			zref(neighbor);	/* hold the sibling across the unlock */

		read_unlock_tree(znode_get_tree(node));

		/* If neighbor is NULL at the leaf level, need to check for an
		   unformatted sibling using the parent--break in any case. */
		if (neighbor == NULL)
			break;

		/* Check the condition for going left, break if it is not met.
		   This also releases (jputs) the neighbor if false. */
		if (!reiser4_scan_goto(scan, ZJNODE(neighbor)))
			break;

		/* Advance the flush_scan state to the left, repeat.
		   scan_set_current() takes over the reference acquired by
		   zref() above. */
		ret = scan_set_current(scan, ZJNODE(neighbor), 1, NULL);
		if (ret != 0)
			return ret;

	} while (!reiser4_scan_finished(scan));

	/* If neighbor is NULL then we reached the end of a formatted region, or
	   else the sibling is out of memory, now check for an extent to the
	   left (as long as LEAF_LEVEL). */
	if (neighbor != NULL || jnode_get_level(scan->node) != LEAF_LEVEL
	    || reiser4_scan_finished(scan)) {
		scan->stop = 1;
		return 0;
	}
	/* Otherwise, calls scan_by_coord for the right(left)most item of the
	   left(right) neighbor on the parent level, then possibly continue. */

	coord_init_invalid(&scan->parent_coord, NULL);
	return scan_unformatted(scan, NULL);
}
  14723. +
/* NOTE-EDWARD:
   This scans adjacent items of the same type and calls scan flush plugin for
   each one. Performs left(right)ward scanning starting from a (possibly)
   unformatted node. If we start from unformatted node, then we continue only if
   the next neighbor is also unformatted. When called from scan_formatted, we
   skip first iteration (to make sure that right(left)most item of the
   left(right) neighbor on the parent level is of the same type and set
   appropriate coord). */
static int scan_by_coord(flush_scan * scan)
{
	int ret = 0;
	int scan_this_coord;	/* whether to run the item scan plugin on
				   the current coord this iteration */
	lock_handle next_lock;	/* lock on the next twig, if we cross one */
	load_count next_load;
	coord_t next_coord;
	jnode *child;
	item_plugin *iplug;

	init_lh(&next_lock);
	init_load_count(&next_load);
	/* skip the first iteration's item scan when we arrived here from a
	   formatted position (see NOTE-EDWARD above) */
	scan_this_coord = (jnode_is_unformatted(scan->node) ? 1 : 0);

	/* set initial item id */
	iplug = item_plugin_by_coord(&scan->parent_coord);

	for (; !reiser4_scan_finished(scan); scan_this_coord = 1) {
		if (scan_this_coord) {
			/* Here we expect that unit is scannable. it would not
			 * be so due to race with extent->tail conversion.  */
			if (iplug->f.scan == NULL) {
				scan->stop = 1;
				ret = -E_REPEAT;
				/* skip the check at the end. */
				goto race;
			}

			ret = iplug->f.scan(scan);
			if (ret != 0)
				goto exit;

			if (reiser4_scan_finished(scan)) {
				checkchild(scan);
				break;
			}
		} else {
			/* the same race against truncate as above is possible
			 * here, it seems */

			/* NOTE-JMACD: In this case, apply the same end-of-node
			   logic but don't scan the first coordinate. */
			assert("jmacd-1231",
			       item_is_internal(&scan->parent_coord));
		}

		if (iplug->f.utmost_child == NULL
		    || znode_get_level(scan->parent_coord.node) != TWIG_LEVEL) {
			/* stop this coord and continue on parent level */
			ret =
			    scan_set_current(scan,
					     ZJNODE(zref
						    (scan->parent_coord.node)),
					     1, NULL);
			if (ret != 0)
				goto exit;
			break;
		}

		/* Either way, the invariant is that scan->parent_coord is set
		   to the parent of scan->node. Now get the next unit. */
		coord_dup(&next_coord, &scan->parent_coord);
		coord_sideof_unit(&next_coord, scan->direction);

		/* If off-the-end of the twig, try the next twig. */
		if (coord_is_after_sideof_unit(&next_coord, scan->direction)) {
			/* We take the write lock because we may start flushing
			 * from this coordinate. */
			ret = neighbor_in_slum(next_coord.node,
					       &next_lock,
					       scan->direction,
					       ZNODE_WRITE_LOCK,
					       1 /* check dirty */,
					       0 /* don't go through upper
						    levels */);
			if (ret == -E_NO_NEIGHBOR) {
				/* no dirty adjacent twig: clean stop, not an
				   error */
				scan->stop = 1;
				ret = 0;
				break;
			}

			if (ret != 0)
				goto exit;

			ret = incr_load_count_znode(&next_load, next_lock.node);
			if (ret != 0)
				goto exit;

			coord_init_sideof_unit(&next_coord, next_lock.node,
					       sideof_reverse(scan->direction));
		}

		iplug = item_plugin_by_coord(&next_coord);

		/* Get the next child. */
		ret =
		    iplug->f.utmost_child(&next_coord,
					  sideof_reverse(scan->direction),
					  &child);
		if (ret != 0)
			goto exit;
		/* If the next child is not in memory, or, item_utmost_child
		   failed (due to race with unlink, most probably), stop
		   here. */
		if (child == NULL || IS_ERR(child)) {
			scan->stop = 1;
			checkchild(scan);
			break;
		}

		assert("nikita-2374", jnode_is_unformatted(child)
		       || jnode_is_znode(child));

		/* See if it is dirty, part of the same atom. */
		if (!reiser4_scan_goto(scan, child)) {
			checkchild(scan);
			break;
		}

		/* If so, make this child current. */
		ret = scan_set_current(scan, child, 1, &next_coord);
		if (ret != 0)
			goto exit;

		/* Now continue. If formatted we release the parent lock and
		   return, then proceed. */
		if (jnode_is_znode(child))
			break;

		/* Otherwise, repeat the above loop with next_coord: if we
		   crossed to a new twig, move the parent lock/load onto it. */
		if (next_load.node != NULL) {
			done_lh(&scan->parent_lock);
			move_lh(&scan->parent_lock, &next_lock);
			move_load_count(&scan->parent_load, &next_load);
		}
	}

	assert("jmacd-6233",
	       reiser4_scan_finished(scan) || jnode_is_znode(scan->node));
exit:
	checkchild(scan);
race:			/* skip the above check */
	if (jnode_is_znode(scan->node)) {
		/* endpoint is formatted: the parent coord is no longer
		   needed, release its lock and load */
		done_lh(&scan->parent_lock);
		done_load_count(&scan->parent_load);
	}

	done_load_count(&next_load);
	done_lh(&next_lock);
	return ret;
}
  14883. +
  14884. +/* FLUSH POS HELPERS */
  14885. +
  14886. +/* Initialize the fields of a flush_position. */
  14887. +static void pos_init(flush_pos_t *pos)
  14888. +{
  14889. + memset(pos, 0, sizeof *pos);
  14890. +
  14891. + pos->state = POS_INVALID;
  14892. + coord_init_invalid(&pos->coord, NULL);
  14893. + init_lh(&pos->lock);
  14894. + init_load_count(&pos->load);
  14895. +
  14896. + reiser4_blocknr_hint_init(&pos->preceder);
  14897. +}
  14898. +
  14899. +/* The flush loop inside squalloc periodically checks pos_valid to determine
  14900. + when "enough flushing" has been performed. This will return true until one
  14901. + of the following conditions is met:
  14902. +
  14903. + 1. the number of flush-queued nodes has reached the kernel-supplied
  14904. + "int *nr_to_flush" parameter, meaning we have flushed as many blocks as the
  14905. + kernel requested. When flushing to commit, this parameter is NULL.
  14906. +
  14907. + 2. pos_stop() is called because squalloc discovers that the "next" node in
  14908. + the flush order is either non-existant, not dirty, or not in the same atom.
  14909. +*/
  14910. +
  14911. +static int pos_valid(flush_pos_t *pos)
  14912. +{
  14913. + return pos->state != POS_INVALID;
  14914. +}
  14915. +
  14916. +/* Release any resources of a flush_position. Called when jnode_flush
  14917. + finishes. */
  14918. +static void pos_done(flush_pos_t *pos)
  14919. +{
  14920. + pos_stop(pos);
  14921. + reiser4_blocknr_hint_done(&pos->preceder);
  14922. + if (convert_data(pos))
  14923. + free_convert_data(pos);
  14924. +}
  14925. +
  14926. +/* Reset the point and parent. Called during flush subroutines to terminate the
  14927. + squalloc loop. */
  14928. +static int pos_stop(flush_pos_t *pos)
  14929. +{
  14930. + pos->state = POS_INVALID;
  14931. + done_lh(&pos->lock);
  14932. + done_load_count(&pos->load);
  14933. + coord_init_invalid(&pos->coord, NULL);
  14934. +
  14935. + if (pos->child) {
  14936. + jput(pos->child);
  14937. + pos->child = NULL;
  14938. + }
  14939. +
  14940. + return 0;
  14941. +}
  14942. +
  14943. +/* Return the flush_position's block allocator hint. */
  14944. +reiser4_blocknr_hint *reiser4_pos_hint(flush_pos_t *pos)
  14945. +{
  14946. + return &pos->preceder;
  14947. +}
  14948. +
  14949. +flush_queue_t *reiser4_pos_fq(flush_pos_t *pos)
  14950. +{
  14951. + return pos->fq;
  14952. +}
  14953. +
  14954. +/* Make Linus happy.
  14955. + Local variables:
  14956. + c-indentation-style: "K&R"
  14957. + mode-name: "LC"
  14958. + c-basic-offset: 8
  14959. + tab-width: 8
  14960. + fill-column: 90
  14961. + LocalWords: preceder
  14962. + End:
  14963. +*/
  14964. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/flush.h linux-4.14.2/fs/reiser4/flush.h
  14965. --- linux-4.14.2.orig/fs/reiser4/flush.h 1970-01-01 01:00:00.000000000 +0100
  14966. +++ linux-4.14.2/fs/reiser4/flush.h 2017-11-26 22:13:09.000000000 +0100
  14967. @@ -0,0 +1,290 @@
  14968. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  14969. +
  14970. +/* DECLARATIONS: */
  14971. +
  14972. +#if !defined(__REISER4_FLUSH_H__)
  14973. +#define __REISER4_FLUSH_H__
  14974. +
  14975. +#include "plugin/cluster.h"
  14976. +
/* The flush_scan data structure maintains the state of an in-progress
   flush-scan on a single level of the tree. A flush-scan is used for counting
   the number of adjacent nodes to flush, which is used to determine whether we
   should relocate, and it is also used to find a starting point for flush. A
   flush-scan object can scan in both right and left directions via the
   scan_left() and scan_right() interfaces. The right- and left-variations are
   similar but perform different functions. When scanning left we (optionally
   perform rapid scanning and then) longterm-lock the endpoint node. When
   scanning right we are simply counting the number of adjacent, dirty nodes. */
struct flush_scan {

	/* The current number of nodes scanned on this level. */
	unsigned count;

	/* There may be a maximum number of nodes for a scan on any single
	   level. When going leftward, max_count is determined by
	   FLUSH_SCAN_MAXNODES (see reiser4.h) */
	unsigned max_count;

	/* Direction: Set to one of the sideof enumeration:
	   { LEFT_SIDE, RIGHT_SIDE }. */
	sideof direction;

	/* Initially @stop is set to false then set true once some condition
	   stops the search (e.g., we found a clean node before reaching
	   max_count or we found a node belonging to another atom). */
	int stop;

	/* The current scan position. If @node is non-NULL then its reference
	   count has been incremented to reflect this reference. */
	jnode *node;

	/* A handle for zload/zrelse of current scan position node. */
	load_count node_load;

	/* During left-scan, if the final position (a.k.a. endpoint node) is
	   formatted the node is locked using this lock handle. The endpoint
	   needs to be locked for transfer to the flush_position object after
	   scanning finishes. */
	lock_handle node_lock;

	/* When the position is unformatted, its parent, coordinate, and parent
	   zload/zrelse handle. These are only valid while the scan is at an
	   unformatted position (see scan_unformatted()/scan_by_coord()). */
	lock_handle parent_lock;
	coord_t parent_coord;
	load_count parent_load;

	/* The block allocator preceder hint. Sometimes flush_scan determines
	   what the preceder is and if so it sets it here, after which it is
	   copied into the flush_position. Otherwise, the preceder is computed
	   later. */
	reiser4_block_nr preceder_blk;
};
  15030. +
/* Per-item state of an in-progress disk-cluster conversion (see the
   cryptcompress convert machinery). */
struct convert_item_info {
	dc_item_stat d_cur;	/* per-cluster status of the current item */
	dc_item_stat d_next;	/* per-cluster status of the first item on
				   the right neighbor */
	int cluster_shift;	/* disk cluster shift */
	flow_t flow;		/* disk cluster data */
};
  15038. +
/* Aggregate conversion state attached to a flush_position while squalloc
   transforms items (accessed via convert_data()/item_convert_data()). */
struct convert_info {
	int count;		/* for squalloc terminating */
	item_plugin *iplug;	/* current item plugin */
	struct convert_item_info *itm;	/* current item info */
	struct cluster_handle clust;	/* transform cluster */
	lock_handle right_lock;	/* lock handle of the right neighbor */
	int right_locked;
};
  15047. +
/* States of the squalloc traversal position (flush_position.state). */
typedef enum flush_position_state {
	POS_INVALID,		/* Invalid or stopped pos, do not continue slum
				 * processing */
	POS_ON_LEAF,		/* pos points to already prepped, locked
				 * formatted node at leaf level */
	POS_ON_EPOINT,		/* pos keeps a lock on twig level, "coord" field
				 * is used to traverse unformatted nodes */
	POS_TO_LEAF,		/* pos is being moved to leaf level */
	POS_TO_TWIG,		/* pos is being moved to twig level */
	POS_END_OF_TWIG,	/* special case of POS_ON_TWIG, when coord is
				 * after rightmost unit of the current twig */
	POS_ON_INTERNAL		/* same as POS_ON_LEAF, but points to internal
				 * node */
} flushpos_state_t;
  15062. +
/* An encapsulation of the current flush point and all the parameters that are
   passed through the entire squeeze-and-allocate stage of the flush routine.
   A single flush_position object is constructed after left- and right-scanning
   finishes. */
struct flush_position {
	flushpos_state_t state;

	coord_t coord;		/* coord to traverse unformatted nodes */
	lock_handle lock;	/* current lock we hold */
	load_count load;	/* load status for current locked formatted node
				 */
	jnode *child;		/* for passing a reference to unformatted child
				 * across pos state changes */

	reiser4_blocknr_hint preceder;	/* The flush 'hint' state. */
	int leaf_relocate;	/* True if enough leaf-level nodes were
				 * found to suggest a relocate policy. */
	int alloc_cnt;		/* The number of nodes allocated during squeeze
				   and allocate. */
	int prep_or_free_cnt;	/* The number of nodes prepared for write
				   (allocate) or squeezed and freed. */
	flush_queue_t *fq;
	long *nr_written;	/* number of nodes submitted to disk */
	int flags;		/* a copy of jnode_flush flags argument */

	znode *prev_twig;	/* previous parent pointer value, used to catch
				 * processing of new twig node */
	struct convert_info *sq;	/* convert info */

	unsigned long pos_in_unit;	/* for extents only. Position
					   within an extent unit of first
					   jnode of slum */
	long nr_to_write;	/* number of unformatted nodes to handle on
				   flush */
};
  15098. +
/* Number of conversion iterations performed in this squalloc pass. */
static inline int item_convert_count(flush_pos_t *pos)
{
	return pos->sq->count;
}
/* Bump the conversion-iteration counter. */
static inline void inc_item_convert_count(flush_pos_t *pos)
{
	pos->sq->count++;
}
/* Reset the conversion-iteration counter to @count. */
static inline void set_item_convert_count(flush_pos_t *pos, int count)
{
	pos->sq->count = count;
}
/* Item plugin of the item currently being converted. */
static inline item_plugin *item_convert_plug(flush_pos_t *pos)
{
	return pos->sq->iplug;
}
  15115. +
/* Conversion state attached to @pos, or NULL when none is active. */
static inline struct convert_info *convert_data(flush_pos_t *pos)
{
	return pos->sq;
}
  15120. +
/* Per-item conversion state; requires convert_data(pos) to be non-NULL. */
static inline struct convert_item_info *item_convert_data(flush_pos_t *pos)
{
	assert("edward-955", convert_data(pos));
	return pos->sq->itm;
}
  15126. +
/* Transform cluster embedded in the conversion state of @pos. */
static inline struct tfm_cluster *tfm_cluster_sq(flush_pos_t *pos)
{
	return &pos->sq->clust.tc;
}
  15131. +
/* Transform stream @id of the current transform cluster; requires an
   attached conversion state. */
static inline struct tfm_stream *tfm_stream_sq(flush_pos_t *pos,
					       tfm_stream_id id)
{
	assert("edward-854", pos->sq != NULL);
	return get_tfm_stream(tfm_cluster_sq(pos), id);
}
  15138. +
/* True when conversion state exists and a current item is attached to it. */
static inline int convert_data_attached(flush_pos_t *pos)
{
	return convert_data(pos) != NULL && item_convert_data(pos) != NULL;
}

#define should_convert_right_neighbor(pos) convert_data_attached(pos)
  15145. +
/* Returns true if next node contains next item of the disk cluster
   so item convert data should be moved to the right slum neighbor.
*/
static inline int next_node_is_chained(flush_pos_t *pos)
{
	return convert_data_attached(pos) &&
		item_convert_data(pos)->d_next == DC_CHAINED_ITEM;
}
  15154. +
  15155. +/*
  15156. + * Update "twin state" (d_cur, d_next) to assign a proper
  15157. + * conversion mode in the next iteration of convert_node()
  15158. + */
  15159. +static inline void update_chaining_state(flush_pos_t *pos,
  15160. + int this_node /* where to proceed */)
  15161. +{
  15162. +
  15163. + assert("edward-1010", convert_data_attached(pos));
  15164. +
  15165. + if (this_node) {
  15166. + /*
  15167. + * we want to perform one more iteration with the same item
  15168. + */
  15169. + assert("edward-1013",
  15170. + item_convert_data(pos)->d_cur == DC_FIRST_ITEM ||
  15171. + item_convert_data(pos)->d_cur == DC_CHAINED_ITEM);
  15172. + assert("edward-1227",
  15173. + item_convert_data(pos)->d_next == DC_AFTER_CLUSTER ||
  15174. + item_convert_data(pos)->d_next == DC_INVALID_STATE);
  15175. +
  15176. + item_convert_data(pos)->d_cur = DC_AFTER_CLUSTER;
  15177. + item_convert_data(pos)->d_next = DC_INVALID_STATE;
  15178. + }
  15179. + else {
  15180. + /*
  15181. + * we want to proceed on right neighbor, which is chained
  15182. + */
  15183. + assert("edward-1011",
  15184. + item_convert_data(pos)->d_cur == DC_FIRST_ITEM ||
  15185. + item_convert_data(pos)->d_cur == DC_CHAINED_ITEM);
  15186. + assert("edward-1012",
  15187. + item_convert_data(pos)->d_next == DC_CHAINED_ITEM);
  15188. +
  15189. + item_convert_data(pos)->d_cur = DC_CHAINED_ITEM;
  15190. + item_convert_data(pos)->d_next = DC_INVALID_STATE;
  15191. + }
  15192. +}
  15193. +
  15194. +#define SQUALLOC_THRESHOLD 256
  15195. +
  15196. +static inline int should_terminate_squalloc(flush_pos_t *pos)
  15197. +{
  15198. + return convert_data(pos) &&
  15199. + !item_convert_data(pos) &&
  15200. + item_convert_count(pos) >= SQUALLOC_THRESHOLD;
  15201. +}
  15202. +
/* Debug aid: warn if a pass ends while chained disk-cluster data is still
   waiting to be converted on the right neighbor. Compiles to nothing
   without REISER4_DEBUG. */
#if REISER4_DEBUG
#define check_convert_info(pos)						\
do {									\
	if (unlikely(should_convert_right_neighbor(pos))) {		\
		warning("edward-1006", "unprocessed chained data");	\
		printk("d_cur = %d, d_next = %d, flow.len = %llu\n",	\
		       item_convert_data(pos)->d_cur,			\
		       item_convert_data(pos)->d_next,			\
		       item_convert_data(pos)->flow.length);		\
	}								\
} while (0)
#else
#define check_convert_info(pos)
#endif /* REISER4_DEBUG */
  15217. +
  15218. +void free_convert_data(flush_pos_t *pos);
  15219. +/* used in extent.c */
  15220. +int scan_set_current(flush_scan * scan, jnode * node, unsigned add_size,
  15221. + const coord_t *parent);
  15222. +int reiser4_scan_finished(flush_scan * scan);
  15223. +int reiser4_scanning_left(flush_scan * scan);
  15224. +int reiser4_scan_goto(flush_scan * scan, jnode * tonode);
  15225. +txn_atom *atom_locked_by_fq(flush_queue_t *fq);
  15226. +int reiser4_alloc_extent(flush_pos_t *flush_pos);
  15227. +squeeze_result squalloc_extent(znode *left, const coord_t *, flush_pos_t *,
  15228. + reiser4_key *stop_key);
  15229. +extern int reiser4_init_fqs(void);
  15230. +extern void reiser4_done_fqs(void);
  15231. +
  15232. +#if REISER4_DEBUG
  15233. +
  15234. +extern void reiser4_check_fq(const txn_atom *atom);
  15235. +extern atomic_t flush_cnt;
  15236. +
  15237. +#define check_preceder(blk) \
  15238. +assert("nikita-2588", blk < reiser4_block_count(reiser4_get_current_sb()));
  15239. +extern void check_pos(flush_pos_t *pos);
  15240. +#else
  15241. +#define check_preceder(b) noop
  15242. +#define check_pos(pos) noop
  15243. +#endif
  15244. +
  15245. +/* __REISER4_FLUSH_H__ */
  15246. +#endif
  15247. +
  15248. +/* Make Linus happy.
  15249. + Local variables:
  15250. + c-indentation-style: "K&R"
  15251. + mode-name: "LC"
  15252. + c-basic-offset: 8
  15253. + tab-width: 8
  15254. + fill-column: 90
  15255. + LocalWords: preceder
  15256. + End:
  15257. +*/
  15258. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/flush_queue.c linux-4.14.2/fs/reiser4/flush_queue.c
  15259. --- linux-4.14.2.orig/fs/reiser4/flush_queue.c 1970-01-01 01:00:00.000000000 +0100
  15260. +++ linux-4.14.2/fs/reiser4/flush_queue.c 2017-11-26 22:13:09.000000000 +0100
  15261. @@ -0,0 +1,677 @@
  15262. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  15263. + reiser4/README */
  15264. +
  15265. +#include "debug.h"
  15266. +#include "super.h"
  15267. +#include "txnmgr.h"
  15268. +#include "jnode.h"
  15269. +#include "znode.h"
  15270. +#include "page_cache.h"
  15271. +#include "wander.h"
  15272. +#include "vfs_ops.h"
  15273. +#include "writeout.h"
  15274. +#include "flush.h"
  15275. +
  15276. +#include <linux/bio.h>
  15277. +#include <linux/mm.h>
  15278. +#include <linux/pagemap.h>
  15279. +#include <linux/blkdev.h>
  15280. +#include <linux/writeback.h>
  15281. +
  15282. +/* A flush queue object is an accumulator for keeping jnodes prepared
  15283. + by the jnode_flush() function for writing to disk. Those "queued" jnodes are
  15284. + kept on the flush queue until memory pressure or atom commit asks
  15285. + flush queues to write some or all from their jnodes. */
  15286. +
  15287. +/*
  15288. + LOCKING:
  15289. +
  15290. + fq->guard spin lock protects fq->atom pointer and nothing else. fq->prepped
  15291. + list protected by atom spin lock. fq->prepped list uses the following
  15292. + locking:
  15293. +
  15294. + two ways to protect fq->prepped list for read-only list traversal:
  15295. +
  15296. + 1. atom spin-lock atom.
  15297. + 2. fq is IN_USE, atom->nr_running_queues increased.
  15298. +
  15299. + and one for list modification:
  15300. +
  15301. + 1. atom is spin-locked and one condition is true: fq is IN_USE or
  15302. + atom->nr_running_queues == 0.
  15303. +
  15304. + The deadlock-safe order for flush queues and atoms is: first lock atom, then
  15305. + lock flush queue, then lock jnode.
  15306. +*/
  15307. +
/* Flush-queue state predicates and transitions over the FQ_IN_USE bit of
   fq->state: a queue is either "in use" (owned by one flusher) or
   "ready" (available to be grabbed). */
#define fq_in_use(fq)		((fq)->state & FQ_IN_USE)
#define fq_ready(fq)		(!fq_in_use(fq))

#define mark_fq_in_use(fq)	do { (fq)->state |= FQ_IN_USE; } while (0)
#define mark_fq_ready(fq)	do { (fq)->state &= ~FQ_IN_USE; } while (0)
  15313. +
/* get lock on atom from locked flush queue object. Caller holds fq->guard;
   returns the atom spin-locked, or NULL if the queue has no atom. */
static txn_atom *atom_locked_by_fq_nolock(flush_queue_t *fq)
{
	/* This code is similar to jnode_get_atom(), look at it for the
	 * explanation. */
	txn_atom *atom;

	assert_spin_locked(&(fq->guard));

	while (1) {
		atom = fq->atom;
		if (atom == NULL)
			break;

		/* fast path: atom lock acquired without violating the
		   atom-before-fq lock order */
		if (spin_trylock_atom(atom))
			break;

		/* slow path: pin the atom with a reference, drop fq->guard,
		   and re-acquire both locks in the deadlock-safe order
		   (atom first, then fq->guard) */
		atomic_inc(&atom->refcount);
		spin_unlock(&(fq->guard));
		spin_lock_atom(atom);
		spin_lock(&(fq->guard));

		/* success only if the queue is still attached to the same
		   atom; otherwise drop everything and retry */
		if (fq->atom == atom) {
			atomic_dec(&atom->refcount);
			break;
		}

		spin_unlock(&(fq->guard));
		atom_dec_and_unlock(atom);
		spin_lock(&(fq->guard));
	}

	return atom;
}
  15348. +
  15349. +txn_atom *atom_locked_by_fq(flush_queue_t *fq)
  15350. +{
  15351. + txn_atom *atom;
  15352. +
  15353. + spin_lock(&(fq->guard));
  15354. + atom = atom_locked_by_fq_nolock(fq);
  15355. + spin_unlock(&(fq->guard));
  15356. + return atom;
  15357. +}
  15358. +
  15359. +static void init_fq(flush_queue_t *fq)
  15360. +{
  15361. + memset(fq, 0, sizeof *fq);
  15362. +
  15363. + atomic_set(&fq->nr_submitted, 0);
  15364. +
  15365. + INIT_LIST_HEAD(ATOM_FQ_LIST(fq));
  15366. +
  15367. + init_waitqueue_head(&fq->wait);
  15368. + spin_lock_init(&fq->guard);
  15369. +}
  15370. +
  15371. +/* slab for flush queues */
  15372. +static struct kmem_cache *fq_slab;
  15373. +
  15374. +/**
  15375. + * reiser4_init_fqs - create flush queue cache
  15376. + *
  15377. + * Initializes slab cache of flush queues. It is part of reiser4 module
  15378. + * initialization.
  15379. + */
  15380. +int reiser4_init_fqs(void)
  15381. +{
  15382. + fq_slab = kmem_cache_create("fq",
  15383. + sizeof(flush_queue_t),
  15384. + 0, SLAB_HWCACHE_ALIGN, NULL);
  15385. + if (fq_slab == NULL)
  15386. + return RETERR(-ENOMEM);
  15387. + return 0;
  15388. +}
  15389. +
  15390. +/**
  15391. + * reiser4_done_fqs - delete flush queue cache
  15392. + *
  15393. + * This is called on reiser4 module unloading or system shutdown.
  15394. + */
  15395. +void reiser4_done_fqs(void)
  15396. +{
  15397. + destroy_reiser4_cache(&fq_slab);
  15398. +}
  15399. +
  15400. +/* create new flush queue object */
  15401. +static flush_queue_t *create_fq(gfp_t gfp)
  15402. +{
  15403. + flush_queue_t *fq;
  15404. +
  15405. + fq = kmem_cache_alloc(fq_slab, gfp);
  15406. + if (fq)
  15407. + init_fq(fq);
  15408. +
  15409. + return fq;
  15410. +}
  15411. +
  15412. +/* adjust atom's and flush queue's counters of queued nodes */
  15413. +static void count_enqueued_node(flush_queue_t *fq)
  15414. +{
  15415. + ON_DEBUG(fq->atom->num_queued++);
  15416. +}
  15417. +
  15418. +static void count_dequeued_node(flush_queue_t *fq)
  15419. +{
  15420. + assert("zam-993", fq->atom->num_queued > 0);
  15421. + ON_DEBUG(fq->atom->num_queued--);
  15422. +}
  15423. +
  15424. +/* attach flush queue object to the atom */
  15425. +static void attach_fq(txn_atom *atom, flush_queue_t *fq)
  15426. +{
  15427. + assert_spin_locked(&(atom->alock));
  15428. + list_add(&fq->alink, &atom->flush_queues);
  15429. + fq->atom = atom;
  15430. + ON_DEBUG(atom->nr_flush_queues++);
  15431. +}
  15432. +
  15433. +static void detach_fq(flush_queue_t *fq)
  15434. +{
  15435. + assert_spin_locked(&(fq->atom->alock));
  15436. +
  15437. + spin_lock(&(fq->guard));
  15438. + list_del_init(&fq->alink);
  15439. + assert("vs-1456", fq->atom->nr_flush_queues > 0);
  15440. + ON_DEBUG(fq->atom->nr_flush_queues--);
  15441. + fq->atom = NULL;
  15442. + spin_unlock(&(fq->guard));
  15443. +}
  15444. +
  15445. +/* destroy flush queue object */
  15446. +static void done_fq(flush_queue_t *fq)
  15447. +{
  15448. + assert("zam-763", list_empty_careful(ATOM_FQ_LIST(fq)));
  15449. + assert("zam-766", atomic_read(&fq->nr_submitted) == 0);
  15450. +
  15451. + kmem_cache_free(fq_slab, fq);
  15452. +}
  15453. +
  15454. +/* mark @node as queued on a flush queue and bump the debug counter */
  15455. +static void mark_jnode_queued(flush_queue_t *fq, jnode * node)
  15456. +{
  15457. + JF_SET(node, JNODE_FLUSH_QUEUED);
  15458. + count_enqueued_node(fq);
  15459. +}
  15460. +
  15461. +/* Putting jnode into the flush queue. Both atom and jnode should be
  15462. + spin-locked. */
  15463. +void queue_jnode(flush_queue_t *fq, jnode * node)
  15464. +{
  15465. + assert_spin_locked(&(node->guard));
  15466. + assert("zam-713", node->atom != NULL);
  15467. + assert_spin_locked(&(node->atom->alock));
  15468. + assert("zam-716", fq->atom != NULL);
  15469. + assert("zam-717", fq->atom == node->atom);
  15470. + assert("zam-907", fq_in_use(fq));
  15471. +
  15472. + assert("zam-714", JF_ISSET(node, JNODE_DIRTY));
  15473. + assert("zam-826", JF_ISSET(node, JNODE_RELOC));
  15474. + assert("vs-1481", !JF_ISSET(node, JNODE_FLUSH_QUEUED));
  15475. + assert("vs-1481", NODE_LIST(node) != FQ_LIST);
  15476. +
  15477. + mark_jnode_queued(fq, node);
  15478. + list_move_tail(&node->capture_link, ATOM_FQ_LIST(fq));
  15479. +
  15480. + ON_DEBUG(count_jnode(node->atom, node, NODE_LIST(node),
  15481. + FQ_LIST, 1));
  15482. +}
  15483. +
  15484. +/* repeatable process for waiting io completion on a flush queue object */
  15485. +static int wait_io(flush_queue_t *fq, int *nr_io_errors)
  15486. +{
  15487. + assert("zam-738", fq->atom != NULL);
  15488. + assert_spin_locked(&(fq->atom->alock));
  15489. + assert("zam-736", fq_in_use(fq));
  15490. + assert("zam-911", list_empty_careful(ATOM_FQ_LIST(fq)));
  15491. +
  15492. + if (atomic_read(&fq->nr_submitted) != 0) {
  15493. + struct super_block *super;
  15494. +
  15495. + spin_unlock_atom(fq->atom);
  15496. +
  15497. + assert("nikita-3013", reiser4_schedulable());
  15498. +
  15499. + super = reiser4_get_current_sb();
  15500. +
  15501. + /* FIXME: this is instead of blk_run_queues() */
  15502. + //blk_flush_plug(current);
  15503. +
  15504. + if (!(super->s_flags & MS_RDONLY))
  15505. + wait_event(fq->wait,
  15506. + atomic_read(&fq->nr_submitted) == 0);
  15507. +
  15508. + /* Ask the caller to re-acquire the locks and call this
  15509. + function again. Note: this technique is commonly used in
  15510. + the txnmgr code. */
  15511. + return -E_REPEAT;
  15512. + }
  15513. +
  15514. + *nr_io_errors += atomic_read(&fq->nr_errors);
  15515. + return 0;
  15516. +}
  15517. +
  15518. +/* wait on I/O completion, re-submit dirty nodes to write */
  15519. +static int finish_fq(flush_queue_t *fq, int *nr_io_errors)
  15520. +{
  15521. + int ret;
  15522. + txn_atom *atom = fq->atom;
  15523. +
  15524. + assert("zam-801", atom != NULL);
  15525. + assert_spin_locked(&(atom->alock));
  15526. + assert("zam-762", fq_in_use(fq));
  15527. +
  15528. + ret = wait_io(fq, nr_io_errors);
  15529. + if (ret)
  15530. + return ret;
  15531. +
  15532. + detach_fq(fq);
  15533. + done_fq(fq);
  15534. +
  15535. + reiser4_atom_send_event(atom);
  15536. +
  15537. + return 0;
  15538. +}
  15539. +
  15540. +/* wait for all i/o for given atom to be completed, actually do one iteration
  15541. +   on that and return -E_REPEAT if there are more iterations needed */
  15542. +static int finish_all_fq(txn_atom * atom, int *nr_io_errors)
  15543. +{
  15544. + flush_queue_t *fq;
  15545. +
  15546. + assert_spin_locked(&(atom->alock));
  15547. +
  15548. + if (list_empty_careful(&atom->flush_queues))
  15549. + return 0;
  15550. +
  15551. + list_for_each_entry(fq, &atom->flush_queues, alink) {
  15552. + if (fq_ready(fq)) {
  15553. + int ret;
  15554. +
  15555. + mark_fq_in_use(fq);
  15556. + assert("vs-1247", fq->owner == NULL);
  15557. + ON_DEBUG(fq->owner = current);
  15558. + ret = finish_fq(fq, nr_io_errors);
  15559. +
  15560. + if (*nr_io_errors)
  15561. + reiser4_handle_error();
  15562. +
  15563. + if (ret) {
  15564. + reiser4_fq_put(fq);
  15565. + return ret;
  15566. + }
  15567. +
  15568. + spin_unlock_atom(atom);
  15569. +
  15570. + return -E_REPEAT;
  15571. + }
  15572. + }
  15573. +
  15574. + /* All flush queues are in use; atom remains locked */
  15575. + return -EBUSY;
  15576. +}
  15577. +
  15578. +/* wait all i/o for current atom */
  15579. +int current_atom_finish_all_fq(void)
  15580. +{
  15581. + txn_atom *atom;
  15582. + int nr_io_errors = 0;
  15583. + int ret = 0;
  15584. +
  15585. + do {
  15586. + while (1) {
  15587. + atom = get_current_atom_locked();
  15588. + ret = finish_all_fq(atom, &nr_io_errors);
  15589. + if (ret != -EBUSY)
  15590. + break;
  15591. + reiser4_atom_wait_event(atom);
  15592. + }
  15593. + } while (ret == -E_REPEAT);
  15594. +
  15595. + /* we do not need locked atom after this function finishes, SUCCESS or
  15596. + -EBUSY are two return codes when atom remains locked after
  15597. + finish_all_fq */
  15598. + if (!ret)
  15599. + spin_unlock_atom(atom);
  15600. +
  15601. + assert_spin_not_locked(&(atom->alock));
  15602. +
  15603. + if (ret)
  15604. + return ret;
  15605. +
  15606. + if (nr_io_errors)
  15607. + return RETERR(-EIO);
  15608. +
  15609. + return 0;
  15610. +}
  15611. +
  15612. +/* change node->atom field for all jnode from given list */
  15613. +static void
  15614. +scan_fq_and_update_atom_ref(struct list_head *list, txn_atom *atom)
  15615. +{
  15616. + jnode *cur;
  15617. +
  15618. + list_for_each_entry(cur, list, capture_link) {
  15619. + spin_lock_jnode(cur);
  15620. + cur->atom = atom;
  15621. + spin_unlock_jnode(cur);
  15622. + }
  15623. +}
  15624. +
  15625. +/* support for atom fusion operation */
  15626. +void reiser4_fuse_fq(txn_atom *to, txn_atom *from)
  15627. +{
  15628. + flush_queue_t *fq;
  15629. +
  15630. + assert_spin_locked(&(to->alock));
  15631. + assert_spin_locked(&(from->alock));
  15632. +
  15633. + list_for_each_entry(fq, &from->flush_queues, alink) {
  15634. + scan_fq_and_update_atom_ref(ATOM_FQ_LIST(fq), to);
  15635. + spin_lock(&(fq->guard));
  15636. + fq->atom = to;
  15637. + spin_unlock(&(fq->guard));
  15638. + }
  15639. +
  15640. + list_splice_init(&from->flush_queues, to->flush_queues.prev);
  15641. +
  15642. +#if REISER4_DEBUG
  15643. + to->num_queued += from->num_queued;
  15644. + to->nr_flush_queues += from->nr_flush_queues;
  15645. + from->nr_flush_queues = 0;
  15646. +#endif
  15647. +}
  15648. +
  15649. +#if REISER4_DEBUG
  15650. +int atom_fq_parts_are_clean(txn_atom * atom)
  15651. +{
  15652. + assert("zam-915", atom != NULL);
  15653. + return list_empty_careful(&atom->flush_queues);
  15654. +}
  15655. +#endif
  15656. +
  15657. +/*
  15658. + * Bio i/o completion routine for reiser4 write operations
  15659. + */
  15660. +static void end_io_handler(struct bio *bio)
  15661. +{
  15662. + int i;
  15663. + int nr_errors = 0;
  15664. + flush_queue_t *fq;
  15665. +
  15666. + assert("zam-958", bio_op(bio) == WRITE);
  15667. +
  15668. + /* we expect that bio->private is set to NULL or fq object which is used
  15669. + * for synchronization and error counting. */
  15670. + fq = bio->bi_private;
  15671. + /* Check all elements of io_vec for correct write completion. */
  15672. + for (i = 0; i < bio->bi_vcnt; i += 1) {
  15673. + struct page *pg = bio->bi_io_vec[i].bv_page;
  15674. +
  15675. + if (bio->bi_status) {
  15676. + SetPageError(pg);
  15677. + nr_errors++;
  15678. + }
  15679. +
  15680. + {
  15681. + /* jnode WRITEBACK ("write is in progress bit") is
  15682. + * atomically cleared here. */
  15683. + jnode *node;
  15684. +
  15685. + assert("zam-736", pg != NULL);
  15686. + assert("zam-736", PagePrivate(pg));
  15687. + node = jprivate(pg);
  15688. +
  15689. + JF_CLR(node, JNODE_WRITEBACK);
  15690. + }
  15691. +
  15692. + end_page_writeback(pg);
  15693. + put_page(pg);
  15694. + }
  15695. +
  15696. + if (fq) {
  15697. + /* count i/o error in fq object */
  15698. + atomic_add(nr_errors, &fq->nr_errors);
  15699. +
  15700. + /* If all write requests registered in this "fq" are done we up
  15701. + * the waiter. */
  15702. + if (atomic_sub_and_test(bio->bi_vcnt, &fq->nr_submitted))
  15703. + wake_up(&fq->wait);
  15704. + }
  15705. +
  15706. + bio_put(bio);
  15707. +}
  15708. +
  15709. +/* Count I/O requests which will be submitted by @bio in the given flush
  15710. +   queue @fq */
  15711. +void add_fq_to_bio(flush_queue_t *fq, struct bio *bio)
  15712. +{
  15713. + bio->bi_private = fq;
  15714. + bio->bi_end_io = end_io_handler;
  15715. +
  15716. + if (fq)
  15717. + atomic_add(bio->bi_vcnt, &fq->nr_submitted);
  15718. +}
  15719. +
  15720. +/* Move all queued nodes out from @fq->prepped list. */
  15721. +static void release_prepped_list(flush_queue_t *fq)
  15722. +{
  15723. + txn_atom *atom;
  15724. +
  15725. + assert("zam-904", fq_in_use(fq));
  15726. + atom = atom_locked_by_fq(fq);
  15727. +
  15728. + while (!list_empty(ATOM_FQ_LIST(fq))) {
  15729. + jnode *cur;
  15730. +
  15731. + cur = list_entry(ATOM_FQ_LIST(fq)->next, jnode, capture_link);
  15732. + list_del_init(&cur->capture_link);
  15733. +
  15734. + count_dequeued_node(fq);
  15735. + spin_lock_jnode(cur);
  15736. + assert("nikita-3154", !JF_ISSET(cur, JNODE_OVRWR));
  15737. + assert("nikita-3154", JF_ISSET(cur, JNODE_RELOC));
  15738. + assert("nikita-3154", JF_ISSET(cur, JNODE_FLUSH_QUEUED));
  15739. + JF_CLR(cur, JNODE_FLUSH_QUEUED);
  15740. +
  15741. + if (JF_ISSET(cur, JNODE_DIRTY)) {
  15742. + list_add_tail(&cur->capture_link,
  15743. + ATOM_DIRTY_LIST(atom,
  15744. + jnode_get_level(cur)));
  15745. + ON_DEBUG(count_jnode(atom, cur, FQ_LIST,
  15746. + DIRTY_LIST, 1));
  15747. + } else {
  15748. + list_add_tail(&cur->capture_link,
  15749. + ATOM_CLEAN_LIST(atom));
  15750. + ON_DEBUG(count_jnode(atom, cur, FQ_LIST,
  15751. + CLEAN_LIST, 1));
  15752. + }
  15753. +
  15754. + spin_unlock_jnode(cur);
  15755. + }
  15756. +
  15757. + if (--atom->nr_running_queues == 0)
  15758. + reiser4_atom_send_event(atom);
  15759. +
  15760. + spin_unlock_atom(atom);
  15761. +}
  15762. +
  15763. +/* Submit write requests for nodes on the already filled flush queue @fq.
  15764. +
  15765. + @fq: flush queue object which contains jnodes we can (and will) write.
  15766. + @return: number of submitted blocks (>=0) if success, otherwise -- an error
  15767. + code (<0). */
  15768. +int reiser4_write_fq(flush_queue_t *fq, long *nr_submitted, int flags)
  15769. +{
  15770. + int ret;
  15771. + txn_atom *atom;
  15772. +
  15773. + while (1) {
  15774. + atom = atom_locked_by_fq(fq);
  15775. + assert("zam-924", atom);
  15776. + /* do not write fq in parallel. */
  15777. + if (atom->nr_running_queues == 0
  15778. + || !(flags & WRITEOUT_SINGLE_STREAM))
  15779. + break;
  15780. + reiser4_atom_wait_event(atom);
  15781. + }
  15782. +
  15783. + atom->nr_running_queues++;
  15784. + spin_unlock_atom(atom);
  15785. +
  15786. + ret = write_jnode_list(ATOM_FQ_LIST(fq), fq, nr_submitted, flags);
  15787. + release_prepped_list(fq);
  15788. +
  15789. + return ret;
  15790. +}
  15791. +
  15792. +/* Getting flush queue object for exclusive use by one thread. May require
  15793. + several iterations which is indicated by -E_REPEAT return code.
  15794. +
  15795. + This function does not contain code for obtaining an atom lock because an
  15796. + atom lock is obtained by different ways in different parts of reiser4,
  15797. + usually it is current atom, but we need a possibility for getting fq for the
  15798. + atom of given jnode. */
  15799. +static int fq_by_atom_gfp(txn_atom *atom, flush_queue_t **new_fq, gfp_t gfp)
  15800. +{
  15801. + flush_queue_t *fq;
  15802. +
  15803. + assert_spin_locked(&(atom->alock));
  15804. +
  15805. + fq = list_entry(atom->flush_queues.next, flush_queue_t, alink);
  15806. + while (&atom->flush_queues != &fq->alink) {
  15807. + spin_lock(&(fq->guard));
  15808. +
  15809. + if (fq_ready(fq)) {
  15810. + mark_fq_in_use(fq);
  15811. + assert("vs-1246", fq->owner == NULL);
  15812. + ON_DEBUG(fq->owner = current);
  15813. + spin_unlock(&(fq->guard));
  15814. +
  15815. + if (*new_fq)
  15816. + done_fq(*new_fq);
  15817. +
  15818. + *new_fq = fq;
  15819. +
  15820. + return 0;
  15821. + }
  15822. +
  15823. + spin_unlock(&(fq->guard));
  15824. +
  15825. + fq = list_entry(fq->alink.next, flush_queue_t, alink);
  15826. + }
  15827. +
  15828. + /* Use previously allocated fq object */
  15829. + if (*new_fq) {
  15830. + mark_fq_in_use(*new_fq);
  15831. + assert("vs-1248", (*new_fq)->owner == 0);
  15832. + ON_DEBUG((*new_fq)->owner = current);
  15833. + attach_fq(atom, *new_fq);
  15834. +
  15835. + return 0;
  15836. + }
  15837. +
  15838. + spin_unlock_atom(atom);
  15839. +
  15840. + *new_fq = create_fq(gfp);
  15841. +
  15842. + if (*new_fq == NULL)
  15843. + return RETERR(-ENOMEM);
  15844. +
  15845. + return RETERR(-E_REPEAT);
  15846. +}
  15847. +
  15848. +int reiser4_fq_by_atom(txn_atom * atom, flush_queue_t **new_fq)
  15849. +{
  15850. + return fq_by_atom_gfp(atom, new_fq, reiser4_ctx_gfp_mask_get());
  15851. +}
  15852. +
  15853. +/* A wrapper around reiser4_fq_by_atom for getting a flush queue
  15854. + object for current atom, if success fq->atom remains locked. */
  15855. +flush_queue_t *get_fq_for_current_atom(void)
  15856. +{
  15857. + flush_queue_t *fq = NULL;
  15858. + txn_atom *atom;
  15859. + int ret;
  15860. +
  15861. + do {
  15862. + atom = get_current_atom_locked();
  15863. + ret = reiser4_fq_by_atom(atom, &fq);
  15864. + } while (ret == -E_REPEAT);
  15865. +
  15866. + if (ret)
  15867. + return ERR_PTR(ret);
  15868. + return fq;
  15869. +}
  15870. +
  15871. +/* Releasing flush queue object after exclusive use */
  15872. +void reiser4_fq_put_nolock(flush_queue_t *fq)
  15873. +{
  15874. + assert("zam-747", fq->atom != NULL);
  15875. + assert("zam-902", list_empty_careful(ATOM_FQ_LIST(fq)));
  15876. + mark_fq_ready(fq);
  15877. + assert("vs-1245", fq->owner == current);
  15878. + ON_DEBUG(fq->owner = NULL);
  15879. +}
  15880. +
  15881. +void reiser4_fq_put(flush_queue_t *fq)
  15882. +{
  15883. + txn_atom *atom;
  15884. +
  15885. + spin_lock(&(fq->guard));
  15886. + atom = atom_locked_by_fq_nolock(fq);
  15887. +
  15888. + assert("zam-746", atom != NULL);
  15889. +
  15890. + reiser4_fq_put_nolock(fq);
  15891. + reiser4_atom_send_event(atom);
  15892. +
  15893. + spin_unlock(&(fq->guard));
  15894. + spin_unlock_atom(atom);
  15895. +}
  15896. +
  15897. +/* A part of atom object initialization related to the embedded flush queue
  15898. + list head */
  15899. +
  15900. +void init_atom_fq_parts(txn_atom *atom)
  15901. +{
  15902. + INIT_LIST_HEAD(&atom->flush_queues);
  15903. +}
  15904. +
  15905. +#if REISER4_DEBUG
  15906. +
  15907. +void reiser4_check_fq(const txn_atom *atom)
  15908. +{
  15909. + /* check number of nodes on all atom's flush queues */
  15910. + flush_queue_t *fq;
  15911. + int count;
  15912. + struct list_head *pos;
  15913. +
  15914. + count = 0;
  15915. + list_for_each_entry(fq, &atom->flush_queues, alink) {
  15916. + spin_lock(&(fq->guard));
  15917. + /* calculate number of jnodes on fq' list of prepped jnodes */
  15918. + list_for_each(pos, ATOM_FQ_LIST(fq))
  15919. + count++;
  15920. + spin_unlock(&(fq->guard));
  15921. + }
  15922. + if (count != atom->fq)
  15923. + warning("", "fq counter %d, real %d\n", atom->fq, count);
  15924. +
  15925. +}
  15926. +
  15927. +#endif
  15928. +
  15929. +/*
  15930. + * Local variables:
  15931. + * c-indentation-style: "K&R"
  15932. + * mode-name: "LC"
  15933. + * c-basic-offset: 8
  15934. + * tab-width: 8
  15935. + * fill-column: 79
  15936. + * scroll-step: 1
  15937. + * End:
  15938. + */
  15939. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/forward.h linux-4.14.2/fs/reiser4/forward.h
  15940. --- linux-4.14.2.orig/fs/reiser4/forward.h 1970-01-01 01:00:00.000000000 +0100
  15941. +++ linux-4.14.2/fs/reiser4/forward.h 2017-11-26 22:13:09.000000000 +0100
  15942. @@ -0,0 +1,259 @@
  15943. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  15944. + reiser4/README */
  15945. +
  15946. +/* Forward declarations. Thank you Kernighan. */
  15947. +
  15948. +#if !defined(__REISER4_FORWARD_H__)
  15949. +#define __REISER4_FORWARD_H__
  15950. +
  15951. +#include <asm/errno.h>
  15952. +#include <linux/types.h>
  15953. +
  15954. +typedef struct zlock zlock;
  15955. +typedef struct lock_stack lock_stack;
  15956. +typedef struct lock_handle lock_handle;
  15957. +typedef struct znode znode;
  15958. +typedef struct flow flow_t;
  15959. +typedef struct coord coord_t;
  15960. +typedef struct tree_access_pointer tap_t;
  15961. +typedef struct reiser4_object_create_data reiser4_object_create_data;
  15962. +typedef union reiser4_plugin reiser4_plugin;
  15963. +typedef __u16 reiser4_plugin_id;
  15964. +typedef __u64 reiser4_plugin_groups;
  15965. +typedef struct item_plugin item_plugin;
  15966. +typedef struct jnode_plugin jnode_plugin;
  15967. +typedef struct reiser4_item_data reiser4_item_data;
  15968. +typedef union reiser4_key reiser4_key;
  15969. +typedef struct reiser4_tree reiser4_tree;
  15970. +typedef struct carry_cut_data carry_cut_data;
  15971. +typedef struct carry_kill_data carry_kill_data;
  15972. +typedef struct carry_tree_op carry_tree_op;
  15973. +typedef struct carry_tree_node carry_tree_node;
  15974. +typedef struct carry_plugin_info carry_plugin_info;
  15975. +typedef struct reiser4_journal reiser4_journal;
  15976. +typedef struct txn_atom txn_atom;
  15977. +typedef struct txn_handle txn_handle;
  15978. +typedef struct txn_mgr txn_mgr;
  15979. +typedef struct reiser4_dir_entry_desc reiser4_dir_entry_desc;
  15980. +typedef struct reiser4_context reiser4_context;
  15981. +typedef struct carry_level carry_level;
  15982. +typedef struct blocknr_set_entry blocknr_set_entry;
  15983. +typedef struct blocknr_list_entry blocknr_list_entry;
  15984. +/* super_block->s_fs_info points to this */
  15985. +typedef struct reiser4_super_info_data reiser4_super_info_data;
  15986. +/* next two objects are fields of reiser4_super_info_data */
  15987. +typedef struct reiser4_oid_allocator reiser4_oid_allocator;
  15988. +typedef struct reiser4_space_allocator reiser4_space_allocator;
  15989. +
  15990. +typedef struct flush_scan flush_scan;
  15991. +typedef struct flush_position flush_pos_t;
  15992. +
  15993. +typedef unsigned short pos_in_node_t;
  15994. +#define MAX_POS_IN_NODE 65535
  15995. +
  15996. +typedef struct jnode jnode;
  15997. +typedef struct reiser4_blocknr_hint reiser4_blocknr_hint;
  15998. +
  15999. +typedef struct uf_coord uf_coord_t;
  16000. +typedef struct hint hint_t;
  16001. +
  16002. +typedef struct ktxnmgrd_context ktxnmgrd_context;
  16003. +
  16004. +struct inode;
  16005. +struct page;
  16006. +struct file;
  16007. +struct dentry;
  16008. +struct super_block;
  16009. +
  16010. +/* return values of coord_by_key(). cbk == coord_by_key */
  16011. +typedef enum {
  16012. + CBK_COORD_FOUND = 0,
  16013. + CBK_COORD_NOTFOUND = -ENOENT,
  16014. +} lookup_result;
  16015. +
  16016. +/* results of lookup with directory file */
  16017. +typedef enum {
  16018. + FILE_NAME_FOUND = 0,
  16019. + FILE_NAME_NOTFOUND = -ENOENT,
  16020. + FILE_IO_ERROR = -EIO, /* FIXME: it seems silly to have special OOM,
  16021. + IO_ERROR return codes for each search. */
  16022. + FILE_OOM = -ENOMEM /* FIXME: it seems silly to have special OOM,
  16023. + IO_ERROR return codes for each search. */
  16024. +} file_lookup_result;
  16025. +
  16026. +/* behaviors of lookup. If coord we are looking for is actually in a tree,
  16027. + both coincide. */
  16028. +typedef enum {
  16029. + /* search exactly for the coord with key given */
  16030. + FIND_EXACT,
  16031. + /* search for coord with the maximal key not greater than one
  16032. + given */
  16033. + FIND_MAX_NOT_MORE_THAN /*LEFT_SLANT_BIAS */
  16034. +} lookup_bias;
  16035. +
  16036. +typedef enum {
  16037. + /* number of leaf level of the tree
  16038. + The fake root has (tree_level=0). */
  16039. + LEAF_LEVEL = 1,
  16040. +
  16041. + /* number of level one above leaf level of the tree.
  16042. +
  16043. + It is supposed that internal tree used by reiser4 to store file
  16044. + system data and meta data will have height 2 initially (when
  16045. + created by mkfs).
  16046. + */
  16047. + TWIG_LEVEL = 2,
  16048. +} tree_level;
  16049. +
  16050. +/* The "real" maximum ztree height is the 0-origin size of any per-level
  16051. + array, since the zero'th level is not used. */
  16052. +#define REAL_MAX_ZTREE_HEIGHT (REISER4_MAX_ZTREE_HEIGHT-LEAF_LEVEL)
  16053. +
  16054. +/* enumeration of possible mutual position of item and coord. This enum is
  16055. + return type of ->is_in_item() item plugin method which see. */
  16056. +typedef enum {
  16057. + /* coord is on the left of an item */
  16058. + IP_ON_THE_LEFT,
  16059. + /* coord is inside item */
  16060. + IP_INSIDE,
  16061. + /* coord is inside item, but to the right of the rightmost unit of
  16062. + this item */
  16063. + IP_RIGHT_EDGE,
  16064. + /* coord is on the right of an item */
  16065. + IP_ON_THE_RIGHT
  16066. +} interposition;
  16067. +
  16068. +/* type of lock to acquire on znode before returning it to caller */
  16069. +typedef enum {
  16070. + ZNODE_NO_LOCK = 0,
  16071. + ZNODE_READ_LOCK = 1,
  16072. + ZNODE_WRITE_LOCK = 2,
  16073. +} znode_lock_mode;
  16074. +
  16075. +/* type of lock request */
  16076. +typedef enum {
  16077. + ZNODE_LOCK_LOPRI = 0,
  16078. + ZNODE_LOCK_HIPRI = (1 << 0),
  16079. +
  16080. + /* By setting the ZNODE_LOCK_NONBLOCK flag in a lock request the call to
  16081. + longterm_lock_znode will not sleep waiting for the lock to become
  16082. + available. If the lock is unavailable, reiser4_znode_lock will
  16083. + immediately return the value -E_REPEAT. */
  16084. + ZNODE_LOCK_NONBLOCK = (1 << 1),
  16085. + /* An option for longterm_lock_znode which prevents atom fusion */
  16086. + ZNODE_LOCK_DONT_FUSE = (1 << 2)
  16087. +} znode_lock_request;
  16088. +
  16089. +typedef enum { READ_OP = 0, WRITE_OP = 1 } rw_op;
  16090. +
  16091. +/* used to specify direction of shift. These must be -1 and 1 */
  16092. +typedef enum {
  16093. + SHIFT_LEFT = 1,
  16094. + SHIFT_RIGHT = -1
  16095. +} shift_direction;
  16096. +
  16097. +typedef enum {
  16098. + LEFT_SIDE,
  16099. + RIGHT_SIDE
  16100. +} sideof;
  16101. +
  16102. +#define reiser4_round_up(value, order) \
  16103. + ((typeof(value))(((long) (value) + (order) - 1U) & \
  16104. + ~((order) - 1)))
  16105. +
  16106. +/* values returned by squalloc_right_neighbor and its auxiliary functions */
  16107. +typedef enum {
  16108. + /* unit of internal item is moved */
  16109. + SUBTREE_MOVED = 0,
  16110. + /* nothing else can be squeezed into left neighbor */
  16111. + SQUEEZE_TARGET_FULL = 1,
  16112. + /* all content of node is squeezed into its left neighbor */
  16113. + SQUEEZE_SOURCE_EMPTY = 2,
  16114. + /* one more item is copied (this is only returned by
  16115. + allocate_and_copy_extent to squalloc_twig)) */
  16116. + SQUEEZE_CONTINUE = 3
  16117. +} squeeze_result;
  16118. +
  16119. +/* Do not change items ids. If you do - there will be format change */
  16120. +typedef enum {
  16121. + STATIC_STAT_DATA_ID = 0x0,
  16122. + SIMPLE_DIR_ENTRY_ID = 0x1,
  16123. + COMPOUND_DIR_ID = 0x2,
  16124. + NODE_POINTER_ID = 0x3,
  16125. + EXTENT_POINTER_ID = 0x5,
  16126. + FORMATTING_ID = 0x6,
  16127. + CTAIL_ID = 0x7,
  16128. + BLACK_BOX_ID = 0x8,
  16129. + LAST_ITEM_ID = 0x9
  16130. +} item_id;
  16131. +
  16132. +/* Flags passed to jnode_flush() to allow it to distinguish default settings
  16133. + based on whether commit() was called or VM memory pressure was applied. */
  16134. +typedef enum {
  16135. + /* submit flush queue to disk at jnode_flush completion */
  16136. + JNODE_FLUSH_WRITE_BLOCKS = 1,
  16137. +
  16138. + /* flush is called for commit */
  16139. + JNODE_FLUSH_COMMIT = 2,
  16140. + /* not implemented */
  16141. + JNODE_FLUSH_MEMORY_FORMATTED = 4,
  16142. +
  16143. + /* not implemented */
  16144. + JNODE_FLUSH_MEMORY_UNFORMATTED = 8,
  16145. +} jnode_flush_flags;
  16146. +
  16147. +/* Flags to insert/paste carry operations. Currently they only used in
  16148. + flushing code, but in future, they can be used to optimize for repetitive
  16149. + accesses. */
  16150. +typedef enum {
  16151. + /* carry is not allowed to shift data to the left when trying to find
  16152. + free space */
  16153. + COPI_DONT_SHIFT_LEFT = (1 << 0),
  16154. + /* carry is not allowed to shift data to the right when trying to find
  16155. + free space */
  16156. + COPI_DONT_SHIFT_RIGHT = (1 << 1),
  16157. + /* carry is not allowed to allocate new node(s) when trying to find
  16158. + free space */
  16159. + COPI_DONT_ALLOCATE = (1 << 2),
  16160. + /* try to load left neighbor if its not in a cache */
  16161. + COPI_LOAD_LEFT = (1 << 3),
  16162. + /* try to load right neighbor if its not in a cache */
  16163. + COPI_LOAD_RIGHT = (1 << 4),
  16164. + /* shift insertion point to the left neighbor */
  16165. + COPI_GO_LEFT = (1 << 5),
  16166. + /* shift insertion point to the right neighbor */
  16167. + COPI_GO_RIGHT = (1 << 6),
  16168. + /* try to step back into original node if insertion into new node
  16169. + fails after shifting data there. */
  16170. + COPI_STEP_BACK = (1 << 7),
  16171. + /* use all possible space in the node */
  16172. + COPI_SWEEP = (1 << 8)
  16173. +} cop_insert_flag;
  16174. +
  16175. +typedef enum {
  16176. + SAFE_UNLINK, /* safe-link for unlink */
  16177. + SAFE_TRUNCATE /* safe-link for truncate */
  16178. +} reiser4_safe_link_t;
  16179. +
  16180. +/* this is to show on which list of atom jnode is */
  16181. +typedef enum {
  16182. + NOT_CAPTURED,
  16183. + DIRTY_LIST,
  16184. + CLEAN_LIST,
  16185. + FQ_LIST,
  16186. + WB_LIST,
  16187. + OVRWR_LIST
  16188. +} atom_list;
  16189. +
  16190. +/* __REISER4_FORWARD_H__ */
  16191. +#endif
  16192. +
  16193. +/* Make Linus happy.
  16194. + Local variables:
  16195. + c-indentation-style: "K&R"
  16196. + mode-name: "LC"
  16197. + c-basic-offset: 8
  16198. + tab-width: 8
  16199. + fill-column: 120
  16200. + End:
  16201. +*/
  16202. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/fsdata.c linux-4.14.2/fs/reiser4/fsdata.c
  16203. --- linux-4.14.2.orig/fs/reiser4/fsdata.c 1970-01-01 01:00:00.000000000 +0100
  16204. +++ linux-4.14.2/fs/reiser4/fsdata.c 2017-11-26 22:13:09.000000000 +0100
  16205. @@ -0,0 +1,801 @@
  16206. +/* Copyright 2001, 2002, 2003, 2004, 2005 by Hans Reiser, licensing governed by
  16207. + * reiser4/README */
  16208. +
  16209. +#include "fsdata.h"
  16210. +#include "inode.h"
  16211. +
  16212. +#include <linux/shrinker.h>
  16213. +
  16214. +/* cache or dir_cursors */
  16215. +static struct kmem_cache *d_cursor_cache;
  16216. +
  16217. +/* list of unused cursors */
  16218. +static LIST_HEAD(cursor_cache);
  16219. +
  16220. +/* number of cursors in list of unused cursors */
  16221. +static unsigned long d_cursor_unused = 0;
  16222. +
  16223. +/* spinlock protecting manipulations with dir_cursor's hash table and lists */
  16224. +DEFINE_SPINLOCK(d_c_lock);
  16225. +
  16226. +static reiser4_file_fsdata *create_fsdata(struct file *file);
  16227. +static int file_is_stateless(struct file *file);
  16228. +static void free_fsdata(reiser4_file_fsdata *fsdata);
  16229. +static void kill_cursor(dir_cursor *);
  16230. +
  16231. +static unsigned long d_cursor_shrink_scan(struct shrinker *shrink,
  16232. + struct shrink_control *sc)
  16233. +{
  16234. + dir_cursor *scan;
  16235. + unsigned long freed = 0;
  16236. +
  16237. + spin_lock(&d_c_lock);
  16238. + while (!list_empty(&cursor_cache) && sc->nr_to_scan) {
  16239. + scan = list_entry(cursor_cache.next, dir_cursor, alist);
  16240. + assert("nikita-3567", scan->ref == 0);
  16241. + kill_cursor(scan);
  16242. + freed++;
  16243. + sc->nr_to_scan--;
  16244. + }
  16245. + spin_unlock(&d_c_lock);
  16246. + return freed;
  16247. +}
  16248. +
  16249. +static unsigned long d_cursor_shrink_count (struct shrinker *shrink,
  16250. + struct shrink_control *sc)
  16251. +{
  16252. + return d_cursor_unused;
  16253. +}
  16254. +
  16255. +/*
  16256. + * actually, d_cursors are "priceless", because there is no way to
  16257. + * recover information stored in them. On the other hand, we don't
  16258. + * want to consume all kernel memory by them. As a compromise, just
  16259. + * assign higher "seeks" value to d_cursor cache, so that it will be
  16260. + * shrunk only if system is really tight on memory.
  16261. + */
  16262. +static struct shrinker d_cursor_shrinker = {
  16263. + .count_objects = d_cursor_shrink_count,
  16264. + .scan_objects = d_cursor_shrink_scan,
  16265. + .seeks = DEFAULT_SEEKS << 3
  16266. +};
  16267. +
  16268. +/**
  16269. + * reiser4_init_d_cursor - create d_cursor cache
  16270. + *
  16271. + * Initializes slab cache of d_cursors. It is part of reiser4 module
  16272. + * initialization.
  16273. + */
  16274. +int reiser4_init_d_cursor(void)
  16275. +{
  16276. + d_cursor_cache = kmem_cache_create("d_cursor", sizeof(dir_cursor), 0,
  16277. + SLAB_HWCACHE_ALIGN, NULL);
  16278. + if (d_cursor_cache == NULL)
  16279. + return RETERR(-ENOMEM);
  16280. +
  16281. + register_shrinker(&d_cursor_shrinker);
  16282. + return 0;
  16283. +}
  16284. +
  16285. +/**
  16286. + * reiser4_done_d_cursor - delete d_cursor cache and d_cursor shrinker
  16287. + *
  16288. + * This is called on reiser4 module unloading or system shutdown.
  16289. + */
  16290. +void reiser4_done_d_cursor(void)
  16291. +{
  16292. + unregister_shrinker(&d_cursor_shrinker);
  16293. +
  16294. + destroy_reiser4_cache(&d_cursor_cache);
  16295. +}
  16296. +
  16297. +#define D_CURSOR_TABLE_SIZE (256)
  16298. +
  16299. +static inline unsigned long
  16300. +d_cursor_hash(d_cursor_hash_table * table, const struct d_cursor_key *key)
  16301. +{
  16302. + assert("nikita-3555", IS_POW(D_CURSOR_TABLE_SIZE));
  16303. + return (key->oid + key->cid) & (D_CURSOR_TABLE_SIZE - 1);
  16304. +}
  16305. +
  16306. +static inline int d_cursor_eq(const struct d_cursor_key *k1,
  16307. + const struct d_cursor_key *k2)
  16308. +{
  16309. + return k1->cid == k2->cid && k1->oid == k2->oid;
  16310. +}
  16311. +
  16312. +/*
  16313. + * define functions to manipulate reiser4 super block's hash table of
  16314. + * dir_cursors
  16315. + */
  16316. +#define KMALLOC(size) kmalloc((size), reiser4_ctx_gfp_mask_get())
  16317. +#define KFREE(ptr, size) kfree(ptr)
  16318. +TYPE_SAFE_HASH_DEFINE(d_cursor,
  16319. + dir_cursor,
  16320. + struct d_cursor_key,
  16321. + key, hash, d_cursor_hash, d_cursor_eq);
  16322. +#undef KFREE
  16323. +#undef KMALLOC
  16324. +
  16325. +/**
  16326. + * reiser4_init_super_d_info - initialize per-super-block d_cursor resources
  16327. + * @super: super block to initialize
  16328. + *
  16329. + * Initializes per-super-block d_cursor's hash table and radix tree. It is part
  16330. + * of mount.
  16331. + */
  16332. +int reiser4_init_super_d_info(struct super_block *super)
  16333. +{
  16334. + struct d_cursor_info *p;
  16335. +
  16336. + p = &get_super_private(super)->d_info;
  16337. +
  16338. + INIT_RADIX_TREE(&p->tree, reiser4_ctx_gfp_mask_get());
  16339. + return d_cursor_hash_init(&p->table, D_CURSOR_TABLE_SIZE);
  16340. +}
  16341. +
  16342. +/**
  16343. + * reiser4_done_super_d_info - release per-super-block d_cursor resources
  16344. + * @super: super block being umounted
  16345. + *
  16346. + * It is called on umount. Kills all directory cursors attached to super block.
  16347. + */
  16348. +void reiser4_done_super_d_info(struct super_block *super)
  16349. +{
  16350. + struct d_cursor_info *d_info;
  16351. + dir_cursor *cursor, *next;
  16352. +
  16353. + d_info = &get_super_private(super)->d_info;
  16354. + for_all_in_htable(&d_info->table, d_cursor, cursor, next)
  16355. + kill_cursor(cursor);
  16356. +
  16357. + BUG_ON(d_info->tree.rnode != NULL);
  16358. + d_cursor_hash_done(&d_info->table);
  16359. +}
  16360. +
  16361. +/**
  16362. + * kill_cursor - free dir_cursor and reiser4_file_fsdata attached to it
  16363. + * @cursor: cursor to free
  16364. + *
  16365. + * Removes reiser4_file_fsdata attached to @cursor from readdir list of
  16366. + * reiser4_inode, frees that reiser4_file_fsdata. Removes @cursor from
  16367. + * indices, hash table, list of unused cursors and frees it.
  16368. + */
  16369. +static void kill_cursor(dir_cursor *cursor)
  16370. +{
  16371. + unsigned long index;
  16372. +
  16373. + assert("nikita-3566", cursor->ref == 0);
  16374. + assert("nikita-3572", cursor->fsdata != NULL);
  16375. +
  16376. + index = (unsigned long)cursor->key.oid;
  16377. + list_del_init(&cursor->fsdata->dir.linkage);
  16378. + free_fsdata(cursor->fsdata);
  16379. + cursor->fsdata = NULL;
  16380. +
  16381. + if (list_empty_careful(&cursor->list))
  16382. + /* this is last cursor for a file. Kill radix-tree entry */
  16383. + radix_tree_delete(&cursor->info->tree, index);
  16384. + else {
  16385. + void **slot;
  16386. +
  16387. + /*
  16388. + * there are other cursors for the same oid.
  16389. + */
  16390. +
  16391. + /*
  16392. + * if radix tree point to the cursor being removed, re-target
  16393. + * radix tree slot to the next cursor in the (non-empty as was
  16394. + * checked above) element of the circular list of all cursors
  16395. + * for this oid.
  16396. + */
  16397. + slot = radix_tree_lookup_slot(&cursor->info->tree, index);
  16398. + assert("nikita-3571", *slot != NULL);
  16399. + if (*slot == cursor)
  16400. + *slot = list_entry(cursor->list.next, dir_cursor, list);
  16401. + /* remove cursor from circular list */
  16402. + list_del_init(&cursor->list);
  16403. + }
  16404. + /* remove cursor from the list of unused cursors */
  16405. + list_del_init(&cursor->alist);
  16406. + /* remove cursor from the hash table */
  16407. + d_cursor_hash_remove(&cursor->info->table, cursor);
  16408. + /* and free it */
  16409. + kmem_cache_free(d_cursor_cache, cursor);
  16410. + --d_cursor_unused;
  16411. +}
  16412. +
  16413. +/* possible actions that can be performed on all cursors for the given file */
  16414. +enum cursor_action {
  16415. + /*
  16416. + * load all detached state: this is called when stat-data is loaded
  16417. + * from the disk to recover information about all pending readdirs
  16418. + */
  16419. + CURSOR_LOAD,
  16420. + /*
  16421. + * detach all state from inode, leaving it in the cache. This is called
  16422. + * when inode is removed from the memory by memory pressure
  16423. + */
  16424. + CURSOR_DISPOSE,
  16425. + /*
  16426. + * detach cursors from the inode, and free them. This is called when
  16427. + * inode is destroyed
  16428. + */
  16429. + CURSOR_KILL
  16430. +};
  16431. +
  16432. +/*
  16433. + * return d_cursor data for the file system @inode is in.
  16434. + */
  16435. +static inline struct d_cursor_info *d_info(struct inode *inode)
  16436. +{
  16437. + return &get_super_private(inode->i_sb)->d_info;
  16438. +}
  16439. +
  16440. +/*
  16441. + * lookup d_cursor in the per-super-block radix tree.
  16442. + */
  16443. +static inline dir_cursor *lookup(struct d_cursor_info *info,
  16444. + unsigned long index)
  16445. +{
  16446. + return (dir_cursor *) radix_tree_lookup(&info->tree, index);
  16447. +}
  16448. +
  16449. +/*
  16450. + * attach @cursor to the radix tree. There may be multiple cursors for the
  16451. + * same oid, they are chained into circular list.
  16452. + */
  16453. +static void bind_cursor(dir_cursor * cursor, unsigned long index)
  16454. +{
  16455. + dir_cursor *head;
  16456. +
  16457. + head = lookup(cursor->info, index);
  16458. + if (head == NULL) {
  16459. + /* this is the first cursor for this index */
  16460. + INIT_LIST_HEAD(&cursor->list);
  16461. + radix_tree_insert(&cursor->info->tree, index, cursor);
  16462. + } else {
  16463. + /* some cursor already exists. Chain ours */
  16464. + list_add(&cursor->list, &head->list);
  16465. + }
  16466. +}
  16467. +
  16468. +/*
  16469. + * detach fsdata (if detachable) from file descriptor, and put cursor on the
  16470. + * "unused" list. Called when file descriptor is no longer in active use.
  16471. + */
  16472. +static void clean_fsdata(struct file *file)
  16473. +{
  16474. + dir_cursor *cursor;
  16475. + reiser4_file_fsdata *fsdata;
  16476. +
  16477. + assert("nikita-3570", file_is_stateless(file));
  16478. +
  16479. + fsdata = (reiser4_file_fsdata *) file->private_data;
  16480. + if (fsdata != NULL) {
  16481. + cursor = fsdata->cursor;
  16482. + if (cursor != NULL) {
  16483. + spin_lock(&d_c_lock);
  16484. + --cursor->ref;
  16485. + if (cursor->ref == 0) {
  16486. + list_add_tail(&cursor->alist, &cursor_cache);
  16487. + ++d_cursor_unused;
  16488. + }
  16489. + spin_unlock(&d_c_lock);
  16490. + file->private_data = NULL;
  16491. + }
  16492. + }
  16493. +}
  16494. +
  16495. +/*
  16496. + * global counter used to generate "client ids". These ids are encoded into
  16497. + * high bits of fpos.
  16498. + */
  16499. +static __u32 cid_counter = 0;
  16500. +#define CID_SHIFT (20)
  16501. +#define CID_MASK (0xfffffull)
  16502. +
  16503. +static void free_file_fsdata_nolock(struct file *);
  16504. +
  16505. +/**
  16506. + * insert_cursor - allocate file_fsdata, insert cursor to tree and hash table
  16507. + * @cursor:
  16508. + * @file:
  16509. + * @inode:
  16510. + *
  16511. + * Allocates reiser4_file_fsdata, attaches it to @cursor, inserts cursor to
  16512. + * reiser4 super block's hash table and radix tree.
  16513. + * Also adds detachable readdir
  16514. + * state to the @file.
  16515. + */
  16516. +static int insert_cursor(dir_cursor *cursor, struct file *file, loff_t *fpos,
  16517. + struct inode *inode)
  16518. +{
  16519. + int result;
  16520. + reiser4_file_fsdata *fsdata;
  16521. +
  16522. + memset(cursor, 0, sizeof *cursor);
  16523. +
  16524. + /* this is either first call to readdir, or rewind. Anyway, create new
  16525. + * cursor. */
  16526. + fsdata = create_fsdata(NULL);
  16527. + if (fsdata != NULL) {
  16528. + result = radix_tree_preload(reiser4_ctx_gfp_mask_get());
  16529. + if (result == 0) {
  16530. + struct d_cursor_info *info;
  16531. + oid_t oid;
  16532. +
  16533. + info = d_info(inode);
  16534. + oid = get_inode_oid(inode);
  16535. + /* cid occupies higher 12 bits of f->f_pos. Don't
  16536. + * allow it to become negative: this confuses
  16537. + * nfsd_readdir() */
  16538. + cursor->key.cid = (++cid_counter) & 0x7ff;
  16539. + cursor->key.oid = oid;
  16540. + cursor->fsdata = fsdata;
  16541. + cursor->info = info;
  16542. + cursor->ref = 1;
  16543. +
  16544. + spin_lock_inode(inode);
  16545. + /* install cursor as @f's private_data, discarding old
  16546. + * one if necessary */
  16547. +#if REISER4_DEBUG
  16548. + if (file->private_data)
  16549. + warning("", "file has fsdata already");
  16550. +#endif
  16551. + clean_fsdata(file);
  16552. + free_file_fsdata_nolock(file);
  16553. + file->private_data = fsdata;
  16554. + fsdata->cursor = cursor;
  16555. + spin_unlock_inode(inode);
  16556. + spin_lock(&d_c_lock);
  16557. + /* insert cursor into hash table */
  16558. + d_cursor_hash_insert(&info->table, cursor);
  16559. + /* and chain it into radix-tree */
  16560. + bind_cursor(cursor, (unsigned long)oid);
  16561. + spin_unlock(&d_c_lock);
  16562. + radix_tree_preload_end();
  16563. + *fpos = ((__u64) cursor->key.cid) << CID_SHIFT;
  16564. + }
  16565. + } else
  16566. + result = RETERR(-ENOMEM);
  16567. + return result;
  16568. +}
  16569. +
  16570. +/**
  16571. + * process_cursors - do action on each cursor attached to inode
  16572. + * @inode:
  16573. + * @act: action to do
  16574. + *
  16575. + * Finds all cursors of @inode in reiser4's super block radix tree of cursors
  16576. + * and performs action specified by @act on each of cursors.
  16577. + */
  16578. +static void process_cursors(struct inode *inode, enum cursor_action act)
  16579. +{
  16580. + oid_t oid;
  16581. + dir_cursor *start;
  16582. + struct list_head *head;
  16583. + reiser4_context *ctx;
  16584. + struct d_cursor_info *info;
  16585. +
  16586. + /* this can be called by
  16587. + *
  16588. + * kswapd->...->prune_icache->..reiser4_destroy_inode
  16589. + *
  16590. + * without reiser4_context
  16591. + */
  16592. + ctx = reiser4_init_context(inode->i_sb);
  16593. + if (IS_ERR(ctx)) {
  16594. + warning("vs-23", "failed to init context");
  16595. + return;
  16596. + }
  16597. +
  16598. + assert("nikita-3558", inode != NULL);
  16599. +
  16600. + info = d_info(inode);
  16601. + oid = get_inode_oid(inode);
  16602. + spin_lock_inode(inode);
  16603. + head = get_readdir_list(inode);
  16604. + spin_lock(&d_c_lock);
  16605. + /* find any cursor for this oid: reference to it is hanging of radix
  16606. + * tree */
  16607. + start = lookup(info, (unsigned long)oid);
  16608. + if (start != NULL) {
  16609. + dir_cursor *scan;
  16610. + reiser4_file_fsdata *fsdata;
  16611. +
  16612. + /* process circular list of cursors for this oid */
  16613. + scan = start;
  16614. + do {
  16615. + dir_cursor *next;
  16616. +
  16617. + next = list_entry(scan->list.next, dir_cursor, list);
  16618. + fsdata = scan->fsdata;
  16619. + assert("nikita-3557", fsdata != NULL);
  16620. + if (scan->key.oid == oid) {
  16621. + switch (act) {
  16622. + case CURSOR_DISPOSE:
  16623. + list_del_init(&fsdata->dir.linkage);
  16624. + break;
  16625. + case CURSOR_LOAD:
  16626. + list_add(&fsdata->dir.linkage, head);
  16627. + break;
  16628. + case CURSOR_KILL:
  16629. + kill_cursor(scan);
  16630. + break;
  16631. + }
  16632. + }
  16633. + if (scan == next)
  16634. + /* last cursor was just killed */
  16635. + break;
  16636. + scan = next;
  16637. + } while (scan != start);
  16638. + }
  16639. + spin_unlock(&d_c_lock);
  16640. + /* check that we killed 'em all */
  16641. + assert("nikita-3568",
  16642. + ergo(act == CURSOR_KILL,
  16643. + list_empty_careful(get_readdir_list(inode))));
  16644. + assert("nikita-3569",
  16645. + ergo(act == CURSOR_KILL, lookup(info, oid) == NULL));
  16646. + spin_unlock_inode(inode);
  16647. + reiser4_exit_context(ctx);
  16648. +}
  16649. +
  16650. +/**
  16651. + * reiser4_dispose_cursors - removes cursors from inode's list
  16652. + * @inode: inode to dispose cursors of
  16653. + *
  16654. + * For each of cursors corresponding to @inode - removes reiser4_file_fsdata
  16655. + * attached to cursor from inode's readdir list. This is called when inode is
  16656. + * removed from the memory by memory pressure.
  16657. + */
  16658. +void reiser4_dispose_cursors(struct inode *inode)
  16659. +{
  16660. + process_cursors(inode, CURSOR_DISPOSE);
  16661. +}
  16662. +
  16663. +/**
  16664. + * reiser4_load_cursors - attach cursors to inode
  16665. + * @inode: inode to load cursors to
  16666. + *
  16667. + * For each of cursors corresponding to @inode - attaches reiser4_file_fsdata
  16668. + * attached to cursor to inode's readdir list. This is done when inode is
  16669. + * loaded into memory.
  16670. + */
  16671. +void reiser4_load_cursors(struct inode *inode)
  16672. +{
  16673. + process_cursors(inode, CURSOR_LOAD);
  16674. +}
  16675. +
  16676. +/**
  16677. + * reiser4_kill_cursors - kill all inode cursors
  16678. + * @inode: inode to kill cursors of
  16679. + *
  16680. + * Frees all cursors for this inode. This is called when inode is destroyed.
  16681. + */
  16682. +void reiser4_kill_cursors(struct inode *inode)
  16683. +{
  16684. + process_cursors(inode, CURSOR_KILL);
  16685. +}
  16686. +
  16687. +/**
  16688. + * file_is_stateless -
  16689. + * @file:
  16690. + *
  16691. + * true, if file descriptor @file is created by NFS server on demand to serve
  16692. + * one file system operation. This means that there may be "detached state"
  16693. + * for underlying inode.
  16694. + */
  16695. +static int file_is_stateless(struct file *file)
  16696. +{
  16697. + return reiser4_get_dentry_fsdata(file->f_path.dentry)->stateless;
  16698. +}
  16699. +
  16700. +/**
  16701. + * reiser4_get_dir_fpos -
  16702. + * @dir:
  16703. + * @fpos: effective value of dir->f_pos
  16704. + *
  16705. + * Calculates ->fpos from user-supplied cookie. Normally it is dir->f_pos, but
  16706. + * in the case of stateless directory operation (readdir-over-nfs), client id
  16707. + * was encoded in the high bits of cookie and should be masked off.
  16708. + */
  16709. +loff_t reiser4_get_dir_fpos(struct file *dir, loff_t fpos)
  16710. +{
  16711. + if (file_is_stateless(dir))
  16712. + return fpos & CID_MASK;
  16713. + else
  16714. + return fpos;
  16715. +}
  16716. +
  16717. +/**
  16718. + * reiser4_attach_fsdata - try to attach fsdata
  16719. + * @file:
  16720. + * @fpos: effective value of @file->f_pos
  16721. + * @inode:
  16722. + *
  16723. + * Finds or creates cursor for readdir-over-nfs.
  16724. + */
  16725. +int reiser4_attach_fsdata(struct file *file, loff_t *fpos, struct inode *inode)
  16726. +{
  16727. + loff_t pos;
  16728. + int result;
  16729. + dir_cursor *cursor;
  16730. +
  16731. + /*
  16732. + * we are serialized by inode->i_mutex
  16733. + */
  16734. + if (!file_is_stateless(file))
  16735. + return 0;
  16736. +
  16737. + pos = *fpos;
  16738. + result = 0;
  16739. + if (pos == 0) {
  16740. + /*
  16741. + * first call to readdir (or rewind to the beginning of
  16742. + * directory)
  16743. + */
  16744. + cursor = kmem_cache_alloc(d_cursor_cache,
  16745. + reiser4_ctx_gfp_mask_get());
  16746. + if (cursor != NULL)
  16747. + result = insert_cursor(cursor, file, fpos, inode);
  16748. + else
  16749. + result = RETERR(-ENOMEM);
  16750. + } else {
  16751. + /* try to find existing cursor */
  16752. + struct d_cursor_key key;
  16753. +
  16754. + key.cid = pos >> CID_SHIFT;
  16755. + key.oid = get_inode_oid(inode);
  16756. + spin_lock(&d_c_lock);
  16757. + cursor = d_cursor_hash_find(&d_info(inode)->table, &key);
  16758. + if (cursor != NULL) {
  16759. + /* cursor was found */
  16760. + if (cursor->ref == 0) {
  16761. + /* move it from unused list */
  16762. + list_del_init(&cursor->alist);
  16763. + --d_cursor_unused;
  16764. + }
  16765. + ++cursor->ref;
  16766. + }
  16767. + spin_unlock(&d_c_lock);
  16768. + if (cursor != NULL) {
  16769. + spin_lock_inode(inode);
  16770. + assert("nikita-3556", cursor->fsdata->back == NULL);
  16771. + clean_fsdata(file);
  16772. + free_file_fsdata_nolock(file);
  16773. + file->private_data = cursor->fsdata;
  16774. + spin_unlock_inode(inode);
  16775. + }
  16776. + }
  16777. + return result;
  16778. +}
  16779. +
  16780. +/**
  16781. + * reiser4_detach_fsdata - ???
  16782. + * @file:
  16783. + *
  16784. + * detach fsdata, if necessary
  16785. + */
  16786. +void reiser4_detach_fsdata(struct file *file)
  16787. +{
  16788. + struct inode *inode;
  16789. +
  16790. + if (!file_is_stateless(file))
  16791. + return;
  16792. +
  16793. + inode = file_inode(file);
  16794. + spin_lock_inode(inode);
  16795. + clean_fsdata(file);
  16796. + spin_unlock_inode(inode);
  16797. +}
  16798. +
  16799. +/* slab for reiser4_dentry_fsdata */
  16800. +static struct kmem_cache *dentry_fsdata_cache;
  16801. +
  16802. +/**
  16803. + * reiser4_init_dentry_fsdata - create cache of dentry_fsdata
  16804. + *
  16805. + * Initializes slab cache of structures attached to dentry->d_fsdata. It is
  16806. + * part of reiser4 module initialization.
  16807. + */
  16808. +int reiser4_init_dentry_fsdata(void)
  16809. +{
  16810. + dentry_fsdata_cache = kmem_cache_create("dentry_fsdata",
  16811. + sizeof(struct reiser4_dentry_fsdata),
  16812. + 0,
  16813. + SLAB_HWCACHE_ALIGN |
  16814. + SLAB_RECLAIM_ACCOUNT,
  16815. + NULL);
  16816. + if (dentry_fsdata_cache == NULL)
  16817. + return RETERR(-ENOMEM);
  16818. + return 0;
  16819. +}
  16820. +
  16821. +/**
  16822. + * reiser4_done_dentry_fsdata - delete cache of dentry_fsdata
  16823. + *
  16824. + * This is called on reiser4 module unloading or system shutdown.
  16825. + */
  16826. +void reiser4_done_dentry_fsdata(void)
  16827. +{
  16828. + destroy_reiser4_cache(&dentry_fsdata_cache);
  16829. +}
  16830. +
  16831. +/**
  16832. + * reiser4_get_dentry_fsdata - get fs-specific dentry data
  16833. + * @dentry: queried dentry
  16834. + *
  16835. + * Allocates if necessary and returns per-dentry data that we attach to each
  16836. + * dentry.
  16837. + */
  16838. +struct reiser4_dentry_fsdata *reiser4_get_dentry_fsdata(struct dentry *dentry)
  16839. +{
  16840. + assert("nikita-1365", dentry != NULL);
  16841. +
  16842. + if (dentry->d_fsdata == NULL) {
  16843. + dentry->d_fsdata = kmem_cache_alloc(dentry_fsdata_cache,
  16844. + reiser4_ctx_gfp_mask_get());
  16845. + if (dentry->d_fsdata == NULL)
  16846. + return ERR_PTR(RETERR(-ENOMEM));
  16847. + memset(dentry->d_fsdata, 0,
  16848. + sizeof(struct reiser4_dentry_fsdata));
  16849. + }
  16850. + return dentry->d_fsdata;
  16851. +}
  16852. +
  16853. +/**
  16854. + * reiser4_free_dentry_fsdata - detach and free dentry_fsdata
  16855. + * @dentry: dentry to free fsdata of
  16856. + *
  16857. + * Detaches and frees fs-specific dentry data
  16858. + */
  16859. +void reiser4_free_dentry_fsdata(struct dentry *dentry)
  16860. +{
  16861. + if (dentry->d_fsdata != NULL) {
  16862. + kmem_cache_free(dentry_fsdata_cache, dentry->d_fsdata);
  16863. + dentry->d_fsdata = NULL;
  16864. + }
  16865. +}
  16866. +
  16867. +/* slab for reiser4_file_fsdata */
  16868. +static struct kmem_cache *file_fsdata_cache;
  16869. +
  16870. +/**
  16871. + * reiser4_init_file_fsdata - create cache of reiser4_file_fsdata
  16872. + *
  16873. + * Initializes slab cache of structures attached to file->private_data. It is
  16874. + * part of reiser4 module initialization.
  16875. + */
  16876. +int reiser4_init_file_fsdata(void)
  16877. +{
  16878. + file_fsdata_cache = kmem_cache_create("file_fsdata",
  16879. + sizeof(reiser4_file_fsdata),
  16880. + 0,
  16881. + SLAB_HWCACHE_ALIGN |
  16882. + SLAB_RECLAIM_ACCOUNT, NULL);
  16883. + if (file_fsdata_cache == NULL)
  16884. + return RETERR(-ENOMEM);
  16885. + return 0;
  16886. +}
  16887. +
  16888. +/**
  16889. + * reiser4_done_file_fsdata - delete cache of reiser4_file_fsdata
  16890. + *
  16891. + * This is called on reiser4 module unloading or system shutdown.
  16892. + */
  16893. +void reiser4_done_file_fsdata(void)
  16894. +{
  16895. + destroy_reiser4_cache(&file_fsdata_cache);
  16896. +}
  16897. +
  16898. +/**
  16899. + * create_fsdata - allocate and initialize reiser4_file_fsdata
  16900. + * @file: what to create file_fsdata for, may be NULL
  16901. + *
  16902. + * Allocates and initializes reiser4_file_fsdata structure.
  16903. + */
  16904. +static reiser4_file_fsdata *create_fsdata(struct file *file)
  16905. +{
  16906. + reiser4_file_fsdata *fsdata;
  16907. +
  16908. + fsdata = kmem_cache_alloc(file_fsdata_cache,
  16909. + reiser4_ctx_gfp_mask_get());
  16910. + if (fsdata != NULL) {
  16911. + memset(fsdata, 0, sizeof *fsdata);
  16912. + fsdata->back = file;
  16913. + INIT_LIST_HEAD(&fsdata->dir.linkage);
  16914. + }
  16915. + return fsdata;
  16916. +}
  16917. +
  16918. +/**
  16919. + * free_fsdata - free reiser4_file_fsdata
  16920. + * @fsdata: object to free
  16921. + *
  16922. + * Dual to create_fsdata(). Free reiser4_file_fsdata.
  16923. + */
  16924. +static void free_fsdata(reiser4_file_fsdata *fsdata)
  16925. +{
  16926. + BUG_ON(fsdata == NULL);
  16927. + kmem_cache_free(file_fsdata_cache, fsdata);
  16928. +}
  16929. +
  16930. +/**
  16931. + * reiser4_get_file_fsdata - get fs-specific file data
  16932. + * @file: queried file
  16933. + *
  16934. + * Returns fs-specific data of @file. If it is NULL, allocates it and attaches
  16935. + * to @file.
  16936. + */
  16937. +reiser4_file_fsdata *reiser4_get_file_fsdata(struct file *file)
  16938. +{
  16939. + assert("nikita-1603", file != NULL);
  16940. +
  16941. + if (file->private_data == NULL) {
  16942. + reiser4_file_fsdata *fsdata;
  16943. + struct inode *inode;
  16944. +
  16945. + fsdata = create_fsdata(file);
  16946. + if (fsdata == NULL)
  16947. + return ERR_PTR(RETERR(-ENOMEM));
  16948. +
  16949. + inode = file_inode(file);
  16950. + spin_lock_inode(inode);
  16951. + if (file->private_data == NULL) {
  16952. + file->private_data = fsdata;
  16953. + fsdata = NULL;
  16954. + }
  16955. + spin_unlock_inode(inode);
  16956. + if (fsdata != NULL)
  16957. + /* other thread initialized ->fsdata */
  16958. + kmem_cache_free(file_fsdata_cache, fsdata);
  16959. + }
  16960. + assert("nikita-2665", file->private_data != NULL);
  16961. + return file->private_data;
  16962. +}
  16963. +
  16964. +/**
  16965. + * free_file_fsdata_nolock - detach and free reiser4_file_fsdata
  16966. + * @file:
  16967. + *
  16968. + * Detaches reiser4_file_fsdata from @file, removes reiser4_file_fsdata from
  16969. + * readdir list, frees if it is not linked to d_cursor object.
  16970. + */
  16971. +static void free_file_fsdata_nolock(struct file *file)
  16972. +{
  16973. + reiser4_file_fsdata *fsdata;
  16974. +
  16975. + assert("", spin_inode_is_locked(file_inode(file)));
  16976. + fsdata = file->private_data;
  16977. + if (fsdata != NULL) {
  16978. + list_del_init(&fsdata->dir.linkage);
  16979. + if (fsdata->cursor == NULL)
  16980. + free_fsdata(fsdata);
  16981. + }
  16982. + file->private_data = NULL;
  16983. +}
  16984. +
  16985. +/**
  16986. + * reiser4_free_file_fsdata - detach from struct file and free reiser4_file_fsdata
  16987. + * @file:
  16988. + *
  16989. + * Spinlocks inode and calls free_file_fsdata_nolock to do the work.
  16990. + */
  16991. +void reiser4_free_file_fsdata(struct file *file)
  16992. +{
  16993. + spin_lock_inode(file_inode(file));
  16994. + free_file_fsdata_nolock(file);
  16995. + spin_unlock_inode(file_inode(file));
  16996. +}
  16997. +
  16998. +/*
  16999. + * Local variables:
  17000. + * c-indentation-style: "K&R"
  17001. + * mode-name: "LC"
  17002. + * c-basic-offset: 8
  17003. + * tab-width: 8
  17004. + * fill-column: 79
  17005. + * End:
  17006. + */
  17007. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/fsdata.h linux-4.14.2/fs/reiser4/fsdata.h
  17008. --- linux-4.14.2.orig/fs/reiser4/fsdata.h 1970-01-01 01:00:00.000000000 +0100
  17009. +++ linux-4.14.2/fs/reiser4/fsdata.h 2017-11-26 22:13:09.000000000 +0100
  17010. @@ -0,0 +1,203 @@
  17011. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  17012. + * reiser4/README */
  17013. +
  17014. +#if !defined(__REISER4_FSDATA_H__)
  17015. +#define __REISER4_FSDATA_H__
  17016. +
  17017. +#include "debug.h"
  17018. +#include "kassign.h"
  17019. +#include "seal.h"
  17020. +#include "type_safe_hash.h"
  17021. +#include "plugin/file/file.h"
  17022. +#include "readahead.h"
  17023. +
  17024. +/*
  17025. + * reiser4_dentry_fsdata (declared below) is per-dentry data allocated on
  17026. + * demand and freed in d_op->d_release(); it caches the location of a
  17027. + * directory entry and a "stateless" flag used by readdir-over-NFS.
  17028. + */
  17029. +
  17030. +/*
  17031. + * locking: fields of per file descriptor readdir_pos and ->f_pos are
  17032. + * protected by ->i_mutex on inode. Under this lock following invariant
  17033. + * holds:
  17034. + *
  17035. + * file descriptor is "looking" at the entry_no-th directory entry from
  17036. + * the beginning of directory. This entry has key dir_entry_key and is
  17037. + * pos-th entry with duplicate-key sequence.
  17038. + *
  17039. + */
  17040. +
  17041. +/* logical position within directory */
  17042. +struct dir_pos {
  17043. + /* key of directory entry (actually, part of a key sufficient to
  17044. + identify directory entry) */
  17045. + de_id dir_entry_key;
  17046. + /* ordinal number of directory entry among all entries with the same
  17047. + key. (Starting from 0.) */
  17048. + unsigned pos;
  17049. +};
  17050. +
  17051. +struct readdir_pos {
  17052. + /* f_pos corresponding to this readdir position */
  17053. + __u64 fpos;
  17054. + /* logical position within directory */
  17055. + struct dir_pos position;
  17056. + /* logical number of directory entry within
  17057. + directory */
  17058. + __u64 entry_no;
  17059. +};
  17060. +
  17061. +/*
  17062. + * this is used to speed up lookups for directory entry: on initial call to
  17063. + * ->lookup() seal and coord of directory entry (if found, that is) are stored
  17064. + * in struct dentry and reused later to avoid tree traversals.
  17065. + */
  17066. +struct de_location {
  17067. + /* seal covering directory entry */
  17068. + seal_t entry_seal;
  17069. + /* coord of directory entry */
  17070. + coord_t entry_coord;
  17071. + /* ordinal number of directory entry among all entries with the same
  17072. + key. (Starting from 0.) */
  17073. + int pos;
  17074. +};
  17075. +
  17076. +/**
  17077. + * reiser4_dentry_fsdata - reiser4-specific data attached to dentries
  17078. + *
  17079. + * This is allocated dynamically and released in d_op->d_release()
  17080. + *
  17081. + * Currently it only contains cached location (hint) of directory entry, but
  17082. + * it is expected that other information will be accumulated here.
  17083. + */
  17084. +struct reiser4_dentry_fsdata {
  17085. + /*
  17086. + * here will go fields filled by ->lookup() to speedup next
  17087. + * create/unlink, like blocknr of znode with stat-data, or key of
  17088. + * stat-data.
  17089. + */
  17090. + struct de_location dec;
  17091. + int stateless; /* created through reiser4_decode_fh, needs
  17092. + * special treatment in readdir. */
  17093. +};
  17094. +
  17095. +extern int reiser4_init_dentry_fsdata(void);
  17096. +extern void reiser4_done_dentry_fsdata(void);
  17097. +extern struct reiser4_dentry_fsdata *reiser4_get_dentry_fsdata(struct dentry *);
  17098. +extern void reiser4_free_dentry_fsdata(struct dentry *dentry);
  17099. +
  17100. +/**
  17101. + * reiser4_file_fsdata - reiser4-specific data attached to file->private_data
  17102. + *
  17103. + * This is allocated dynamically and released in inode->i_fop->release
  17104. + */
  17105. +typedef struct reiser4_file_fsdata {
  17106. + /*
  17107. + * pointer back to the struct file which this reiser4_file_fsdata is
  17108. + * part of
  17109. + */
  17110. + struct file *back;
  17111. + /* detached cursor for stateless readdir. */
  17112. + struct dir_cursor *cursor;
  17113. + /*
  17114. + * We need both directory and regular file parts here, because there
  17115. + * are file system objects that are files and directories.
  17116. + */
  17117. + struct {
  17118. + /*
  17119. + * position in directory. It is updated each time directory is
  17120. + * modified
  17121. + */
  17122. + struct readdir_pos readdir;
  17123. + /* head of this list is reiser4_inode->lists.readdir_list */
  17124. + struct list_head linkage;
  17125. + } dir;
  17126. + /* hints to speed up operations with regular files: read and write. */
  17127. + struct {
  17128. + hint_t hint;
  17129. + } reg;
  17130. +} reiser4_file_fsdata;
  17131. +
  17132. +extern int reiser4_init_file_fsdata(void);
  17133. +extern void reiser4_done_file_fsdata(void);
  17134. +extern reiser4_file_fsdata *reiser4_get_file_fsdata(struct file *);
  17135. +extern void reiser4_free_file_fsdata(struct file *);
  17136. +
  17137. +/*
  17138. + * d_cursor is reiser4_file_fsdata not attached to struct file. d_cursors are
  17139. + * used to address problem reiser4 has with readdir accesses via NFS. See
  17140. + * plugin/file_ops_readdir.c for more details.
  17141. + */
  17142. +struct d_cursor_key{
  17143. + __u16 cid;
  17144. + __u64 oid;
  17145. +};
  17146. +
  17147. +/*
  17148. + * define structures d_cursor_hash_table d_cursor_hash_link which are used to
  17149. + * maintain hash table of dir_cursor-s in reiser4's super block
  17150. + */
  17151. +typedef struct dir_cursor dir_cursor;
  17152. +TYPE_SAFE_HASH_DECLARE(d_cursor, dir_cursor);
  17153. +
  17154. +struct dir_cursor {
  17155. + int ref;
  17156. + reiser4_file_fsdata *fsdata;
  17157. +
  17158. + /* link to reiser4 super block hash table of cursors */
  17159. + d_cursor_hash_link hash;
  17160. +
  17161. + /*
  17162. + * this is to link cursors to reiser4 super block's radix tree of
  17163. + * cursors if there are more than one cursor of the same objectid
  17164. + */
  17165. + struct list_head list;
  17166. + struct d_cursor_key key;
  17167. + struct d_cursor_info *info;
  17168. + /* list of unused cursors */
  17169. + struct list_head alist;
  17170. +};
  17171. +
  17172. +extern int reiser4_init_d_cursor(void);
  17173. +extern void reiser4_done_d_cursor(void);
  17174. +
  17175. +extern int reiser4_init_super_d_info(struct super_block *);
  17176. +extern void reiser4_done_super_d_info(struct super_block *);
  17177. +
  17178. +extern loff_t reiser4_get_dir_fpos(struct file *, loff_t);
  17179. +extern int reiser4_attach_fsdata(struct file *, loff_t *, struct inode *);
  17180. +extern void reiser4_detach_fsdata(struct file *);
  17181. +
  17182. +/* these are needed for "stateless" readdir. See plugin/file_ops_readdir.c for
  17183. + more details */
  17184. +void reiser4_dispose_cursors(struct inode *inode);
  17185. +void reiser4_load_cursors(struct inode *inode);
  17186. +void reiser4_kill_cursors(struct inode *inode);
  17187. +void reiser4_adjust_dir_file(struct inode *dir, const struct dentry *de,
  17188. + int offset, int adj);
  17189. +
  17190. +/*
  17191. + * this structure is embedded to reiser4_super_info_data. It maintains d_cursors
  17192. + * (detached readdir state). See plugin/file_ops_readdir.c for more details.
  17193. + */
  17194. +struct d_cursor_info {
  17195. + d_cursor_hash_table table;
  17196. + struct radix_tree_root tree;
  17197. +};
  17198. +
  17199. +/* spinlock protecting readdir cursors */
  17200. +extern spinlock_t d_c_lock;
  17201. +
  17202. +/* __REISER4_FSDATA_H__ */
  17203. +#endif
  17204. +
  17205. +/*
  17206. + * Local variables:
  17207. + * c-indentation-style: "K&R"
  17208. + * mode-name: "LC"
  17209. + * c-basic-offset: 8
  17210. + * tab-width: 8
  17211. + * fill-column: 120
  17212. + * End:
  17213. + */
  17214. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/init_super.c linux-4.14.2/fs/reiser4/init_super.c
  17215. --- linux-4.14.2.orig/fs/reiser4/init_super.c 1970-01-01 01:00:00.000000000 +0100
  17216. +++ linux-4.14.2/fs/reiser4/init_super.c 2017-11-26 22:13:09.000000000 +0100
  17217. @@ -0,0 +1,806 @@
  17218. +/* Copyright by Hans Reiser, 2003 */
  17219. +
  17220. +#include "super.h"
  17221. +#include "inode.h"
  17222. +#include "plugin/plugin_set.h"
  17223. +
  17224. +#include <linux/swap.h>
  17225. +
  17226. +/**
  17227. + * init_fs_info - allocate reiser4 specific super block
  17228. + * @super: super block of filesystem
  17229. + *
  17230. + * Allocates and initializes reiser4_super_info_data, attaches it to
  17231. + * super->s_fs_info, initializes structures maintaining d_cursor-s.
  17232. + */
  17233. +int reiser4_init_fs_info(struct super_block *super)
  17234. +{
  17235. + reiser4_super_info_data *sbinfo;
  17236. +
  17237. + sbinfo = kzalloc(sizeof(reiser4_super_info_data),
  17238. + reiser4_ctx_gfp_mask_get());
  17239. + if (!sbinfo)
  17240. + return RETERR(-ENOMEM);
  17241. +
  17242. + super->s_fs_info = sbinfo;
  17243. + super->s_op = NULL;
  17244. +
  17245. + ON_DEBUG(INIT_LIST_HEAD(&sbinfo->all_jnodes));
  17246. + ON_DEBUG(spin_lock_init(&sbinfo->all_guard));
  17247. +
  17248. + mutex_init(&sbinfo->delete_mutex);
  17249. + spin_lock_init(&(sbinfo->guard));
  17250. +
  17251. + /* initialize per-super-block d_cursor resources */
  17252. + reiser4_init_super_d_info(super);
  17253. +
  17254. + return 0;
  17255. +}
  17256. +
  17257. +/**
  17258. + * Release reiser4 specific super block
  17259. + *
  17260. + * release per-super-block d_cursor resources
  17261. + * free reiser4_super_info_data.
  17262. + */
  17263. +void reiser4_done_fs_info(struct super_block *super)
  17264. +{
  17265. + assert("zam-990", super->s_fs_info != NULL);
  17266. +
  17267. + reiser4_done_super_d_info(super);
  17268. + kfree(super->s_fs_info);
  17269. + super->s_fs_info = NULL;
  17270. +}
  17271. +
  17272. +/* type of option parseable by parse_option() */
  17273. +typedef enum {
  17274. + /* value of option is arbitrary string */
  17275. + OPT_STRING,
  17276. +
  17277. + /*
  17278. + * option specifies bit in a bitmask. When option is set - bit in
  17279. + * sbinfo->fs_flags is set. Examples are bsdgroups, 32bittimes, mtflush,
  17280. + * dont_load_bitmap, atomic_write.
  17281. + */
  17282. + OPT_BIT,
  17283. +
  17284. + /*
  17285. + * value of option should conform to sprintf() format. Examples are
  17286. + * tmgr.atom_max_size=N, tmgr.atom_max_age=N
  17287. + */
  17288. + OPT_FORMAT,
  17289. +
  17290. + /*
  17291. + * option can take one of predefined values. Example is onerror=panic or
  17292. + * onerror=remount-ro
  17293. + */
  17294. + OPT_ONEOF,
  17295. +
  17296. + /*
  17297. + * option take one of txmod plugin labels.
  17298. + * Example is "txmod=journal" or "txmod=wa"
  17299. + */
  17300. + OPT_TXMOD,
  17301. +} opt_type_t;
  17302. +
  17303. +#if 0
  17304. +struct opt_bitmask_bit {
  17305. + const char *bit_name;
  17306. + int bit_nr;
  17307. +};
  17308. +#endif
  17309. +
  17310. +#define MAX_ONEOF_LIST 10
  17311. +
  17312. +/* description of option parseable by parse_option() */
  17313. +struct opt_desc {
  17314. + /* option name.
  17315. +
  17316. + parsed portion of string has a form "name=value".
  17317. + */
  17318. + const char *name;
  17319. + /* type of option */
  17320. + opt_type_t type;
  17321. + union {
  17322. + /* where to store value of string option (type == OPT_STRING) */
  17323. + char **string;
  17324. + /* description of bits for bit option (type == OPT_BIT) */
  17325. + struct {
  17326. + int nr;
  17327. + void *addr;
  17328. + } bit;
  17329. + /* description of format and targets for format option (type
  17330. + == OPT_FORMAT) */
  17331. + struct {
  17332. + const char *format;
  17333. + int nr_args;
  17334. + void *arg1;
  17335. + void *arg2;
  17336. + void *arg3;
  17337. + void *arg4;
  17338. + } f;
  17339. + struct {
  17340. + int *result;
  17341. + const char *list[MAX_ONEOF_LIST];
  17342. + } oneof;
  17343. + struct {
  17344. + reiser4_txmod_id *result;
  17345. + } txmod;
  17346. + struct {
  17347. + void *addr;
  17348. + int nr_bits;
  17349. + /* struct opt_bitmask_bit *bits; */
  17350. + } bitmask;
  17351. + } u;
  17352. +};
  17353. +
  17354. +/**
  17355. + * parse_option - parse one option
  17356. + * @opt_string: starting point of parsing
  17357. + * @opt: option description
  17358. + *
  17359. + * foo=bar,
  17360. + * ^ ^ ^
  17361. + * | | +-- replaced to '\0'
  17362. + * | +-- val_start
  17363. + * +-- opt_string
  17364. + * Figures out option type and handles option correspondingly.
  17365. + */
  17366. +static int parse_option(char *opt_string, struct opt_desc *opt)
  17367. +{
  17368. + char *val_start;
  17369. + int result;
  17370. + const char *err_msg;
  17371. +
  17372. + /* NOTE-NIKITA think about using lib/cmdline.c functions here. */
  17373. +
  17374. + val_start = strchr(opt_string, '=');
  17375. + if (val_start != NULL) {
  17376. + *val_start = '\0';
  17377. + ++val_start;
  17378. + }
  17379. +
  17380. + err_msg = NULL;
  17381. + result = 0;
  17382. + switch (opt->type) {
  17383. + case OPT_STRING:
  17384. + if (val_start == NULL) {
  17385. + err_msg = "String arg missing";
  17386. + result = RETERR(-EINVAL);
  17387. + } else
  17388. + *opt->u.string = val_start;
  17389. + break;
  17390. + case OPT_BIT:
  17391. + if (val_start != NULL)
  17392. + err_msg = "Value ignored";
  17393. + else
  17394. + set_bit(opt->u.bit.nr, opt->u.bit.addr);
  17395. + break;
  17396. + case OPT_FORMAT:
  17397. + if (val_start == NULL) {
  17398. + err_msg = "Formatted arg missing";
  17399. + result = RETERR(-EINVAL);
  17400. + break;
  17401. + }
  17402. + if (sscanf(val_start, opt->u.f.format,
  17403. + opt->u.f.arg1, opt->u.f.arg2, opt->u.f.arg3,
  17404. + opt->u.f.arg4) != opt->u.f.nr_args) {
  17405. + err_msg = "Wrong conversion";
  17406. + result = RETERR(-EINVAL);
  17407. + }
  17408. + break;
  17409. + case OPT_ONEOF:
  17410. + {
  17411. + int i = 0;
  17412. +
  17413. + if (val_start == NULL) {
  17414. + err_msg = "Value is missing";
  17415. + result = RETERR(-EINVAL);
  17416. + break;
  17417. + }
  17418. + err_msg = "Wrong option value";
  17419. + result = RETERR(-EINVAL);
  17420. + while (opt->u.oneof.list[i]) {
  17421. + if (!strcmp(opt->u.oneof.list[i], val_start)) {
  17422. + result = 0;
  17423. + err_msg = NULL;
  17424. + *opt->u.oneof.result = i;
  17425. + break;
  17426. + }
  17427. + i++;
  17428. + }
  17429. + break;
  17430. + }
  17431. + break;
  17432. + case OPT_TXMOD:
  17433. + {
  17434. + reiser4_txmod_id i = 0;
  17435. +
  17436. + if (val_start == NULL) {
  17437. + err_msg = "Value is missing";
  17438. + result = RETERR(-EINVAL);
  17439. + break;
  17440. + }
  17441. + err_msg = "Wrong option value";
  17442. + result = RETERR(-EINVAL);
  17443. + while (i < LAST_TXMOD_ID) {
  17444. + if (!strcmp(txmod_plugins[i].h.label,
  17445. + val_start)) {
  17446. + result = 0;
  17447. + err_msg = NULL;
  17448. + *opt->u.txmod.result = i;
  17449. + break;
  17450. + }
  17451. + i++;
  17452. + }
  17453. + break;
  17454. + }
  17455. + default:
  17456. + wrong_return_value("nikita-2100", "opt -> type");
  17457. + break;
  17458. + }
  17459. + if (err_msg != NULL) {
  17460. + warning("nikita-2496", "%s when parsing option \"%s%s%s\"",
  17461. + err_msg, opt->name, val_start ? "=" : "",
  17462. + val_start ? : "");
  17463. + }
  17464. + return result;
  17465. +}
  17466. +
  17467. +/**
  17468. + * parse_options - parse reiser4 mount options
  17469. + * @opt_string: starting point
  17470. + * @opts: array of option description
  17471. + * @nr_opts: number of elements in @opts
  17472. + *
  17473. + * Parses comma separated list of reiser4 mount options.
  17474. + */
  17475. +static int parse_options(char *opt_string, struct opt_desc *opts, int nr_opts)
  17476. +{
  17477. + int result;
  17478. +
  17479. + result = 0;
  17480. + while ((result == 0) && opt_string && *opt_string) {
  17481. + int j;
  17482. + char *next;
  17483. +
  17484. + next = strchr(opt_string, ',');
  17485. + if (next != NULL) {
  17486. + *next = '\0';
  17487. + ++next;
  17488. + }
  17489. + for (j = 0; j < nr_opts; ++j) {
  17490. + if (!strncmp(opt_string, opts[j].name,
  17491. + strlen(opts[j].name))) {
  17492. + result = parse_option(opt_string, &opts[j]);
  17493. + break;
  17494. + }
  17495. + }
  17496. + if (j == nr_opts) {
  17497. + warning("nikita-2307", "Unrecognized option: \"%s\"",
  17498. + opt_string);
  17499. + /* traditionally, -EINVAL is returned on wrong mount
  17500. + option */
  17501. + result = RETERR(-EINVAL);
  17502. + }
  17503. + opt_string = next;
  17504. + }
  17505. + return result;
  17506. +}
  17507. +
  17508. +#define NUM_OPT(label, fmt, addr) \
  17509. + { \
  17510. + .name = (label), \
  17511. + .type = OPT_FORMAT, \
  17512. + .u = { \
  17513. + .f = { \
  17514. + .format = (fmt), \
  17515. + .nr_args = 1, \
  17516. + .arg1 = (addr), \
  17517. + .arg2 = NULL, \
  17518. + .arg3 = NULL, \
  17519. + .arg4 = NULL \
  17520. + } \
  17521. + } \
  17522. + }
  17523. +
  17524. +#define SB_FIELD_OPT(field, fmt) NUM_OPT(#field, fmt, &sbinfo->field)
  17525. +
  17526. +#define BIT_OPT(label, bitnr) \
  17527. + { \
  17528. + .name = label, \
  17529. + .type = OPT_BIT, \
  17530. + .u = { \
  17531. + .bit = { \
  17532. + .nr = bitnr, \
  17533. + .addr = &sbinfo->fs_flags \
  17534. + } \
  17535. + } \
  17536. + }
  17537. +
  17538. +#define MAX_NR_OPTIONS (30)
  17539. +
  17540. +#if REISER4_DEBUG
  17541. +# define OPT_ARRAY_CHECK(opt, array) \
  17542. + if ((opt) > (array) + MAX_NR_OPTIONS) { \
  17543. + warning("zam-1046", "opt array is overloaded"); break; \
  17544. + }
  17545. +#else
  17546. +# define OPT_ARRAY_CHECK(opt, array) noop
  17547. +#endif
  17548. +
  17549. +#define PUSH_OPT(opt, array, ...) \
  17550. +do { \
  17551. + struct opt_desc o = __VA_ARGS__; \
  17552. + OPT_ARRAY_CHECK(opt, array); \
  17553. + *(opt) ++ = o; \
  17554. +} while (0)
  17555. +
  17556. +static noinline void push_sb_field_opts(struct opt_desc **p,
  17557. + struct opt_desc *opts,
  17558. + reiser4_super_info_data *sbinfo)
  17559. +{
  17560. +#define PUSH_SB_FIELD_OPT(field, format) \
  17561. + PUSH_OPT(*p, opts, SB_FIELD_OPT(field, format))
  17562. + /*
  17563. + * tmgr.atom_max_size=N
  17564. + * Atoms containing more than N blocks will be forced to commit. N is
  17565. + * decimal.
  17566. + */
  17567. + PUSH_SB_FIELD_OPT(tmgr.atom_max_size, "%u");
  17568. + /*
  17569. + * tmgr.atom_max_age=N
  17570. + * Atoms older than N seconds will be forced to commit. N is decimal.
  17571. + */
  17572. + PUSH_SB_FIELD_OPT(tmgr.atom_max_age, "%u");
  17573. + /*
  17574. + * tmgr.atom_min_size=N
  17575. + * In committing an atom to free dirty pages, force the atom less than
  17576. + * N in size to fuse with another one.
  17577. + */
  17578. + PUSH_SB_FIELD_OPT(tmgr.atom_min_size, "%u");
  17579. + /*
  17580. + * tmgr.atom_max_flushers=N
  17581. + * limit of concurrent flushers for one atom. 0 means no limit.
  17582. + */
  17583. + PUSH_SB_FIELD_OPT(tmgr.atom_max_flushers, "%u");
  17584. + /*
  17585. + * tree.cbk_cache_slots=N
  17586. + * Number of slots in the cbk cache.
  17587. + */
  17588. + PUSH_SB_FIELD_OPT(tree.cbk_cache.nr_slots, "%u");
  17589. + /*
  17590. + * If flush finds more than FLUSH_RELOCATE_THRESHOLD adjacent dirty
  17591. + * leaf-level blocks it will force them to be relocated.
  17592. + */
  17593. + PUSH_SB_FIELD_OPT(flush.relocate_threshold, "%u");
  17594. + /*
  17595. + * If flush can find a block allocation closer than at most
  17596. + * FLUSH_RELOCATE_DISTANCE from the preceder it will relocate to that
  17597. + * position.
  17598. + */
  17599. + PUSH_SB_FIELD_OPT(flush.relocate_distance, "%u");
  17600. + /*
  17601. + * If we have written this much or more blocks before encountering busy
  17602. + * jnode in flush list - abort flushing hoping that next time we get
  17603. + * called this jnode will be clean already, and we will save some
  17604. + * seeks.
  17605. + */
  17606. + PUSH_SB_FIELD_OPT(flush.written_threshold, "%u");
  17607. + /* The maximum number of nodes to scan left on a level during flush. */
  17608. + PUSH_SB_FIELD_OPT(flush.scan_maxnodes, "%u");
  17609. + /* preferred IO size */
  17610. + PUSH_SB_FIELD_OPT(optimal_io_size, "%u");
  17611. + /* carry flags used for insertion of new nodes */
  17612. + PUSH_SB_FIELD_OPT(tree.carry.new_node_flags, "%u");
  17613. + /* carry flags used for insertion of new extents */
  17614. + PUSH_SB_FIELD_OPT(tree.carry.new_extent_flags, "%u");
  17615. + /* carry flags used for paste operations */
  17616. + PUSH_SB_FIELD_OPT(tree.carry.paste_flags, "%u");
  17617. + /* carry flags used for insert operations */
  17618. + PUSH_SB_FIELD_OPT(tree.carry.insert_flags, "%u");
  17619. +
  17620. +#ifdef CONFIG_REISER4_BADBLOCKS
  17621. + /*
  17622. + * Alternative master superblock location in case its original
  17623. + * location is not writable/accessible. This is offset in BYTES.
  17624. + */
  17625. + PUSH_SB_FIELD_OPT(altsuper, "%lu");
  17626. +#endif
  17627. +}
  17628. +
  17629. +/**
  17630. + * reiser4_init_super_data - initialize reiser4 private super block
  17631. + * @super: super block to initialize
  17632. + * @opt_string: list of reiser4 mount options
  17633. + *
  17634. + * Sets various reiser4 parameters to default values. Parses mount options and
  17635. + * overwrites default settings.
  17636. + */
  17637. +int reiser4_init_super_data(struct super_block *super, char *opt_string)
  17638. +{
  17639. + int result;
  17640. + struct opt_desc *opts, *p;
  17641. + reiser4_super_info_data *sbinfo = get_super_private(super);
  17642. +
  17643. + /* initialize super, export, dentry operations */
  17644. + sbinfo->ops.super = reiser4_super_operations;
  17645. + sbinfo->ops.export = reiser4_export_operations;
  17646. + sbinfo->ops.dentry = reiser4_dentry_operations;
  17647. + super->s_op = &sbinfo->ops.super;
  17648. + super->s_export_op = &sbinfo->ops.export;
  17649. +
  17650. + /* initialize transaction manager parameters to default values */
  17651. + sbinfo->tmgr.atom_max_size = totalram_pages / 4;
  17652. + sbinfo->tmgr.atom_max_age = REISER4_ATOM_MAX_AGE / HZ;
  17653. + sbinfo->tmgr.atom_min_size = 256;
  17654. + sbinfo->tmgr.atom_max_flushers = ATOM_MAX_FLUSHERS;
  17655. +
  17656. + /* initialize cbk cache parameter */
  17657. + sbinfo->tree.cbk_cache.nr_slots = CBK_CACHE_SLOTS;
  17658. +
  17659. + /* initialize flush parameters */
  17660. + sbinfo->flush.relocate_threshold = FLUSH_RELOCATE_THRESHOLD;
  17661. + sbinfo->flush.relocate_distance = FLUSH_RELOCATE_DISTANCE;
  17662. + sbinfo->flush.written_threshold = FLUSH_WRITTEN_THRESHOLD;
  17663. + sbinfo->flush.scan_maxnodes = FLUSH_SCAN_MAXNODES;
  17664. +
  17665. + sbinfo->optimal_io_size = REISER4_OPTIMAL_IO_SIZE;
  17666. +
  17667. + /* preliminary tree initializations */
  17668. + sbinfo->tree.super = super;
  17669. + sbinfo->tree.carry.new_node_flags = REISER4_NEW_NODE_FLAGS;
  17670. + sbinfo->tree.carry.new_extent_flags = REISER4_NEW_EXTENT_FLAGS;
  17671. + sbinfo->tree.carry.paste_flags = REISER4_PASTE_FLAGS;
  17672. + sbinfo->tree.carry.insert_flags = REISER4_INSERT_FLAGS;
  17673. + rwlock_init(&(sbinfo->tree.tree_lock));
  17674. + spin_lock_init(&(sbinfo->tree.epoch_lock));
  17675. +
  17676. + /* initialize default readahead params */
  17677. + sbinfo->ra_params.max = totalram_pages / 4;
  17678. + sbinfo->ra_params.flags = 0;
  17679. +
  17680. + /* allocate memory for structure describing reiser4 mount options */
  17681. + opts = kmalloc(sizeof(struct opt_desc) * MAX_NR_OPTIONS,
  17682. + reiser4_ctx_gfp_mask_get());
  17683. + if (opts == NULL)
  17684. + return RETERR(-ENOMEM);
  17685. +
  17686. + /* initialize structure describing reiser4 mount options */
  17687. + p = opts;
  17688. +
  17689. + push_sb_field_opts(&p, opts, sbinfo);
  17690. + /* turn on BSD-style gid assignment */
  17691. +
  17692. +#define PUSH_BIT_OPT(name, bit) \
  17693. + PUSH_OPT(p, opts, BIT_OPT(name, bit))
  17694. +
  17695. + PUSH_BIT_OPT("bsdgroups", REISER4_BSD_GID);
  17696. + /* turn on 32 bit times */
  17697. + PUSH_BIT_OPT("32bittimes", REISER4_32_BIT_TIMES);
  17698. + /*
  17699. + * Don't load all bitmap blocks at mount time, it is useful for
  17700. + * machines with tiny RAM and large disks.
  17701. + */
  17702. + PUSH_BIT_OPT("dont_load_bitmap", REISER4_DONT_LOAD_BITMAP);
  17703. + /* disable transaction commits during write() */
  17704. + PUSH_BIT_OPT("atomic_write", REISER4_ATOMIC_WRITE);
  17705. + /* enable issuing of discard requests */
  17706. + PUSH_BIT_OPT("discard", REISER4_DISCARD);
  17707. + /* disable hole punching at flush time */
  17708. + PUSH_BIT_OPT("dont_punch_holes", REISER4_DONT_PUNCH_HOLES);
  17709. +
  17710. + PUSH_OPT(p, opts,
  17711. + {
  17712. + /*
  17713. + * tree traversal readahead parameters:
  17714. + * -o readahead:MAXNUM:FLAGS
  17715. + * MAXNUM - max number of nodes to request readahead for: -1UL
  17716. + * will set it to max_sane_readahead()
  17717. + * FLAGS - combination of bits: RA_ADJCENT_ONLY, RA_ALL_LEVELS,
  17718. + * CONTINUE_ON_PRESENT
  17719. + */
  17720. + .name = "readahead",
  17721. + .type = OPT_FORMAT,
  17722. + .u = {
  17723. + .f = {
  17724. + .format = "%u:%u",
  17725. + .nr_args = 2,
  17726. + .arg1 = &sbinfo->ra_params.max,
  17727. + .arg2 = &sbinfo->ra_params.flags,
  17728. + .arg3 = NULL,
  17729. + .arg4 = NULL
  17730. + }
  17731. + }
  17732. + }
  17733. + );
  17734. +
  17735. + /* What to do in case of fs error */
  17736. + PUSH_OPT(p, opts,
  17737. + {
  17738. + .name = "onerror",
  17739. + .type = OPT_ONEOF,
  17740. + .u = {
  17741. + .oneof = {
  17742. + .result = &sbinfo->onerror,
  17743. + .list = {
  17744. + "remount-ro", "panic", NULL
  17745. + },
  17746. + }
  17747. + }
  17748. + }
  17749. + );
  17750. +
  17751. + /*
  17752. + * What transaction model (journal, cow, etc)
  17753. + * is used to commit transactions
  17754. + */
  17755. + PUSH_OPT(p, opts,
  17756. + {
  17757. + .name = "txmod",
  17758. + .type = OPT_TXMOD,
  17759. + .u = {
  17760. + .txmod = {
  17761. + .result = &sbinfo->txmod
  17762. + }
  17763. + }
  17764. + }
  17765. + );
  17766. +
  17767. + /* modify default settings to values set by mount options */
  17768. + result = parse_options(opt_string, opts, p - opts);
  17769. + kfree(opts);
  17770. + if (result != 0)
  17771. + return result;
  17772. +
  17773. + /* correct settings to sanity values */
  17774. + sbinfo->tmgr.atom_max_age *= HZ;
  17775. + if (sbinfo->tmgr.atom_max_age <= 0)
  17776. + /* overflow */
  17777. + sbinfo->tmgr.atom_max_age = REISER4_ATOM_MAX_AGE;
  17778. +
  17779. + /* round optimal io size up to 512 bytes */
  17780. + sbinfo->optimal_io_size >>= VFS_BLKSIZE_BITS;
  17781. + sbinfo->optimal_io_size <<= VFS_BLKSIZE_BITS;
  17782. + if (sbinfo->optimal_io_size == 0) {
  17783. + warning("nikita-2497", "optimal_io_size is too small");
  17784. + return RETERR(-EINVAL);
  17785. + }
  17786. + return result;
  17787. +}
  17788. +
  17789. +/**
  17790. + * reiser4_init_read_super - read reiser4 master super block
  17791. + * @super: super block to fill
  17792. + * @silent: if 0 - print warnings
  17793. + *
  17794. + * Reads reiser4 master super block either from predefined location or from
  17795. + * location specified by altsuper mount option, initializes disk format plugin.
  17796. + */
  17797. +int reiser4_init_read_super(struct super_block *super, int silent)
  17798. +{
  17799. + struct buffer_head *super_bh;
  17800. + struct reiser4_master_sb *master_sb;
  17801. + reiser4_super_info_data *sbinfo = get_super_private(super);
  17802. + unsigned long blocksize;
  17803. +
  17804. + read_super_block:
  17805. +#ifdef CONFIG_REISER4_BADBLOCKS
  17806. + if (sbinfo->altsuper)
  17807. + /*
  17808. + * read reiser4 master super block at position specified by
  17809. + * mount option
  17810. + */
  17811. + super_bh = sb_bread(super,
  17812. + (sector_t)(sbinfo->altsuper / super->s_blocksize));
  17813. + else
  17814. +#endif
  17815. + /* read reiser4 master super block at 16-th 4096 block */
  17816. + super_bh = sb_bread(super,
  17817. + (sector_t)(REISER4_MAGIC_OFFSET / super->s_blocksize));
  17818. + if (!super_bh)
  17819. + return RETERR(-EIO);
  17820. +
  17821. + master_sb = (struct reiser4_master_sb *)super_bh->b_data;
  17822. + /* check reiser4 magic string */
  17823. + if (!strncmp(master_sb->magic, REISER4_SUPER_MAGIC_STRING,
  17824. + sizeof(REISER4_SUPER_MAGIC_STRING))) {
  17825. + /* reiser4 master super block contains filesystem blocksize */
  17826. + blocksize = le16_to_cpu(get_unaligned(&master_sb->blocksize));
  17827. +
  17828. + if (blocksize != PAGE_SIZE) {
  17829. + /*
  17830. + * currently reiser4's blocksize must be equal to
  17831. + * pagesize
  17832. + */
  17833. + if (!silent)
  17834. + warning("nikita-2609",
  17835. + "%s: wrong block size %ld\n", super->s_id,
  17836. + blocksize);
  17837. + brelse(super_bh);
  17838. + return RETERR(-EINVAL);
  17839. + }
  17840. + if (blocksize != super->s_blocksize) {
  17841. + /*
  17842. + * filesystem uses different blocksize. Reread master
  17843. + * super block with correct blocksize
  17844. + */
  17845. + brelse(super_bh);
  17846. + if (!sb_set_blocksize(super, (int)blocksize))
  17847. + return RETERR(-EINVAL);
  17848. + goto read_super_block;
  17849. + }
  17850. +
  17851. + sbinfo->df_plug =
  17852. + disk_format_plugin_by_unsafe_id(
  17853. + le16_to_cpu(get_unaligned(&master_sb->disk_plugin_id)));
  17854. + if (sbinfo->df_plug == NULL) {
  17855. + if (!silent)
  17856. + warning("nikita-26091",
  17857. + "%s: unknown disk format plugin %d\n",
  17858. + super->s_id,
  17859. + le16_to_cpu(get_unaligned(&master_sb->disk_plugin_id)));
  17860. + brelse(super_bh);
  17861. + return RETERR(-EINVAL);
  17862. + }
  17863. + sbinfo->diskmap_block = le64_to_cpu(get_unaligned(&master_sb->diskmap));
  17864. + brelse(super_bh);
  17865. + return 0;
  17866. + }
  17867. +
  17868. + /* there is no reiser4 on the device */
  17869. + if (!silent)
  17870. + warning("nikita-2608",
  17871. + "%s: wrong master super block magic", super->s_id);
  17872. + brelse(super_bh);
  17873. + return RETERR(-EINVAL);
  17874. +}
  17875. +
  17876. +static struct {
  17877. + reiser4_plugin_type type;
  17878. + reiser4_plugin_id id;
  17879. +} default_plugins[PSET_LAST] = {
  17880. + [PSET_FILE] = {
  17881. + .type = REISER4_FILE_PLUGIN_TYPE,
  17882. + .id = UNIX_FILE_PLUGIN_ID
  17883. + },
  17884. + [PSET_DIR] = {
  17885. + .type = REISER4_DIR_PLUGIN_TYPE,
  17886. + .id = HASHED_DIR_PLUGIN_ID
  17887. + },
  17888. + [PSET_HASH] = {
  17889. + .type = REISER4_HASH_PLUGIN_TYPE,
  17890. + .id = R5_HASH_ID
  17891. + },
  17892. + [PSET_FIBRATION] = {
  17893. + .type = REISER4_FIBRATION_PLUGIN_TYPE,
  17894. + .id = FIBRATION_DOT_O
  17895. + },
  17896. + [PSET_PERM] = {
  17897. + .type = REISER4_PERM_PLUGIN_TYPE,
  17898. + .id = NULL_PERM_ID
  17899. + },
  17900. + [PSET_FORMATTING] = {
  17901. + .type = REISER4_FORMATTING_PLUGIN_TYPE,
  17902. + .id = SMALL_FILE_FORMATTING_ID
  17903. + },
  17904. + [PSET_SD] = {
  17905. + .type = REISER4_ITEM_PLUGIN_TYPE,
  17906. + .id = STATIC_STAT_DATA_ID
  17907. + },
  17908. + [PSET_DIR_ITEM] = {
  17909. + .type = REISER4_ITEM_PLUGIN_TYPE,
  17910. + .id = COMPOUND_DIR_ID
  17911. + },
  17912. + [PSET_CIPHER] = {
  17913. + .type = REISER4_CIPHER_PLUGIN_TYPE,
  17914. + .id = NONE_CIPHER_ID
  17915. + },
  17916. + [PSET_DIGEST] = {
  17917. + .type = REISER4_DIGEST_PLUGIN_TYPE,
  17918. + .id = SHA256_32_DIGEST_ID
  17919. + },
  17920. + [PSET_COMPRESSION] = {
  17921. + .type = REISER4_COMPRESSION_PLUGIN_TYPE,
  17922. + .id = LZO1_COMPRESSION_ID
  17923. + },
  17924. + [PSET_COMPRESSION_MODE] = {
  17925. + .type = REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
  17926. + .id = CONVX_COMPRESSION_MODE_ID
  17927. + },
  17928. + [PSET_CLUSTER] = {
  17929. + .type = REISER4_CLUSTER_PLUGIN_TYPE,
  17930. + .id = CLUSTER_64K_ID
  17931. + },
  17932. + [PSET_CREATE] = {
  17933. + .type = REISER4_FILE_PLUGIN_TYPE,
  17934. + .id = UNIX_FILE_PLUGIN_ID
  17935. + }
  17936. +};
  17937. +
  17938. +/* access to default plugin table */
  17939. +reiser4_plugin *get_default_plugin(pset_member memb)
  17940. +{
  17941. + return plugin_by_id(default_plugins[memb].type,
  17942. + default_plugins[memb].id);
  17943. +}
  17944. +
  17945. +/**
  17946. + * reiser4_init_root_inode - obtain inode of root directory
  17947. + * @super: super block of filesystem
  17948. + *
  17949. + * Obtains inode of root directory (reading it from disk), initializes plugin
  17950. + * set if it was not initialized.
  17951. + */
  17952. +int reiser4_init_root_inode(struct super_block *super)
  17953. +{
  17954. + reiser4_super_info_data *sbinfo = get_super_private(super);
  17955. + struct inode *inode;
  17956. + int result = 0;
  17957. +
  17958. + inode = reiser4_iget(super, sbinfo->df_plug->root_dir_key(super), 0);
  17959. + if (IS_ERR(inode))
  17960. + return RETERR(PTR_ERR(inode));
  17961. +
  17962. + super->s_root = d_make_root(inode);
  17963. + if (!super->s_root) {
  17964. + return RETERR(-ENOMEM);
  17965. + }
  17966. +
  17967. + super->s_root->d_op = &sbinfo->ops.dentry;
  17968. +
  17969. + if (!is_inode_loaded(inode)) {
  17970. + pset_member memb;
  17971. + plugin_set *pset;
  17972. +
  17973. + pset = reiser4_inode_data(inode)->pset;
  17974. + for (memb = 0; memb < PSET_LAST; ++memb) {
  17975. +
  17976. + if (aset_get(pset, memb) != NULL)
  17977. + continue;
  17978. +
  17979. + result = grab_plugin_pset(inode, NULL, memb);
  17980. + if (result != 0)
  17981. + break;
  17982. +
  17983. + reiser4_inode_clr_flag(inode, REISER4_SDLEN_KNOWN);
  17984. + }
  17985. +
  17986. + if (result == 0) {
  17987. + if (REISER4_DEBUG) {
  17988. + for (memb = 0; memb < PSET_LAST; ++memb)
  17989. + assert("nikita-3500",
  17990. + aset_get(pset, memb) != NULL);
  17991. + }
  17992. + } else
  17993. + warning("nikita-3448", "Cannot set plugins of root: %i",
  17994. + result);
  17995. + reiser4_iget_complete(inode);
  17996. +
  17997. + /* As the default pset kept in the root dir may have been changed
  17998. + (length is unknown), call update_sd. */
  17999. + if (!reiser4_inode_get_flag(inode, REISER4_SDLEN_KNOWN)) {
  18000. + result = reiser4_grab_space(
  18001. + inode_file_plugin(inode)->estimate.update(inode),
  18002. + BA_CAN_COMMIT);
  18003. +
  18004. + if (result == 0)
  18005. + result = reiser4_update_sd(inode);
  18006. +
  18007. + all_grabbed2free();
  18008. + }
  18009. + }
  18010. +
  18011. + super->s_maxbytes = MAX_LFS_FILESIZE;
  18012. + return result;
  18013. +}
  18014. +
  18015. +/*
  18016. + * Local variables:
  18017. + * c-indentation-style: "K&R"
  18018. + * mode-name: "LC"
  18019. + * c-basic-offset: 8
  18020. + * tab-width: 8
  18021. + * fill-column: 79
  18022. + * End:
  18023. + */
  18024. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/inode.c linux-4.14.2/fs/reiser4/inode.c
  18025. --- linux-4.14.2.orig/fs/reiser4/inode.c 1970-01-01 01:00:00.000000000 +0100
  18026. +++ linux-4.14.2/fs/reiser4/inode.c 2017-11-26 22:13:09.000000000 +0100
  18027. @@ -0,0 +1,711 @@
  18028. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  18029. + reiser4/README */
  18030. +
  18031. +/* Inode specific operations. */
  18032. +
  18033. +#include "forward.h"
  18034. +#include "debug.h"
  18035. +#include "key.h"
  18036. +#include "kassign.h"
  18037. +#include "coord.h"
  18038. +#include "seal.h"
  18039. +#include "dscale.h"
  18040. +#include "plugin/item/item.h"
  18041. +#include "plugin/security/perm.h"
  18042. +#include "plugin/plugin.h"
  18043. +#include "plugin/object.h"
  18044. +#include "znode.h"
  18045. +#include "vfs_ops.h"
  18046. +#include "inode.h"
  18047. +#include "super.h"
  18048. +#include "reiser4.h"
  18049. +
  18050. +#include <linux/fs.h> /* for struct super_block, address_space */
  18051. +
  18052. +/* return reiser4 internal tree which inode belongs to */
  18053. +/* Audited by: green(2002.06.17) */
  18054. +reiser4_tree *reiser4_tree_by_inode(const struct inode *inode/* inode queried*/)
  18055. +{
  18056. + assert("nikita-256", inode != NULL);
  18057. + assert("nikita-257", inode->i_sb != NULL);
  18058. + return reiser4_get_tree(inode->i_sb);
  18059. +}
  18060. +
  18061. +/* return reiser4-specific inode flags */
  18062. +static inline unsigned long *inode_flags(const struct inode *const inode)
  18063. +{
  18064. + assert("nikita-2842", inode != NULL);
  18065. + return &reiser4_inode_data(inode)->flags;
  18066. +}
  18067. +
  18068. +/* set reiser4-specific flag @f in @inode */
  18069. +void reiser4_inode_set_flag(struct inode *inode, reiser4_file_plugin_flags f)
  18070. +{
  18071. + assert("nikita-2248", inode != NULL);
  18072. + set_bit((int)f, inode_flags(inode));
  18073. +}
  18074. +
  18075. +/* clear reiser4-specific flag @f in @inode */
  18076. +void reiser4_inode_clr_flag(struct inode *inode, reiser4_file_plugin_flags f)
  18077. +{
  18078. + assert("nikita-2250", inode != NULL);
  18079. + clear_bit((int)f, inode_flags(inode));
  18080. +}
  18081. +
  18082. +/* true if reiser4-specific flag @f is set in @inode */
  18083. +int reiser4_inode_get_flag(const struct inode *inode,
  18084. + reiser4_file_plugin_flags f)
  18085. +{
  18086. + assert("nikita-2251", inode != NULL);
  18087. + return test_bit((int)f, inode_flags(inode));
  18088. +}
  18089. +
/* Convert oid to inode number. On a 32-bit ino_t the cast truncates the
   high bits; the find-actor supplied to iget5_locked() disambiguates the
   resulting collisions. */
ino_t oid_to_ino(oid_t oid)
{
	return (ino_t) oid;
}

/* convert oid to user visible inode number */
ino_t oid_to_uino(oid_t oid)
{
	/* reiser4 object is uniquely identified by oid which is 64 bit
	   quantity. Kernel in-memory inode is indexed (in the hash table) by
	   32 bit i_ino field, but this is not a problem, because there is a
	   way to further distinguish inodes with identical inode numbers
	   (find_actor supplied to iget()).

	   But user space expects unique 32 bit inode number. Obviously this
	   is impossible. Work-around is to somehow hash oid into user visible
	   inode number.
	 */
	/* largest value representable in ino_t; on 64-bit platforms
	   REISER4_INO_IS_OID short-circuits the test below */
	oid_t max_ino = (ino_t) ~0;

	if (REISER4_INO_IS_OID || (oid <= max_ino))
		return oid;
	else
		/* this is remotely similar to algorithm used to find next pid
		   to use for process: after wrap-around start from some
		   offset rather than from 0. Idea is that there are some long
		   living objects with which we don't want to collide.

		   NOTE(review): "(oid - max_ino) & (max_ino >> 1)" folds the
		   overflowing oid into the lower half of the ino_t range and
		   offsets it by REISER4_UINO_SHIFT -- distinct oids may still
		   collide here; presumably acceptable for a user-visible
		   number only.
		 */
		return REISER4_UINO_SHIFT + ((oid - max_ino) & (max_ino >> 1));
}
  18121. +
  18122. +/* check that "inode" is on reiser4 file-system */
  18123. +int is_reiser4_inode(const struct inode *inode/* inode queried */)
  18124. +{
  18125. + return inode != NULL && is_reiser4_super(inode->i_sb);
  18126. +}
  18127. +
  18128. +/* Maximal length of a name that can be stored in directory @inode.
  18129. +
  18130. + This is used in check during file creation and lookup. */
  18131. +int reiser4_max_filename_len(const struct inode *inode/* inode queried */)
  18132. +{
  18133. + assert("nikita-287", is_reiser4_inode(inode));
  18134. + assert("nikita-1710", inode_dir_item_plugin(inode));
  18135. + if (inode_dir_item_plugin(inode)->s.dir.max_name_len)
  18136. + return inode_dir_item_plugin(inode)->s.dir.max_name_len(inode);
  18137. + else
  18138. + return 255;
  18139. +}
  18140. +
#if REISER4_USE_COLLISION_LIMIT
/* Maximal number of hash collisions tolerated for directory @dir.
   The limit is taken from the per-inode plugin state; only compiled in
   when collision limiting is enabled. */
int max_hash_collisions(const struct inode *dir/* inode queried */)
{
	assert("nikita-1711", dir != NULL);
	return reiser4_inode_data(dir)->plugin.max_collisions;
}
#endif /* REISER4_USE_COLLISION_LIMIT */
  18149. +
  18150. +/* Install file, inode, and address_space operation on @inode, depending on
  18151. + its mode. */
/* Install file, inode, and address_space operation on @inode, depending on
   its mode. @data carries creation parameters and may be NULL when the
   inode is being initialized from disk (see the rdev handling below).
   Returns 0 on success, -EINVAL (and a bad inode) for an unknown mode. */
int setup_inode_ops(struct inode *inode /* inode to initialize */ ,
		    reiser4_object_create_data * data /* parameters to create
						       * object */ )
{
	/* NOTE(review): sinfo is computed but never read in this function */
	reiser4_super_info_data *sinfo;
	file_plugin *fplug;
	dir_plugin *dplug;

	fplug = inode_file_plugin(inode);
	dplug = inode_dir_plugin(inode);

	sinfo = get_super_private(inode->i_sb);

	switch (inode->i_mode & S_IFMT) {
	case S_IFSOCK:
	case S_IFBLK:
	case S_IFCHR:
	case S_IFIFO:
		{
			dev_t rdev;	/* to keep gcc happy */

			assert("vs-46", fplug != NULL);
			/* ugly hack with rdev: when called without creation
			   data (disk read path) the device number was already
			   stashed in ->i_rdev; consume and reset it so
			   init_special_inode() below sets it properly */
			if (data == NULL) {
				rdev = inode->i_rdev;
				inode->i_rdev = 0;
			} else
				rdev = data->rdev;
			inode->i_blocks = 0;
			assert("vs-42", fplug->h.id == SPECIAL_FILE_PLUGIN_ID);
			inode->i_op = file_plugins[fplug->h.id].inode_ops;
			/* initialize inode->i_fop and inode->i_rdev for block
			   and char devices */
			init_special_inode(inode, inode->i_mode, rdev);
			/* all address space operations are null */
			inode->i_mapping->a_ops =
			    file_plugins[fplug->h.id].as_ops;
			break;
		}
	case S_IFLNK:
		/* NOTE(review): assert labels "vs-46"/"vs-42"/"vs-43" are
		   reused across branches; reiser4 labels are normally unique */
		assert("vs-46", fplug != NULL);
		assert("vs-42", fplug->h.id == SYMLINK_FILE_PLUGIN_ID);
		inode->i_op = file_plugins[fplug->h.id].inode_ops;
		inode->i_fop = NULL;
		/* all address space operations are null */
		inode->i_mapping->a_ops = file_plugins[fplug->h.id].as_ops;
		break;
	case S_IFDIR:
		assert("vs-46", dplug != NULL);
		assert("vs-43", (dplug->h.id == HASHED_DIR_PLUGIN_ID ||
				 dplug->h.id == SEEKABLE_HASHED_DIR_PLUGIN_ID));
		inode->i_op = dir_plugins[dplug->h.id].inode_ops;
		inode->i_fop = dir_plugins[dplug->h.id].file_ops;
		inode->i_mapping->a_ops = dir_plugins[dplug->h.id].as_ops;
		break;
	case S_IFREG:
		assert("vs-46", fplug != NULL);
		assert("vs-43", (fplug->h.id == UNIX_FILE_PLUGIN_ID ||
				 fplug->h.id == CRYPTCOMPRESS_FILE_PLUGIN_ID));
		inode->i_op = file_plugins[fplug->h.id].inode_ops;
		inode->i_fop = file_plugins[fplug->h.id].file_ops;
		inode->i_mapping->a_ops = file_plugins[fplug->h.id].as_ops;
		break;
	default:
		warning("nikita-291", "wrong file mode: %o for %llu",
			inode->i_mode,
			(unsigned long long)get_inode_oid(inode));
		reiser4_make_bad_inode(inode);
		return RETERR(-EINVAL);
	}
	return 0;
}
  18224. +
/* Initialize inode from disk data. Called with inode locked.
   Return inode locked. @coord points at the stat-data item whose plugin
   parses the on-disk bytes into in-memory inode state. */
static int init_inode(struct inode *inode /* inode to initialise */ ,
		      coord_t *coord/* coord of stat data */)
{
	int result;
	item_plugin *iplug;
	void *body;
	int length;
	reiser4_inode *state;

	assert("nikita-292", coord != NULL);
	assert("nikita-293", inode != NULL);

	coord_clear_iplug(coord);
	/* pin node data in memory while the stat-data body is parsed;
	   matched by zrelse() at the end */
	result = zload(coord->node);
	if (result)
		return result;
	iplug = item_plugin_by_coord(coord);
	body = item_body_by_coord(coord);
	length = item_length_by_coord(coord);

	assert("nikita-295", iplug != NULL);
	assert("nikita-296", body != NULL);
	assert("nikita-297", length > 0);

	/* inode is under I_LOCK now */

	state = reiser4_inode_data(inode);
	/* call stat-data plugin method to load sd content into inode */
	result = iplug->s.sd.init_inode(inode, body, length);
	/* record which item plugin parsed this stat-data */
	set_plugin(&state->pset, PSET_SD, item_plugin_to_plugin(iplug));
	if (result == 0) {
		result = setup_inode_ops(inode, NULL);
		/* finish_pset() is skipped while the root directory itself
		   is still being set up during mount (no s_root yet) */
		if (result == 0 && inode->i_sb->s_root &&
		    inode->i_sb->s_root->d_inode)
			result = finish_pset(inode);
	}
	zrelse(coord->node);
	return result;
}
  18266. +
/* read `inode' from the disk. This is what was previously in
   reiserfs_read_inode2().

   Must be called with inode locked. Return inode still locked.
   @key identifies the stat-data; @silent suppresses lookup warnings.
   On any failure the inode is marked bad. */
static int read_inode(struct inode *inode /* inode to read from disk */ ,
		      const reiser4_key * key /* key of stat data */ ,
		      int silent)
{
	int result;
	lock_handle lh;
	reiser4_inode *info;
	coord_t coord;

	assert("nikita-298", inode != NULL);
	assert("nikita-1945", !is_inode_loaded(inode));

	info = reiser4_inode_data(inode);
	assert("nikita-300", info->locality_id != 0);

	coord_init_zero(&coord);
	init_lh(&lh);
	/* locate stat-data in a tree and return znode locked */
	result = lookup_sd(inode, ZNODE_READ_LOCK, &coord, &lh, key, silent);
	assert("nikita-301", !is_inode_loaded(inode));
	if (result == 0) {
		/* use stat-data plugin to load sd into inode. */
		result = init_inode(inode, &coord);
		if (result == 0) {
			/* initialize stat-data seal so later updates can
			   revalidate the cached coord cheaply */
			spin_lock_inode(inode);
			reiser4_seal_init(&info->sd_seal, &coord, key);
			info->sd_coord = coord;
			spin_unlock_inode(inode);

			/* call file plugin's method to initialize plugin
			 * specific part of inode */
			if (inode_file_plugin(inode)->init_inode_data)
				inode_file_plugin(inode)->init_inode_data(inode,
									  NULL,
									  0);
			/* load detached directory cursors for stateless
			 * directory readers (NFS). */
			reiser4_load_cursors(inode);

			/* Check the opened inode for consistency. */
			result =
			    get_super_private(inode->i_sb)->df_plug->
			    check_open(inode);
		}
	}
	/* lookup_sd() doesn't release coord because we want znode
	   stay read-locked while stat-data fields are accessed in
	   init_inode() */
	done_lh(&lh);

	if (result != 0)
		reiser4_make_bad_inode(inode);
	return result;
}
  18327. +
  18328. +/* initialise new reiser4 inode being inserted into hash table. */
  18329. +static int init_locked_inode(struct inode *inode /* new inode */ ,
  18330. + void *opaque /* key of stat data passed to
  18331. + * the iget5_locked as cookie */)
  18332. +{
  18333. + reiser4_key *key;
  18334. +
  18335. + assert("nikita-1995", inode != NULL);
  18336. + assert("nikita-1996", opaque != NULL);
  18337. + key = opaque;
  18338. + set_inode_oid(inode, get_key_objectid(key));
  18339. + reiser4_inode_data(inode)->locality_id = get_key_locality(key);
  18340. + return 0;
  18341. +}
  18342. +
  18343. +/* reiser4_inode_find_actor() - "find actor" supplied by reiser4 to
  18344. + iget5_locked().
  18345. +
  18346. + This function is called by iget5_locked() to distinguish reiser4 inodes
  18347. + having the same inode numbers. Such inodes can only exist due to some error
  18348. + condition. One of them should be bad. Inodes with identical inode numbers
  18349. + (objectids) are distinguished by their packing locality.
  18350. +
  18351. +*/
  18352. +static int reiser4_inode_find_actor(struct inode *inode /* inode from hash table
  18353. + * to check */ ,
  18354. + void *opaque /* "cookie" passed to
  18355. + * iget5_locked(). This
  18356. + * is stat-data key */)
  18357. +{
  18358. + reiser4_key *key;
  18359. +
  18360. + key = opaque;
  18361. + return
  18362. + /* oid is unique, so first term is enough, actually. */
  18363. + get_inode_oid(inode) == get_key_objectid(key) &&
  18364. + /*
  18365. + * also, locality should be checked, but locality is stored in
  18366. + * the reiser4-specific part of the inode, and actor can be
  18367. + * called against arbitrary inode that happened to be in this
  18368. + * hash chain. Hence we first have to check that this is
  18369. + * reiser4 inode at least. is_reiser4_inode() is probably too
  18370. + * early to call, as inode may have ->i_op not yet
  18371. + * initialised.
  18372. + */
  18373. + is_reiser4_super(inode->i_sb) &&
  18374. + /*
  18375. + * usually objectid is unique, but pseudo files use counter to
  18376. + * generate objectid. All pseudo files are placed into special
  18377. + * (otherwise unused) locality.
  18378. + */
  18379. + reiser4_inode_data(inode)->locality_id == get_key_locality(key);
  18380. +}
  18381. +
/* kmem_cache_create() constructor hook: set up the per-inode loading
   mutex exactly once per slab object. */
void loading_init_once(reiser4_inode * info)
{
	mutex_init(&info->loading);
}

/* reiser4_alloc_inode() sanity hook: a recycled object must not come
   back with its loading mutex still held. */
void loading_alloc(reiser4_inode * info)
{
	assert("vs-1717", !mutex_is_locked(&info->loading));
}

/* reiser4_destroy() sanity hook: the mutex must be free on teardown. */
void loading_destroy(reiser4_inode * info)
{
	assert("vs-1717a", !mutex_is_locked(&info->loading));
}

/* Take the loading mutex; paired with loading_end(). Held across
   stat-data reading in reiser4_iget() and, on success, until
   reiser4_iget_complete(). */
static void loading_begin(reiser4_inode * info)
{
	mutex_lock(&info->loading);
}

/* Release the loading mutex taken by loading_begin(). */
static void loading_end(reiser4_inode * info)
{
	mutex_unlock(&info->loading);
}
  18409. +
/**
 * reiser4_iget - obtain inode via iget5_locked, read from disk if necessary
 * @super: super block of filesystem
 * @key: key of inode's stat-data
 * @silent: suppress lookup warnings when non-zero
 *
 * This is our helper function a la iget(). This is be called by
 * lookup_common() and reiser4_read_super(). Return inode locked or error
 * encountered.
 *
 * Locking protocol: when this function loads the inode itself (the
 * !is_inode_loaded() path succeeds), info->loading is LEFT HELD and must
 * be released by the caller through reiser4_iget_complete(); on failure
 * it is released here before the inode is dropped.
 */
struct inode *reiser4_iget(struct super_block *super, const reiser4_key *key,
			   int silent)
{
	struct inode *inode;
	int result;
	reiser4_inode *info;

	assert("nikita-302", super != NULL);
	assert("nikita-303", key != NULL);

	result = 0;

	/* call iget(). Our ->read_inode() is dummy, so this will either
	   find inode in cache or return uninitialised inode */
	inode = iget5_locked(super,
			     (unsigned long)get_key_objectid(key),
			     reiser4_inode_find_actor,
			     init_locked_inode, (reiser4_key *) key);
	if (inode == NULL)
		return ERR_PTR(RETERR(-ENOMEM));
	if (is_bad_inode(inode)) {
		warning("nikita-304", "Bad inode found");
		reiser4_print_key("key", key);
		iput(inode);
		return ERR_PTR(RETERR(-EIO));
	}

	info = reiser4_inode_data(inode);

	/* Reiser4 inode state bit REISER4_LOADED is used to distinguish fully
	   loaded and initialized inode from just allocated inode. If
	   REISER4_LOADED bit is not set, reiser4_iget() completes loading under
	   info->loading. The place in reiser4 which uses not initialized inode
	   is the reiser4 repacker, see repacker-related functions in
	   plugin/item/extent.c */
	if (!is_inode_loaded(inode)) {
		loading_begin(info);
		/* re-check under the mutex: another thread may have finished
		   the load while we were blocked */
		if (!is_inode_loaded(inode)) {
			/* locking: iget5_locked returns locked inode */
			assert("nikita-1941", !is_inode_loaded(inode));
			assert("nikita-1949",
			       reiser4_inode_find_actor(inode,
							(reiser4_key *) key));
			/* now, inode has objectid as ->i_ino and locality in
			   reiser4-specific part. This is enough for
			   read_inode() to read stat data from the disk */
			result = read_inode(inode, key, silent);
		} else
			loading_end(info);
	}

	if (inode->i_state & I_NEW)
		unlock_new_inode(inode);

	if (is_bad_inode(inode)) {
		/* read_inode() failed and marked the inode bad; the loading
		   mutex is still held from loading_begin() above */
		assert("vs-1717", result != 0);
		loading_end(info);
		iput(inode);
		inode = ERR_PTR(result);
	} else if (REISER4_DEBUG) {
		/* debug-only consistency checks of the freshly read inode */
		reiser4_key found_key;

		assert("vs-1717", result == 0);
		build_sd_key(inode, &found_key);
		if (!keyeq(&found_key, key)) {
			warning("nikita-305", "Wrong key in sd");
			reiser4_print_key("sought for", key);
			reiser4_print_key("found", &found_key);
		}
		if (inode->i_nlink == 0) {
			warning("nikita-3559", "Unlinked inode found: %llu\n",
				(unsigned long long)get_inode_oid(inode));
		}
	}
	return inode;
}
  18496. +
/* reiser4_iget() may return not fully initialized inode, this function should
 * be called after one completes reiser4 inode initializing: it publishes the
 * REISER4_LOADED state and drops the loading mutex that reiser4_iget() left
 * held on the successful load path. Idempotent for already-loaded inodes. */
void reiser4_iget_complete(struct inode *inode)
{
	assert("zam-988", is_reiser4_inode(inode));

	if (!is_inode_loaded(inode)) {
		reiser4_inode_set_flag(inode, REISER4_LOADED);
		loading_end(reiser4_inode_data(inode));
	}
}
  18508. +
  18509. +void reiser4_make_bad_inode(struct inode *inode)
  18510. +{
  18511. + assert("nikita-1934", inode != NULL);
  18512. +
  18513. + /* clear LOADED bit */
  18514. + reiser4_inode_clr_flag(inode, REISER4_LOADED);
  18515. + make_bad_inode(inode);
  18516. + return;
  18517. +}
  18518. +
/*
 * Accessors for the per-inode plugin set (->pset) and heir set (->hset).
 * Each returns the plugin of the given kind controlling @inode.
 *
 * NOTE(review): several assert labels below are duplicated
 * ("nikita-2001", "vs-534", "edward-1329") although reiser4 labels are
 * normally unique; worth deduplicating against the label database.
 */
file_plugin *inode_file_plugin(const struct inode *inode)
{
	assert("nikita-1997", inode != NULL);
	return reiser4_inode_data(inode)->pset->file;
}

dir_plugin *inode_dir_plugin(const struct inode *inode)
{
	assert("nikita-1998", inode != NULL);
	return reiser4_inode_data(inode)->pset->dir;
}

formatting_plugin *inode_formatting_plugin(const struct inode *inode)
{
	assert("nikita-2000", inode != NULL);
	return reiser4_inode_data(inode)->pset->formatting;
}

hash_plugin *inode_hash_plugin(const struct inode *inode)
{
	assert("nikita-2001", inode != NULL);
	return reiser4_inode_data(inode)->pset->hash;
}

fibration_plugin *inode_fibration_plugin(const struct inode *inode)
{
	assert("nikita-2001", inode != NULL);
	return reiser4_inode_data(inode)->pset->fibration;
}

cipher_plugin *inode_cipher_plugin(const struct inode *inode)
{
	assert("edward-36", inode != NULL);
	return reiser4_inode_data(inode)->pset->cipher;
}

compression_plugin *inode_compression_plugin(const struct inode *inode)
{
	assert("edward-37", inode != NULL);
	return reiser4_inode_data(inode)->pset->compression;
}

compression_mode_plugin *inode_compression_mode_plugin(const struct inode *
						       inode)
{
	assert("edward-1330", inode != NULL);
	return reiser4_inode_data(inode)->pset->compression_mode;
}

cluster_plugin *inode_cluster_plugin(const struct inode *inode)
{
	assert("edward-1328", inode != NULL);
	return reiser4_inode_data(inode)->pset->cluster;
}

file_plugin *inode_create_plugin(const struct inode *inode)
{
	assert("edward-1329", inode != NULL);
	return reiser4_inode_data(inode)->pset->create;
}

digest_plugin *inode_digest_plugin(const struct inode *inode)
{
	assert("edward-86", inode != NULL);
	return reiser4_inode_data(inode)->pset->digest;
}

item_plugin *inode_sd_plugin(const struct inode *inode)
{
	assert("vs-534", inode != NULL);
	return reiser4_inode_data(inode)->pset->sd;
}

item_plugin *inode_dir_item_plugin(const struct inode *inode)
{
	assert("vs-534", inode != NULL);
	return reiser4_inode_data(inode)->pset->dir_item;
}

/* create plugin inherited by children of @inode, from the heir set */
file_plugin *child_create_plugin(const struct inode *inode)
{
	assert("edward-1329", inode != NULL);
	return reiser4_inode_data(inode)->hset->create;
}
  18603. +
  18604. +void inode_set_extension(struct inode *inode, sd_ext_bits ext)
  18605. +{
  18606. + reiser4_inode *state;
  18607. +
  18608. + assert("nikita-2716", inode != NULL);
  18609. + assert("nikita-2717", ext < LAST_SD_EXTENSION);
  18610. + assert("nikita-3491", spin_inode_is_locked(inode));
  18611. +
  18612. + state = reiser4_inode_data(inode);
  18613. + state->extmask |= 1 << ext;
  18614. + /* force re-calculation of stat-data length on next call to
  18615. + update_sd(). */
  18616. + reiser4_inode_clr_flag(inode, REISER4_SDLEN_KNOWN);
  18617. +}
  18618. +
  18619. +void inode_clr_extension(struct inode *inode, sd_ext_bits ext)
  18620. +{
  18621. + reiser4_inode *state;
  18622. +
  18623. + assert("vpf-1926", inode != NULL);
  18624. + assert("vpf-1927", ext < LAST_SD_EXTENSION);
  18625. + assert("vpf-1928", spin_inode_is_locked(inode));
  18626. +
  18627. + state = reiser4_inode_data(inode);
  18628. + state->extmask &= ~(1 << ext);
  18629. + /* force re-calculation of stat-data length on next call to
  18630. + update_sd(). */
  18631. + reiser4_inode_clr_flag(inode, REISER4_SDLEN_KNOWN);
  18632. +}
  18633. +
/* If replacing value @old by @new changes the number of bytes the value
   needs in its scaled on-disk encoding (dscale_fit()), invalidate the
   cached stat-data length. Caller holds the inode spin lock. */
void inode_check_scale_nolock(struct inode *inode, __u64 old, __u64 new)
{
	assert("edward-1287", inode != NULL);
	if (!dscale_fit(old, new))
		reiser4_inode_clr_flag(inode, REISER4_SDLEN_KNOWN);
	return;
}

/* Locking wrapper around inode_check_scale_nolock(). */
void inode_check_scale(struct inode *inode, __u64 old, __u64 new)
{
	assert("nikita-2875", inode != NULL);
	spin_lock_inode(inode);
	inode_check_scale_nolock(inode, old, new);
	spin_unlock_inode(inode);
}
  18649. +
/*
 * initialize ->ordering field of inode. This field defines how file stat-data
 * and body is ordered within a tree with respect to other objects within the
 * same parent directory.
 */
void
init_inode_ordering(struct inode *inode,
		    reiser4_object_create_data * crd, int create)
{
	reiser4_key key;

	if (create) {
		/* new object: ordering comes from the directory-entry key of
		   its name in the parent. @crd must be non-NULL on this
		   path (only dereferenced here). */
		struct inode *parent;

		parent = crd->parent;
		assert("nikita-3224", inode_dir_plugin(parent) != NULL);
		inode_dir_plugin(parent)->build_entry_key(parent,
							  &crd->dentry->d_name,
							  &key);
	} else {
		/* existing object: take ordering from the stat-data key */
		coord_t *coord;

		coord = &reiser4_inode_data(inode)->sd_coord;
		coord_clear_iplug(coord);
		/* safe to use ->sd_coord, because node is under long term
		 * lock */
		WITH_DATA(coord->node, item_key_by_coord(coord, &key));
	}

	set_inode_ordering(inode, get_key_ordering(&key));
}
  18681. +
  18682. +znode *inode_get_vroot(struct inode *inode)
  18683. +{
  18684. + reiser4_block_nr blk;
  18685. + znode *result;
  18686. +
  18687. + spin_lock_inode(inode);
  18688. + blk = reiser4_inode_data(inode)->vroot;
  18689. + spin_unlock_inode(inode);
  18690. + if (!disk_addr_eq(&UBER_TREE_ADDR, &blk))
  18691. + result = zlook(reiser4_tree_by_inode(inode), &blk);
  18692. + else
  18693. + result = NULL;
  18694. + return result;
  18695. +}
  18696. +
  18697. +void inode_set_vroot(struct inode *inode, znode *vroot)
  18698. +{
  18699. + spin_lock_inode(inode);
  18700. + reiser4_inode_data(inode)->vroot = *znode_get_block(vroot);
  18701. + spin_unlock_inode(inode);
  18702. +}
  18703. +
#if REISER4_DEBUG

/* Debug-only invariant check: the inode spin lock must be held by the
   caller whenever reiser4-private inode state is inspected. */
void reiser4_inode_invariant(const struct inode *inode)
{
	assert("nikita-3077", spin_inode_is_locked(inode));
}

/* Debug-only: true iff @r4_inode owns no jnodes -- its jnode radix tree
   is empty and the debug counter agrees. */
int inode_has_no_jnodes(reiser4_inode * r4_inode)
{
	return jnode_tree_by_reiser4_inode(r4_inode)->rnode == NULL &&
		r4_inode->nr_jnodes == 0;
}

#endif
  18718. +
/* Check that directory @dir is empty (contains only dot and dotdot).
   Despite the historical name, returns 0 when empty and -ENOTEMPTY
   otherwise -- i.e. it is an error-code predicate, not a boolean. */
/* FIXME: shouldn't it be dir plugin method? */
int is_dir_empty(const struct inode *dir)
{
	assert("nikita-1976", dir != NULL);

	/* rely on our method to maintain directory i_size being equal to the
	   number of entries. */
	return dir->i_size <= 2 ? 0 : RETERR(-ENOTEMPTY);
}
  18729. +
  18730. +/* Make Linus happy.
  18731. + Local variables:
  18732. + c-indentation-style: "K&R"
  18733. + mode-name: "LC"
  18734. + c-basic-offset: 8
  18735. + tab-width: 8
  18736. + fill-column: 120
  18737. + End:
  18738. +*/
  18739. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/inode.h linux-4.14.2/fs/reiser4/inode.h
  18740. --- linux-4.14.2.orig/fs/reiser4/inode.h 1970-01-01 01:00:00.000000000 +0100
  18741. +++ linux-4.14.2/fs/reiser4/inode.h 2017-11-26 22:13:09.000000000 +0100
  18742. @@ -0,0 +1,506 @@
  18743. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  18744. + reiser4/README */
  18745. +
  18746. +/* Inode functions. */
  18747. +
  18748. +#if !defined(__REISER4_INODE_H__)
  18749. +#define __REISER4_INODE_H__
  18750. +
  18751. +#include "forward.h"
  18752. +#include "debug.h"
  18753. +#include "key.h"
  18754. +#include "seal.h"
  18755. +#include "plugin/plugin.h"
  18756. +#include "plugin/file/cryptcompress.h"
  18757. +#include "plugin/file/file.h"
  18758. +#include "plugin/dir/dir.h"
  18759. +#include "plugin/plugin_set.h"
  18760. +#include "plugin/security/perm.h"
  18761. +#include "vfs_ops.h"
  18762. +#include "jnode.h"
  18763. +#include "fsdata.h"
  18764. +
  18765. +#include <linux/types.h> /* for __u?? , ino_t */
  18766. +#include <linux/fs.h> /* for struct super_block, struct
  18767. + * rw_semaphore, etc */
  18768. +#include <linux/spinlock.h>
  18769. +#include <asm/types.h>
  18770. +
/* reiser4-specific inode flags. They are "transient" and are not
   supposed to be stored on disk. Used to trace "state" of inode.
   Each value is a BIT NUMBER into reiser4_inode->flags, manipulated
   with reiser4_inode_{set,clr,get}_flag() (kernel bitops).
*/
typedef enum {
	/* this is light-weight inode, inheriting some state from its
	   parent */
	REISER4_LIGHT_WEIGHT = 0,
	/* stat data wasn't yet created */
	REISER4_NO_SD = 1,
	/* internal immutable flag. Currently is only used
	   to avoid race condition during file creation.
	   See comment in create_object(). */
	REISER4_IMMUTABLE = 2,
	/* inode was read from storage */
	REISER4_LOADED = 3,
	/* this bit is set for symlinks. inode->i_private points to target
	   name of symlink. */
	REISER4_GENERIC_PTR_USED = 4,
	/* set if size of stat-data item for this inode is known. If this is
	 * set we can avoid recalculating size of stat-data on each update. */
	REISER4_SDLEN_KNOWN = 5,
	/* reiser4_inode->crypt points to the crypto stat */
	REISER4_CRYPTO_STAT_LOADED = 6,
	/* cryptcompress_inode_data points to the secret key */
	REISER4_SECRET_KEY_INSTALLED = 7,
	/* File (possibly) has pages corresponding to the tail items, that
	 * were created by ->readpage. It is set by mmap_unix_file() and
	 * sendfile_unix_file(). This bit is inspected by write_unix_file and
	 * kill-hook of tail items. It is never cleared once set. This bit is
	 * modified and inspected under i_mutex. */
	REISER4_HAS_MMAP = 8,
	REISER4_PART_MIXED = 9,
	REISER4_PART_IN_CONV = 10,
	/* This flag indicates that file plugin conversion is in progress */
	REISER4_FILE_CONV_IN_PROGRESS = 11
} reiser4_file_plugin_flags;
  18808. +
  18809. +/* state associated with each inode.
  18810. + reiser4 inode.
  18811. +
  18812. + NOTE-NIKITA In 2.5 kernels it is not necessary that all file-system inodes
  18813. + be of the same size. File-system allocates inodes by itself through
  18814. + s_op->allocate_inode() method. So, it is possible to adjust size of inode
  18815. + at the time of its creation.
  18816. +
  18817. + Invariants involving parts of this data-type:
  18818. +
  18819. + [inode->eflushed]
  18820. +
  18821. +*/
  18822. +
  18823. +typedef struct reiser4_inode reiser4_inode;
  18824. +/* return pointer to reiser4-specific part of inode */
  18825. +static inline reiser4_inode *reiser4_inode_data(const struct inode *inode
  18826. + /* inode queried */ );
  18827. +
#if BITS_PER_LONG == 64

#define REISER4_INO_IS_OID (1)
/* ino_t is wide enough to hold a whole 64-bit oid, so no separate
   storage for the high bits is needed: oid_hi_t is an empty struct
   (zero size, GNU extension).
   NOTE(review): the stray ';' inside the braces looks accidental --
   harmless, but "typedef struct {} oid_hi_t;" was probably meant. */
typedef struct {;
} oid_hi_t;

/* BITS_PER_LONG == 64 */
#else

#define REISER4_INO_IS_OID (0)
/* 32-bit ino_t: the high 32 bits of the oid are kept here */
typedef __u32 oid_hi_t;

/* BITS_PER_LONG == 64 */
#endif
  18842. +
struct reiser4_inode {
	/* spin lock protecting fields of this structure. */
	spinlock_t guard;
	/* main plugin set that control the file
	   (see comments in plugin/plugin_set.c) */
	plugin_set *pset;
	/* plugin set for inheritance
	   (see comments in plugin/plugin_set.c) */
	plugin_set *hset;
	/* high 32 bits of object id (empty on 64-bit, see oid_hi_t) */
	oid_hi_t oid_hi;
	/* seal for stat-data */
	seal_t sd_seal;
	/* locality id for this file */
	oid_t locality_id;
#if REISER4_LARGE_KEY
	__u64 ordering;
#endif
	/* coord of stat-data in sealed node */
	coord_t sd_coord;
	/* bit-mask of stat-data extensions used by this file */
	__u64 extmask;
	/* bitmask of non-default plugins for this inode */
	__u16 plugin_mask;
	/* bitmask of set heir plugins for this inode. */
	__u16 heir_mask;
	union {
		struct list_head readdir_list;
		struct list_head not_used;
	} lists;
	/* per-inode flags. Filled by values of reiser4_file_plugin_flags */
	unsigned long flags;
	union {
		/* fields specific to unix_file plugin */
		struct unix_file_info unix_file_info;
		/* fields specific to cryptcompress file plugin */
		struct cryptcompress_info cryptcompress_info;
	} file_plugin_data;

	/* this semaphore is to serialize readers and writers of @pset->file
	 * when file plugin conversion is enabled
	 */
	struct rw_semaphore conv_sem;

	/* tree of jnodes. Phantom jnodes (ones not attached to any atom) are
	   tagged in that tree by EFLUSH_TAG_ANONYMOUS */
	struct radix_tree_root jnodes_tree;
#if REISER4_DEBUG
	/* number of unformatted node jnodes of this file in jnode hash table */
	unsigned long nr_jnodes;
#endif

	/* block number of virtual root for this object. See comment above
	 * fs/reiser4/search.c:handle_vroot() */
	reiser4_block_nr vroot;
	/* serializes inode loading; see loading_begin()/loading_end() and
	 * reiser4_iget()/reiser4_iget_complete() in inode.c */
	struct mutex loading;
};
  18900. +
  18901. +void loading_init_once(reiser4_inode *);
  18902. +void loading_alloc(reiser4_inode *);
  18903. +void loading_destroy(reiser4_inode *);
  18904. +
/* combined allocation unit: reiser4 private state followed by the VFS
   inode, so either part can be reached from the other via container_of */
struct reiser4_inode_object {
	/* private part */
	reiser4_inode p;
	/* generic fields not specific to reiser4, but used by VFS */
	struct inode vfs_inode;
};

/* return pointer to the reiser4 specific portion of @inode */
static inline reiser4_inode *reiser4_inode_data(const struct inode *inode
						/* inode queried */ )
{
	assert("nikita-254", inode != NULL);
	return &container_of(inode, struct reiser4_inode_object, vfs_inode)->p;
}

/* inverse of reiser4_inode_data(): VFS inode embedding @r4_inode */
static inline struct inode *inode_by_reiser4_inode(const reiser4_inode *
						   r4_inode /* inode queried */
						   )
{
	return &container_of(r4_inode, struct reiser4_inode_object,
			     p)->vfs_inode;
}
  18927. +
/*
 * reiser4 inodes are identified by 64bit object-id (oid_t), but in struct
 * inode ->i_ino field is of type ino_t (long) that can be either 32 or 64
 * bits.
 *
 * If ->i_ino is 32 bits we store remaining 32 bits in reiser4 specific part
 * of inode, otherwise whole oid is stored in i_ino.
 *
 * Wrappers below ([sg]et_inode_oid()) are used to hide this difference.
 */

/* number of bits ino_t can hold; shift amount for the oid high part */
#define OID_HI_SHIFT (sizeof(ino_t) * 8)

#if REISER4_INO_IS_OID

/* 64-bit ino_t: i_ino holds the whole oid */
static inline oid_t get_inode_oid(const struct inode *inode)
{
	return inode->i_ino;
}

static inline void set_inode_oid(struct inode *inode, oid_t oid)
{
	inode->i_ino = oid;
}

/* REISER4_INO_IS_OID */
#else

/* 32-bit ino_t: reassemble the oid from ->oid_hi and i_ino */
static inline oid_t get_inode_oid(const struct inode *inode)
{
	return
	    ((__u64) reiser4_inode_data(inode)->oid_hi << OID_HI_SHIFT) |
	    inode->i_ino;
}

static inline void set_inode_oid(struct inode *inode, oid_t oid)
{
	assert("nikita-2519", inode != NULL);
	inode->i_ino = (ino_t) (oid);
	reiser4_inode_data(inode)->oid_hi = (oid) >> OID_HI_SHIFT;
	assert("nikita-2521", get_inode_oid(inode) == (oid));
}

/* REISER4_INO_IS_OID */
#endif
  18973. +
  18974. +static inline oid_t get_inode_locality(const struct inode *inode)
  18975. +{
  18976. + return reiser4_inode_data(inode)->locality_id;
  18977. +}
  18978. +
  18979. +#if REISER4_LARGE_KEY
  18980. +static inline __u64 get_inode_ordering(const struct inode *inode)
  18981. +{
  18982. + return reiser4_inode_data(inode)->ordering;
  18983. +}
  18984. +
  18985. +static inline void set_inode_ordering(const struct inode *inode, __u64 ordering)
  18986. +{
  18987. + reiser4_inode_data(inode)->ordering = ordering;
  18988. +}
  18989. +
  18990. +#else
  18991. +
  18992. +#define get_inode_ordering(inode) (0)
  18993. +#define set_inode_ordering(inode, val) noop
  18994. +
  18995. +#endif
  18996. +
  18997. +/* return inode in which @uf_info is embedded */
  18998. +static inline struct inode *
  18999. +unix_file_info_to_inode(const struct unix_file_info *uf_info)
  19000. +{
  19001. + return &container_of(uf_info, struct reiser4_inode_object,
  19002. + p.file_plugin_data.unix_file_info)->vfs_inode;
  19003. +}
  19004. +
  19005. +extern ino_t oid_to_ino(oid_t oid) __attribute__ ((const));
  19006. +extern ino_t oid_to_uino(oid_t oid) __attribute__ ((const));
  19007. +
  19008. +extern reiser4_tree *reiser4_tree_by_inode(const struct inode *inode);
  19009. +
  19010. +#if REISER4_DEBUG
  19011. +extern void reiser4_inode_invariant(const struct inode *inode);
  19012. +extern int inode_has_no_jnodes(reiser4_inode *);
  19013. +#else
  19014. +#define reiser4_inode_invariant(inode) noop
  19015. +#endif
  19016. +
  19017. +static inline int spin_inode_is_locked(const struct inode *inode)
  19018. +{
  19019. + assert_spin_locked(&reiser4_inode_data(inode)->guard);
  19020. + return 1;
  19021. +}
  19022. +
  19023. +/**
  19024. + * spin_lock_inode - lock reiser4_inode's embedded spinlock
  19025. + * @inode: inode to lock
  19026. + *
  19027. + * In debug mode it checks that lower priority locks are not held and
  19028. + * increments reiser4_context's lock counters on which lock ordering checking
  19029. + * is based.
  19030. + */
  19031. +static inline void spin_lock_inode(struct inode *inode)
  19032. +{
  19033. + assert("", LOCK_CNT_NIL(spin_locked));
  19034. + /* check lock ordering */
  19035. + assert_spin_not_locked(&d_c_lock);
  19036. +
  19037. + spin_lock(&reiser4_inode_data(inode)->guard);
  19038. +
  19039. + LOCK_CNT_INC(spin_locked_inode);
  19040. + LOCK_CNT_INC(spin_locked);
  19041. +
  19042. + reiser4_inode_invariant(inode);
  19043. +}
  19044. +
  19045. +/**
  19046. + * spin_unlock_inode - unlock reiser4_inode's embedded spinlock
  19047. + * @inode: inode to unlock
  19048. + *
  19049. + * In debug mode it checks that spinlock is held and decrements
  19050. + * reiser4_context's lock counters on which lock ordering checking is based.
  19051. + */
  19052. +static inline void spin_unlock_inode(struct inode *inode)
  19053. +{
  19054. + assert_spin_locked(&reiser4_inode_data(inode)->guard);
  19055. + assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_inode));
  19056. + assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
  19057. +
  19058. + reiser4_inode_invariant(inode);
  19059. +
  19060. + LOCK_CNT_DEC(spin_locked_inode);
  19061. + LOCK_CNT_DEC(spin_locked);
  19062. +
  19063. + spin_unlock(&reiser4_inode_data(inode)->guard);
  19064. +}
  19065. +
  19066. +extern znode *inode_get_vroot(struct inode *inode);
  19067. +extern void inode_set_vroot(struct inode *inode, znode * vroot);
  19068. +
  19069. +extern int reiser4_max_filename_len(const struct inode *inode);
  19070. +extern int max_hash_collisions(const struct inode *dir);
  19071. +extern void reiser4_unlock_inode(struct inode *inode);
  19072. +extern int is_reiser4_inode(const struct inode *inode);
  19073. +extern int setup_inode_ops(struct inode *inode, reiser4_object_create_data *);
  19074. +extern struct inode *reiser4_iget(struct super_block *super,
  19075. + const reiser4_key * key, int silent);
  19076. +extern void reiser4_iget_complete(struct inode *inode);
  19077. +extern void reiser4_inode_set_flag(struct inode *inode,
  19078. + reiser4_file_plugin_flags f);
  19079. +extern void reiser4_inode_clr_flag(struct inode *inode,
  19080. + reiser4_file_plugin_flags f);
  19081. +extern int reiser4_inode_get_flag(const struct inode *inode,
  19082. + reiser4_file_plugin_flags f);
  19083. +
  19084. +/* has inode been initialized? */
  19085. +static inline int
  19086. +is_inode_loaded(const struct inode *inode/* inode queried */)
  19087. +{
  19088. + assert("nikita-1120", inode != NULL);
  19089. + return reiser4_inode_get_flag(inode, REISER4_LOADED);
  19090. +}
  19091. +
  19092. +extern file_plugin *inode_file_plugin(const struct inode *inode);
  19093. +extern dir_plugin *inode_dir_plugin(const struct inode *inode);
  19094. +extern formatting_plugin *inode_formatting_plugin(const struct inode *inode);
  19095. +extern hash_plugin *inode_hash_plugin(const struct inode *inode);
  19096. +extern fibration_plugin *inode_fibration_plugin(const struct inode *inode);
  19097. +extern cipher_plugin *inode_cipher_plugin(const struct inode *inode);
  19098. +extern digest_plugin *inode_digest_plugin(const struct inode *inode);
  19099. +extern compression_plugin *inode_compression_plugin(const struct inode *inode);
  19100. +extern compression_mode_plugin *inode_compression_mode_plugin(const struct inode
  19101. + *inode);
  19102. +extern cluster_plugin *inode_cluster_plugin(const struct inode *inode);
  19103. +extern file_plugin *inode_create_plugin(const struct inode *inode);
  19104. +extern item_plugin *inode_sd_plugin(const struct inode *inode);
  19105. +extern item_plugin *inode_dir_item_plugin(const struct inode *inode);
  19106. +extern file_plugin *child_create_plugin(const struct inode *inode);
  19107. +
  19108. +extern void reiser4_make_bad_inode(struct inode *inode);
  19109. +
  19110. +extern void inode_set_extension(struct inode *inode, sd_ext_bits ext);
  19111. +extern void inode_clr_extension(struct inode *inode, sd_ext_bits ext);
  19112. +extern void inode_check_scale(struct inode *inode, __u64 old, __u64 new);
  19113. +extern void inode_check_scale_nolock(struct inode *inode, __u64 old, __u64 new);
  19114. +
  19115. +#define INODE_SET_SIZE(i, value) \
  19116. +({ \
  19117. + struct inode *__i; \
  19118. + typeof(value) __v; \
  19119. + \
  19120. + __i = (i); \
  19121. + __v = (value); \
  19122. + inode_check_scale(__i, __i->i_size, __v); \
  19123. + i_size_write(__i, __v); \
  19124. +})
  19125. +
  19126. +/*
  19127. + * update field @field in inode @i to contain value @value.
  19128. + */
  19129. +#define INODE_SET_FIELD(i, field, value) \
  19130. +({ \
  19131. + struct inode *__i; \
  19132. + typeof(value) __v; \
  19133. + \
  19134. + __i = (i); \
  19135. + __v = (value); \
  19136. + inode_check_scale(__i, __i->field, __v); \
  19137. + __i->field = __v; \
  19138. +})
  19139. +
  19140. +#define INODE_INC_FIELD(i, field) \
  19141. +({ \
  19142. + struct inode *__i; \
  19143. + \
  19144. + __i = (i); \
  19145. + inode_check_scale(__i, __i->field, __i->field + 1); \
  19146. + ++ __i->field; \
  19147. +})
  19148. +
  19149. +#define INODE_DEC_FIELD(i, field) \
  19150. +({ \
  19151. + struct inode *__i; \
  19152. + \
  19153. + __i = (i); \
  19154. + inode_check_scale(__i, __i->field, __i->field - 1); \
  19155. + -- __i->field; \
  19156. +})
  19157. +
  19158. +/*
  19159. + * Update the i_nlink field of inode @i via the kernel's nlink helpers.
  19160. + */
  19161. +#define INODE_SET_NLINK(i, value) \
  19162. +({ \
  19163. + struct inode *__i; \
  19164. + typeof(value) __v; \
  19165. + \
  19166. + __i = (i); \
  19167. + __v = (value); \
  19168. + inode_check_scale(__i, __i->i_nlink, __v); \
  19169. + set_nlink(__i, __v); \
  19170. +})
  19171. +
  19172. +#define INODE_INC_NLINK(i) \
  19173. + ({ \
  19174. + struct inode *__i; \
  19175. + \
  19176. + __i = (i); \
  19177. + inode_check_scale(__i, __i->i_nlink, __i->i_nlink + 1); \
  19178. + inc_nlink(__i); \
  19179. +})
  19180. +
  19181. +#define INODE_DROP_NLINK(i) \
  19182. + ({ \
  19183. + struct inode *__i; \
  19184. + \
  19185. + __i = (i); \
  19186. + inode_check_scale(__i, __i->i_nlink, __i->i_nlink - 1); \
  19187. + drop_nlink(__i); \
  19188. +})
  19189. +
  19190. +#define INODE_CLEAR_NLINK(i) \
  19191. + ({ \
  19192. + struct inode *__i; \
  19193. + \
  19194. + __i = (i); \
  19195. + inode_check_scale(__i, __i->i_nlink, 0); \
  19196. + clear_nlink(__i); \
  19197. +})
  19198. +
  19199. +
  19200. +static inline void inode_add_blocks(struct inode *inode, __u64 blocks)
  19201. +{
  19202. + inode_add_bytes(inode, blocks << inode->i_blkbits);
  19203. +}
  19204. +
  19205. +static inline void inode_sub_blocks(struct inode *inode, __u64 blocks)
  19206. +{
  19207. + inode_sub_bytes(inode, blocks << inode->i_blkbits);
  19208. +}
  19209. +
  19210. +
  19211. +/* See comment before reiser4_readdir_common() for description. */
  19212. +static inline struct list_head *get_readdir_list(const struct inode *inode)
  19213. +{
  19214. + return &reiser4_inode_data(inode)->lists.readdir_list;
  19215. +}
  19216. +
  19217. +extern void init_inode_ordering(struct inode *inode,
  19218. + reiser4_object_create_data * crd, int create);
  19219. +
  19220. +static inline struct radix_tree_root *jnode_tree_by_inode(struct inode *inode)
  19221. +{
  19222. + return &reiser4_inode_data(inode)->jnodes_tree;
  19223. +}
  19224. +
  19225. +static inline struct radix_tree_root *jnode_tree_by_reiser4_inode(reiser4_inode
  19226. + *r4_inode)
  19227. +{
  19228. + return &r4_inode->jnodes_tree;
  19229. +}
  19230. +
  19231. +#if REISER4_DEBUG
  19232. +extern void print_inode(const char *prefix, const struct inode *i);
  19233. +#endif
  19234. +
  19235. +int is_dir_empty(const struct inode *);
  19236. +
  19237. +/* __REISER4_INODE_H__ */
  19238. +#endif
  19239. +
  19240. +/* Make Linus happy.
  19241. + Local variables:
  19242. + c-indentation-style: "K&R"
  19243. + mode-name: "LC"
  19244. + c-basic-offset: 8
  19245. + tab-width: 8
  19246. + fill-column: 120
  19247. + End:
  19248. +*/
  19249. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/ioctl.h linux-4.14.2/fs/reiser4/ioctl.h
  19250. --- linux-4.14.2.orig/fs/reiser4/ioctl.h 1970-01-01 01:00:00.000000000 +0100
  19251. +++ linux-4.14.2/fs/reiser4/ioctl.h 2017-11-26 22:13:09.000000000 +0100
  19252. @@ -0,0 +1,41 @@
  19253. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  19254. + * reiser4/README */
  19255. +
  19256. +#if !defined(__REISER4_IOCTL_H__)
  19257. +#define __REISER4_IOCTL_H__
  19258. +
  19259. +#include <linux/fs.h>
  19260. +
  19261. +/*
  19262. + * ioctl(2) command used to "unpack" reiser4 file, that is, convert it into
  19263. + * extents and fix in this state. This is used by applications that rely on
  19264. + *
  19265. + * . files being block aligned, and
  19266. + *
  19267. + * . files never migrating on disk
  19268. + *
  19269. + * for example, boot loaders (LILO) need this.
  19270. + *
  19271. + * This ioctl should be used as
  19272. + *
  19273. + * result = ioctl(fd, REISER4_IOC_UNPACK);
  19274. + *
  19275. + * File behind fd descriptor will be converted to the extents (if necessary),
  19276. + * and its stat-data will be updated so that it will never be converted back
  19277. + * into tails again.
  19278. + */
  19279. +#define REISER4_IOC_UNPACK _IOW(0xCD, 1, long)
  19280. +
  19281. +/* __REISER4_IOCTL_H__ */
  19282. +#endif
  19283. +
  19284. +/* Make Linus happy.
  19285. + Local variables:
  19286. + c-indentation-style: "K&R"
  19287. + mode-name: "LC"
  19288. + c-basic-offset: 8
  19289. + tab-width: 8
  19290. + fill-column: 120
  19291. + scroll-step: 1
  19292. + End:
  19293. +*/
  19294. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/jnode.c linux-4.14.2/fs/reiser4/jnode.c
  19295. --- linux-4.14.2.orig/fs/reiser4/jnode.c 1970-01-01 01:00:00.000000000 +0100
  19296. +++ linux-4.14.2/fs/reiser4/jnode.c 2017-11-26 22:13:09.000000000 +0100
  19297. @@ -0,0 +1,1905 @@
  19298. +/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
  19299. + * reiser4/README */
  19300. +/* Jnode manipulation functions. */
  19301. +/* Jnode is entity used to track blocks with data and meta-data in reiser4.
  19302. +
  19303. + In particular, jnodes are used to track transactional information
  19304. + associated with each block. Each znode contains jnode as ->zjnode field.
  19305. +
  19306. + Jnode stands for either Josh or Journal node.
  19307. +*/
  19308. +
  19309. +/*
  19310. + * Taxonomy.
  19311. + *
  19312. + * Jnode represents block containing data or meta-data. There are jnodes
  19313. + * for:
  19314. + *
  19315. + * unformatted blocks (jnodes proper). There are plans, however to
  19316. + * have a handle per extent unit rather than per each unformatted
  19317. + * block, because there are so many of them.
  19318. + *
  19319. + * For bitmaps. Each bitmap is actually represented by two jnodes--one
  19320. + * for working and another for "commit" data, together forming bnode.
  19321. + *
  19322. + * For io-heads. These are used by log writer.
  19323. + *
  19324. + * For formatted nodes (znode). See comment at the top of znode.c for
  19325. + * details specific to the formatted nodes (znodes).
  19326. + *
  19327. + * Node data.
  19328. + *
  19329. + * Jnode provides access to the data of node it represents. Data are
  19330. + * stored in a page. Page is kept in a page cache. This means, that jnodes
  19331. + * are highly interconnected with page cache and VM internals.
  19332. + *
  19333. + * jnode has a pointer to page (->pg) containing its data. Pointer to data
  19334. + * themselves is cached in ->data field to avoid frequent calls to
  19335. + * page_address().
  19336. + *
  19337. + * jnode and page are attached to each other by jnode_attach_page(). This
  19338. + * function places pointer to jnode in set_page_private(), sets PG_private
  19339. + * flag and increments page counter.
  19340. + *
  19341. + * Opposite operation is performed by page_clear_jnode().
  19342. + *
  19343. + * jnode->pg is protected by jnode spin lock, and page->private is
  19344. + * protected by page lock. See comment at the top of page_cache.c for
  19345. + * more.
  19346. + *
  19347. + * page can be detached from jnode for two reasons:
  19348. + *
  19349. + * . jnode is removed from a tree (file is truncated, or formatted
  19350. + * node is removed by balancing).
  19351. + *
  19352. + * . during memory pressure, VM calls ->releasepage() method
  19353. + * (reiser4_releasepage()) to evict page from memory.
  19354. + *
  19355. + * (there, of course, is also umount, but this is special case we are not
  19356. + * concerned with here).
  19357. + *
  19358. + * To protect jnode page from eviction, one calls jload() function that
  19359. + * "pins" page in memory (loading it if necessary), increments
  19360. + * jnode->d_count, and kmap()s page. Page is unpinned through call to
  19361. + * jrelse().
  19362. + *
  19363. + * Jnode life cycle.
  19364. + *
  19365. + * jnode is created, placed in hash table, and, optionally, in per-inode
  19366. + * radix tree. Page can be attached to jnode, pinned, released, etc.
  19367. + *
  19368. + * When jnode is captured into atom its reference counter is
  19369. + * increased. While being part of an atom, jnode can be "early
  19370. + * flushed". This means that as part of flush procedure, jnode is placed
  19371. + * into "relocate set", and its page is submitted to the disk. After io
  19372. + * completes, page can be detached, then loaded again, re-dirtied, etc.
  19373. + *
  19374. + * A thread acquires a reference to a jnode by calling jref() and releases it by
  19375. + * jput(). When last reference is removed, jnode is still retained in
  19376. + * memory (cached) if it has page attached, _unless_ it is scheduled for
  19377. + * destruction (has JNODE_HEARD_BANSHEE bit set).
  19378. + *
  19379. + * Tree read-write lock was used as "existential" lock for jnodes. That is,
  19380. + * jnode->x_count could be changed from 0 to 1 only under tree write lock,
  19381. + * that is, tree lock protected unreferenced jnodes stored in the hash
  19382. + * table, from recycling.
  19383. + *
  19384. + * This resulted in high contention on tree lock, because jref()/jput() is
  19385. + * frequent operation. To ameliorate this problem, RCU is used: when jput()
  19386. + * is just about to release last reference on jnode it sets JNODE_RIP bit
  19387. + * on it, and then proceed with jnode destruction (removing jnode from hash
  19388. + * table, cbk_cache, detaching page, etc.). All places that change jnode
  19389. + * reference counter from 0 to 1 (jlookup(), zlook(), zget(), and
  19390. + * cbk_cache_scan_slots()) check for JNODE_RIP bit (this is done by
  19391. + * jnode_rip_check() function), and pretend that nothing was found in hash
  19392. + * table if bit is set.
  19393. + *
  19394. + * jput defers actual return of jnode into slab cache to some later time
  19395. + * (by call_rcu()), this guarantees that other threads can safely continue
  19396. + * working with JNODE_RIP-ped jnode.
  19397. + *
  19398. + */
  19399. +
  19400. +#include "reiser4.h"
  19401. +#include "debug.h"
  19402. +#include "dformat.h"
  19403. +#include "jnode.h"
  19404. +#include "plugin/plugin_header.h"
  19405. +#include "plugin/plugin.h"
  19406. +#include "txnmgr.h"
  19407. +/*#include "jnode.h"*/
  19408. +#include "znode.h"
  19409. +#include "tree.h"
  19410. +#include "tree_walk.h"
  19411. +#include "super.h"
  19412. +#include "inode.h"
  19413. +#include "page_cache.h"
  19414. +
  19415. +#include <asm/uaccess.h> /* UML needs this for PAGE_OFFSET */
  19416. +#include <linux/types.h>
  19417. +#include <linux/slab.h>
  19418. +#include <linux/pagemap.h>
  19419. +#include <linux/swap.h>
  19420. +#include <linux/fs.h> /* for struct address_space */
  19421. +#include <linux/writeback.h> /* for inode_wb_list_lock */
  19422. +
  19423. +static struct kmem_cache *_jnode_slab = NULL;
  19424. +
  19425. +static void jnode_set_type(jnode * node, jnode_type type);
  19426. +static int jdelete(jnode * node);
  19427. +static int jnode_try_drop(jnode * node);
  19428. +
  19429. +#if REISER4_DEBUG
  19430. +static int jnode_invariant(jnode * node, int tlocked, int jlocked);
  19431. +#endif
  19432. +
  19433. +/* true if valid page is attached to jnode */
  19434. +static inline int jnode_is_parsed(jnode * node)
  19435. +{
  19436. + return JF_ISSET(node, JNODE_PARSED);
  19437. +}
  19438. +
  19439. +/* hash table support */
  19440. +
  19441. +/* compare two jnode keys for equality. Used by hash-table macros */
  19442. +static inline int jnode_key_eq(const struct jnode_key *k1,
  19443. + const struct jnode_key *k2)
  19444. +{
  19445. + assert("nikita-2350", k1 != NULL);
  19446. + assert("nikita-2351", k2 != NULL);
  19447. +
  19448. + return (k1->index == k2->index && k1->objectid == k2->objectid);
  19449. +}
  19450. +
  19451. +/* Hash jnode by its key (inode plus offset). Used by hash-table macros */
  19452. +static inline __u32 jnode_key_hashfn(j_hash_table * table,
  19453. + const struct jnode_key *key)
  19454. +{
  19455. + assert("nikita-2352", key != NULL);
  19456. + assert("nikita-3346", IS_POW(table->_buckets));
  19457. +
  19458. + /* yes, this is a remarkably simple (if not stupid) hash function. */
  19459. + return (key->objectid + key->index) & (table->_buckets - 1);
  19460. +}
  19461. +
  19462. +/* The hash table definition */
  19463. +#define KMALLOC(size) reiser4_vmalloc(size)
  19464. +#define KFREE(ptr, size) vfree(ptr)
  19465. +TYPE_SAFE_HASH_DEFINE(j, jnode, struct jnode_key, key.j, link.j,
  19466. + jnode_key_hashfn, jnode_key_eq);
  19467. +#undef KFREE
  19468. +#undef KMALLOC
  19469. +
  19470. +/* call this to initialise jnode hash table */
  19471. +int jnodes_tree_init(reiser4_tree * tree/* tree to initialise jnodes for */)
  19472. +{
  19473. + assert("nikita-2359", tree != NULL);
  19474. + return j_hash_init(&tree->jhash_table, 16384);
  19475. +}
  19476. +
  19477. +/* call this to destroy jnode hash table. This is called during umount. */
  19478. +int jnodes_tree_done(reiser4_tree * tree/* tree to destroy jnodes for */)
  19479. +{
  19480. + j_hash_table *jtable;
  19481. + jnode *node;
  19482. + jnode *next;
  19483. +
  19484. + assert("nikita-2360", tree != NULL);
  19485. +
  19486. + /*
  19487. + * Scan hash table and free all jnodes.
  19488. + */
  19489. + jtable = &tree->jhash_table;
  19490. + if (jtable->_table) {
  19491. + for_all_in_htable(jtable, j, node, next) {
  19492. + assert("nikita-2361", !atomic_read(&node->x_count));
  19493. + jdrop(node);
  19494. + }
  19495. +
  19496. + j_hash_done(&tree->jhash_table);
  19497. + }
  19498. + return 0;
  19499. +}
  19500. +
  19501. +/**
  19502. + * init_jnodes - create jnode cache
  19503. + *
  19504. + * Initializes slab cache jnodes. It is part of reiser4 module initialization.
  19505. + */
  19506. +int init_jnodes(void)
  19507. +{
  19508. + assert("umka-168", _jnode_slab == NULL);
  19509. +
  19510. + _jnode_slab = kmem_cache_create("jnode", sizeof(jnode), 0,
  19511. + SLAB_HWCACHE_ALIGN |
  19512. + SLAB_RECLAIM_ACCOUNT, NULL);
  19513. + if (_jnode_slab == NULL)
  19514. + return RETERR(-ENOMEM);
  19515. +
  19516. + return 0;
  19517. +}
  19518. +
  19519. +/**
  19520. + * done_jnodes - delete jnode cache
  19521. + *
  19522. + * This is called on reiser4 module unloading or system shutdown.
  19523. + */
  19524. +void done_jnodes(void)
  19525. +{
  19526. + destroy_reiser4_cache(&_jnode_slab);
  19527. +}
  19528. +
  19529. +/* Initialize a jnode. */
  19530. +void jnode_init(jnode * node, reiser4_tree * tree, jnode_type type)
  19531. +{
  19532. + memset(node, 0, sizeof(jnode));
  19533. + ON_DEBUG(node->magic = JMAGIC);
  19534. + jnode_set_type(node, type);
  19535. + atomic_set(&node->d_count, 0);
  19536. + atomic_set(&node->x_count, 0);
  19537. + spin_lock_init(&node->guard);
  19538. + spin_lock_init(&node->load);
  19539. + node->atom = NULL;
  19540. + node->tree = tree;
  19541. + INIT_LIST_HEAD(&node->capture_link);
  19542. +
  19543. + ASSIGN_NODE_LIST(node, NOT_CAPTURED);
  19544. +
  19545. +#if REISER4_DEBUG
  19546. + {
  19547. + reiser4_super_info_data *sbinfo;
  19548. +
  19549. + sbinfo = get_super_private(tree->super);
  19550. + spin_lock_irq(&sbinfo->all_guard);
  19551. + list_add(&node->jnodes, &sbinfo->all_jnodes);
  19552. + spin_unlock_irq(&sbinfo->all_guard);
  19553. + }
  19554. +#endif
  19555. +}
  19556. +
  19557. +#if REISER4_DEBUG
  19558. +/*
  19559. + * Remove jnode from ->all_jnodes list.
  19560. + */
  19561. +static void jnode_done(jnode * node, reiser4_tree * tree)
  19562. +{
  19563. + reiser4_super_info_data *sbinfo;
  19564. +
  19565. + sbinfo = get_super_private(tree->super);
  19566. +
  19567. + spin_lock_irq(&sbinfo->all_guard);
  19568. + assert("nikita-2422", !list_empty(&node->jnodes));
  19569. + list_del_init(&node->jnodes);
  19570. + spin_unlock_irq(&sbinfo->all_guard);
  19571. +}
  19572. +#endif
  19573. +
  19574. +/* return already existing jnode of page */
  19575. +jnode *jnode_by_page(struct page *pg)
  19576. +{
  19577. + assert("nikita-2400", PageLocked(pg));
  19578. + assert("nikita-2068", PagePrivate(pg));
  19579. + assert("nikita-2067", jprivate(pg) != NULL);
  19580. + return jprivate(pg);
  19581. +}
  19582. +
  19583. +/* exported functions to allocate/free jnode objects outside this file */
  19584. +jnode *jalloc(void)
  19585. +{
  19586. + jnode *jal = kmem_cache_alloc(_jnode_slab, reiser4_ctx_gfp_mask_get());
  19587. + return jal;
  19588. +}
  19589. +
  19590. +/* return jnode back to the slab allocator */
  19591. +inline void jfree(jnode * node)
  19592. +{
  19593. + assert("nikita-2663", (list_empty_careful(&node->capture_link) &&
  19594. + NODE_LIST(node) == NOT_CAPTURED));
  19595. + assert("nikita-3222", list_empty(&node->jnodes));
  19596. + assert("nikita-3221", jnode_page(node) == NULL);
  19597. +
  19598. + /* not yet phash_jnode_destroy(node); */
  19599. +
  19600. + kmem_cache_free(_jnode_slab, node);
  19601. +}
  19602. +
  19603. +/*
  19604. + * This function is supplied as RCU callback. It actually frees jnode when
  19605. + * last reference to it is gone.
  19606. + */
  19607. +static void jnode_free_actor(struct rcu_head *head)
  19608. +{
  19609. + jnode *node;
  19610. + jnode_type jtype;
  19611. +
  19612. + node = container_of(head, jnode, rcu);
  19613. + jtype = jnode_get_type(node);
  19614. +
  19615. + ON_DEBUG(jnode_done(node, jnode_get_tree(node)));
  19616. +
  19617. + switch (jtype) {
  19618. + case JNODE_IO_HEAD:
  19619. + case JNODE_BITMAP:
  19620. + case JNODE_UNFORMATTED_BLOCK:
  19621. + jfree(node);
  19622. + break;
  19623. + case JNODE_FORMATTED_BLOCK:
  19624. + zfree(JZNODE(node));
  19625. + break;
  19626. + case JNODE_INODE:
  19627. + default:
  19628. + wrong_return_value("nikita-3197", "Wrong jnode type");
  19629. + }
  19630. +}
  19631. +
  19632. +/*
  19633. + * Free a jnode. Post a callback to be executed later through RCU when all
  19634. + * references to @node are released.
  19635. + */
  19636. +static inline void jnode_free(jnode * node, jnode_type jtype)
  19637. +{
  19638. + if (jtype != JNODE_INODE) {
  19639. + /*assert("nikita-3219", list_empty(&node->rcu.list)); */
  19640. + call_rcu(&node->rcu, jnode_free_actor);
  19641. + } else
  19642. + jnode_list_remove(node);
  19643. +}
  19644. +
  19645. +/* allocate new unformatted jnode */
  19646. +static jnode *jnew_unformatted(void)
  19647. +{
  19648. + jnode *jal;
  19649. +
  19650. + jal = jalloc();
  19651. + if (jal == NULL)
  19652. + return NULL;
  19653. +
  19654. + jnode_init(jal, current_tree, JNODE_UNFORMATTED_BLOCK);
  19655. + jal->key.j.mapping = NULL;
  19656. + jal->key.j.index = (unsigned long)-1;
  19657. + jal->key.j.objectid = 0;
  19658. + return jal;
  19659. +}
  19660. +
  19661. +/* look for jnode with given mapping and offset within hash table */
  19662. +jnode *jlookup(reiser4_tree * tree, oid_t objectid, unsigned long index)
  19663. +{
  19664. + struct jnode_key jkey;
  19665. + jnode *node;
  19666. +
  19667. + jkey.objectid = objectid;
  19668. + jkey.index = index;
  19669. +
  19670. + /*
  19671. + * hash table is _not_ protected by any lock during lookups. All we
  19672. + * have to do is to disable preemption to keep RCU happy.
  19673. + */
  19674. +
  19675. + rcu_read_lock();
  19676. + node = j_hash_find(&tree->jhash_table, &jkey);
  19677. + if (node != NULL) {
  19678. + /* protect @node from recycling */
  19679. + jref(node);
  19680. + assert("nikita-2955", jnode_invariant(node, 0, 0));
  19681. + node = jnode_rip_check(tree, node);
  19682. + }
  19683. + rcu_read_unlock();
  19684. + return node;
  19685. +}
  19686. +
  19687. +/* per inode radix tree of jnodes is protected by tree's read write spin lock */
  19688. +static jnode *jfind_nolock(struct address_space *mapping, unsigned long index)
  19689. +{
  19690. + assert("vs-1694", mapping->host != NULL);
  19691. +
  19692. + return radix_tree_lookup(jnode_tree_by_inode(mapping->host), index);
  19693. +}
  19694. +
  19695. +jnode *jfind(struct address_space *mapping, unsigned long index)
  19696. +{
  19697. + reiser4_tree *tree;
  19698. + jnode *node;
  19699. +
  19700. + assert("vs-1694", mapping->host != NULL);
  19701. + tree = reiser4_tree_by_inode(mapping->host);
  19702. +
  19703. + read_lock_tree(tree);
  19704. + node = jfind_nolock(mapping, index);
  19705. + if (node != NULL)
  19706. + jref(node);
  19707. + read_unlock_tree(tree);
  19708. + return node;
  19709. +}
  19710. +
  19711. +static void inode_attach_jnode(jnode * node)
  19712. +{
  19713. + struct inode *inode;
  19714. + reiser4_inode *info;
  19715. + struct radix_tree_root *rtree;
  19716. +
  19717. + assert_rw_write_locked(&(jnode_get_tree(node)->tree_lock));
  19718. + assert("zam-1043", node->key.j.mapping != NULL);
  19719. + inode = node->key.j.mapping->host;
  19720. + info = reiser4_inode_data(inode);
  19721. + rtree = jnode_tree_by_reiser4_inode(info);
  19722. + if (rtree->rnode == NULL) {
  19723. + /* prevent inode from being pruned when it has jnodes attached
  19724. + to it */
  19725. + spin_lock_irq(&inode->i_data.tree_lock);
  19726. + inode->i_data.nrpages++;
  19727. + spin_unlock_irq(&inode->i_data.tree_lock);
  19728. + }
  19729. + assert("zam-1049", equi(rtree->rnode != NULL, info->nr_jnodes != 0));
  19730. + check_me("zam-1045",
  19731. + !radix_tree_insert(rtree, node->key.j.index, node));
  19732. + ON_DEBUG(info->nr_jnodes++);
  19733. +}
  19734. +
  19735. +static void inode_detach_jnode(jnode * node)
  19736. +{
  19737. + struct inode *inode;
  19738. + reiser4_inode *info;
  19739. + struct radix_tree_root *rtree;
  19740. +
  19741. + assert_rw_write_locked(&(jnode_get_tree(node)->tree_lock));
  19742. + assert("zam-1044", node->key.j.mapping != NULL);
  19743. + inode = node->key.j.mapping->host;
  19744. + info = reiser4_inode_data(inode);
  19745. + rtree = jnode_tree_by_reiser4_inode(info);
  19746. +
  19747. + assert("zam-1051", info->nr_jnodes != 0);
  19748. + assert("zam-1052", rtree->rnode != NULL);
  19749. + ON_DEBUG(info->nr_jnodes--);
  19750. +
  19751. + /* delete jnode from inode's radix tree of jnodes */
  19752. + check_me("zam-1046", radix_tree_delete(rtree, node->key.j.index));
  19753. + if (rtree->rnode == NULL) {
  19754. + /* inode can be pruned now */
  19755. + spin_lock_irq(&inode->i_data.tree_lock);
  19756. + inode->i_data.nrpages--;
  19757. + spin_unlock_irq(&inode->i_data.tree_lock);
  19758. + }
  19759. +}
  19760. +
  19761. +/* put jnode into hash table (where they can be found by flush who does not know
  19762. + mapping) and to inode's tree of jnodes (where they can be found (hopefully
  19763. + faster) in places where mapping is known). Currently it is used by
  19764. + fs/reiser4/plugin/item/extent_file_ops.c:index_extent_jnode when new jnode is
  19765. + created */
  19766. +static void
  19767. +hash_unformatted_jnode(jnode * node, struct address_space *mapping,
  19768. + unsigned long index)
  19769. +{
  19770. + j_hash_table *jtable;
  19771. +
  19772. + assert("vs-1446", jnode_is_unformatted(node));
  19773. + assert("vs-1442", node->key.j.mapping == 0);
  19774. + assert("vs-1443", node->key.j.objectid == 0);
  19775. + assert("vs-1444", node->key.j.index == (unsigned long)-1);
  19776. + assert_rw_write_locked(&(jnode_get_tree(node)->tree_lock));
  19777. +
  19778. + node->key.j.mapping = mapping;
  19779. + node->key.j.objectid = get_inode_oid(mapping->host);
  19780. + node->key.j.index = index;
  19781. +
  19782. + jtable = &jnode_get_tree(node)->jhash_table;
  19783. +
  19784. + /* race with some other thread inserting jnode into the hash table is
  19785. + * impossible, because we keep the page lock. */
  19786. + /*
  19787. + * following assertion no longer holds because of RCU: it is possible
  19788. + * jnode is in the hash table, but with JNODE_RIP bit set.
  19789. + */
  19790. + /* assert("nikita-3211", j_hash_find(jtable, &node->key.j) == NULL); */
  19791. + j_hash_insert_rcu(jtable, node);
  19792. + inode_attach_jnode(node);
  19793. +}
  19794. +
  19795. +static void unhash_unformatted_node_nolock(jnode * node)
  19796. +{
  19797. + assert("vs-1683", node->key.j.mapping != NULL);
  19798. + assert("vs-1684",
  19799. + node->key.j.objectid ==
  19800. + get_inode_oid(node->key.j.mapping->host));
  19801. +
  19802. + /* remove jnode from hash-table */
  19803. + j_hash_remove_rcu(&node->tree->jhash_table, node);
  19804. + inode_detach_jnode(node);
  19805. + node->key.j.mapping = NULL;
  19806. + node->key.j.index = (unsigned long)-1;
  19807. + node->key.j.objectid = 0;
  19808. +
  19809. +}
  19810. +
  19811. +/* remove jnode from hash table and from inode's tree of jnodes. This is used in
  19812. + reiser4_invalidatepage and in kill_hook_extent -> truncate_inode_jnodes ->
  19813. + reiser4_uncapture_jnode */
  19814. +void unhash_unformatted_jnode(jnode * node)
  19815. +{
  19816. + assert("vs-1445", jnode_is_unformatted(node));
  19817. +
  19818. + write_lock_tree(node->tree);
  19819. + unhash_unformatted_node_nolock(node);
  19820. + write_unlock_tree(node->tree);
  19821. +}
  19822. +
  19823. +/*
  19824. + * search hash table for a jnode with given oid and index. If not found,
  19825. + * allocate new jnode, insert it, and also insert into radix tree for the
  19826. + * given inode/mapping.
  19827. + */
  19828. +static jnode *find_get_jnode(reiser4_tree * tree,
  19829. + struct address_space *mapping,
  19830. + oid_t oid, unsigned long index)
  19831. +{
  19832. + jnode *result;
  19833. + jnode *shadow;
  19834. + int preload;
  19835. +
  19836. + result = jnew_unformatted();
  19837. +
  19838. + if (unlikely(result == NULL))
  19839. + return ERR_PTR(RETERR(-ENOMEM));
  19840. +
  19841. + preload = radix_tree_preload(reiser4_ctx_gfp_mask_get());
  19842. + if (preload != 0)
  19843. + return ERR_PTR(preload);
  19844. +
  19845. + write_lock_tree(tree);
  19846. + shadow = jfind_nolock(mapping, index);
  19847. + if (likely(shadow == NULL)) {
  19848. + /* add new jnode to hash table and inode's radix tree of
  19849. + * jnodes */
  19850. + jref(result);
  19851. + hash_unformatted_jnode(result, mapping, index);
  19852. + } else {
  19853. + /* jnode is found in inode's radix tree of jnodes */
  19854. + jref(shadow);
  19855. + jnode_free(result, JNODE_UNFORMATTED_BLOCK);
  19856. + assert("vs-1498", shadow->key.j.mapping == mapping);
  19857. + result = shadow;
  19858. + }
  19859. + write_unlock_tree(tree);
  19860. +
  19861. + assert("nikita-2955",
  19862. + ergo(result != NULL, jnode_invariant(result, 0, 0)));
  19863. + radix_tree_preload_end();
  19864. + return result;
  19865. +}
  19866. +
  19867. +/* jget() (a la zget() but for unformatted nodes). Returns (and possibly
  19868. + creates) jnode corresponding to page @pg. jnode is attached to page and
  19869. + inserted into jnode hash-table. */
  19870. +static jnode *do_jget(reiser4_tree * tree, struct page *pg)
  19871. +{
  19872. + /*
  19873. + * There are two ways to create jnode: starting with pre-existing page
  19874. + * and without page.
  19875. + *
  19876. + * When page already exists, jnode is created
  19877. + * (jnode_of_page()->do_jget()) under page lock. This is done in
  19878. + * ->writepage(), or when capturing anonymous page dirtied through
  19879. + * mmap.
  19880. + *
  19881. + * Jnode without page is created by index_extent_jnode().
  19882. + *
  19883. + */
  19884. +
  19885. + jnode *result;
  19886. + oid_t oid = get_inode_oid(pg->mapping->host);
  19887. +
  19888. + assert("umka-176", pg != NULL);
  19889. + assert("nikita-2394", PageLocked(pg));
  19890. +
  19891. + result = jprivate(pg);
  19892. + if (likely(result != NULL))
  19893. + return jref(result);
  19894. +
  19895. + tree = reiser4_tree_by_page(pg);
  19896. +
  19897. + /* check hash-table first */
  19898. + result = jfind(pg->mapping, pg->index);
  19899. + if (unlikely(result != NULL)) {
  19900. + spin_lock_jnode(result);
  19901. + jnode_attach_page(result, pg);
  19902. + spin_unlock_jnode(result);
  19903. + result->key.j.mapping = pg->mapping;
  19904. + return result;
  19905. + }
  19906. +
  19907. + /* since page is locked, jnode should be allocated with GFP_NOFS flag */
  19908. + reiser4_ctx_gfp_mask_force(GFP_NOFS);
  19909. + result = find_get_jnode(tree, pg->mapping, oid, pg->index);
  19910. + if (unlikely(IS_ERR(result)))
  19911. + return result;
  19912. + /* attach jnode to page */
  19913. + spin_lock_jnode(result);
  19914. + jnode_attach_page(result, pg);
  19915. + spin_unlock_jnode(result);
  19916. + return result;
  19917. +}
  19918. +
  19919. +/*
  19920. + * return jnode for @pg, creating it if necessary.
  19921. + */
  19922. +jnode *jnode_of_page(struct page *pg)
  19923. +{
  19924. + jnode *result;
  19925. +
  19926. + assert("nikita-2394", PageLocked(pg));
  19927. +
  19928. + result = do_jget(reiser4_tree_by_page(pg), pg);
  19929. +
  19930. + if (REISER4_DEBUG && !IS_ERR(result)) {
  19931. + assert("nikita-3210", result == jprivate(pg));
  19932. + assert("nikita-2046", jnode_page(jprivate(pg)) == pg);
  19933. + if (jnode_is_unformatted(jprivate(pg))) {
  19934. + assert("nikita-2364",
  19935. + jprivate(pg)->key.j.index == pg->index);
  19936. + assert("nikita-2367",
  19937. + jprivate(pg)->key.j.mapping == pg->mapping);
  19938. + assert("nikita-2365",
  19939. + jprivate(pg)->key.j.objectid ==
  19940. + get_inode_oid(pg->mapping->host));
  19941. + assert("vs-1200",
  19942. + jprivate(pg)->key.j.objectid ==
  19943. + pg->mapping->host->i_ino);
  19944. + assert("nikita-2356",
  19945. + jnode_is_unformatted(jnode_by_page(pg)));
  19946. + }
  19947. + assert("nikita-2956", jnode_invariant(jprivate(pg), 0, 0));
  19948. + }
  19949. + return result;
  19950. +}
  19951. +
  19952. +/* attach page to jnode: set ->pg pointer in jnode, and ->private one in the
  19953. + * page.*/
  19954. +void jnode_attach_page(jnode * node, struct page *pg)
  19955. +{
  19956. + assert("nikita-2060", node != NULL);
  19957. + assert("nikita-2061", pg != NULL);
  19958. +
  19959. + assert("nikita-2050", jprivate(pg) == 0ul);
  19960. + assert("nikita-2393", !PagePrivate(pg));
  19961. + assert("vs-1741", node->pg == NULL);
  19962. +
  19963. + assert("nikita-2396", PageLocked(pg));
  19964. + assert_spin_locked(&(node->guard));
  19965. +
  19966. + get_page(pg);
  19967. + set_page_private(pg, (unsigned long)node);
  19968. + node->pg = pg;
  19969. + SetPagePrivate(pg);
  19970. +}
  19971. +
  19972. +/* Dual to jnode_attach_page: break a binding between page and jnode */
  19973. +void page_clear_jnode(struct page *page, jnode * node)
  19974. +{
  19975. + assert("nikita-2425", PageLocked(page));
  19976. + assert_spin_locked(&(node->guard));
  19977. + assert("nikita-2428", PagePrivate(page));
  19978. +
  19979. + assert("nikita-3551", !PageWriteback(page));
  19980. +
  19981. + JF_CLR(node, JNODE_PARSED);
  19982. + set_page_private(page, 0ul);
  19983. + ClearPagePrivate(page);
  19984. + node->pg = NULL;
  19985. + put_page(page);
  19986. +}
  19987. +
  19988. +#if 0
  19989. +/* it is only used in one place to handle error */
  19990. +void
  19991. +page_detach_jnode(struct page *page, struct address_space *mapping,
  19992. + unsigned long index)
  19993. +{
  19994. + assert("nikita-2395", page != NULL);
  19995. +
  19996. + lock_page(page);
  19997. + if ((page->mapping == mapping) && (page->index == index)
  19998. + && PagePrivate(page)) {
  19999. + jnode *node;
  20000. +
  20001. + node = jprivate(page);
  20002. + spin_lock_jnode(node);
  20003. + page_clear_jnode(page, node);
  20004. + spin_unlock_jnode(node);
  20005. + }
  20006. + unlock_page(page);
  20007. +}
  20008. +#endif /* 0 */
  20009. +
  20010. +/* return @node page locked.
  20011. +
  20012. + Locking ordering requires that one first takes page lock and afterwards
  20013. + spin lock on node attached to this page. Sometimes it is necessary to go in
  20014. + the opposite direction. This is done through standard trylock-and-release
  20015. + loop.
  20016. +*/
  20017. +static struct page *jnode_lock_page(jnode * node)
  20018. +{
  20019. + struct page *page;
  20020. +
  20021. + assert("nikita-2052", node != NULL);
  20022. + assert("nikita-2401", LOCK_CNT_NIL(spin_locked_jnode));
  20023. +
  20024. + while (1) {
  20025. +
  20026. + spin_lock_jnode(node);
  20027. + page = jnode_page(node);
  20028. + if (page == NULL)
  20029. + break;
  20030. +
  20031. + /* no need to get_page( page ) here, because page cannot
  20032. + be evicted from memory without detaching it from jnode and
  20033. + this requires spin lock on jnode that we already hold.
  20034. + */
  20035. + if (trylock_page(page)) {
  20036. + /* We won a lock on jnode page, proceed. */
  20037. + break;
  20038. + }
  20039. +
  20040. + /* Page is locked by someone else. */
  20041. + get_page(page);
  20042. + spin_unlock_jnode(node);
  20043. + wait_on_page_locked(page);
  20044. + /* it is possible that page was detached from jnode and
  20045. + returned to the free pool, or re-assigned while we were
  20046. + waiting on locked bit. This will be rechecked on the next
  20047. + loop iteration.
  20048. + */
  20049. + put_page(page);
  20050. +
  20051. + /* try again */
  20052. + }
  20053. + return page;
  20054. +}
  20055. +
  20056. +/*
  20057. + * if JNODE_PARSED bit is not set, call ->parse() method of jnode, to verify
  20058. + * validness of jnode content.
  20059. + */
  20060. +static inline int jparse(jnode * node)
  20061. +{
  20062. + int result;
  20063. +
  20064. + assert("nikita-2466", node != NULL);
  20065. +
  20066. + spin_lock_jnode(node);
  20067. + if (likely(!jnode_is_parsed(node))) {
  20068. + result = jnode_ops(node)->parse(node);
  20069. + if (likely(result == 0))
  20070. + JF_SET(node, JNODE_PARSED);
  20071. + } else
  20072. + result = 0;
  20073. + spin_unlock_jnode(node);
  20074. + return result;
  20075. +}
  20076. +
  20077. +/* Lock a page attached to jnode, create and attach page to jnode if it had no
  20078. + * one. */
  20079. +static struct page *jnode_get_page_locked(jnode * node, gfp_t gfp_flags)
  20080. +{
  20081. + struct page *page;
  20082. +
  20083. + spin_lock_jnode(node);
  20084. + page = jnode_page(node);
  20085. +
  20086. + if (page == NULL) {
  20087. + spin_unlock_jnode(node);
  20088. + page = find_or_create_page(jnode_get_mapping(node),
  20089. + jnode_get_index(node), gfp_flags);
  20090. + if (page == NULL)
  20091. + return ERR_PTR(RETERR(-ENOMEM));
  20092. + } else {
  20093. + if (trylock_page(page)) {
  20094. + spin_unlock_jnode(node);
  20095. + return page;
  20096. + }
  20097. + get_page(page);
  20098. + spin_unlock_jnode(node);
  20099. + lock_page(page);
  20100. + assert("nikita-3134", page->mapping == jnode_get_mapping(node));
  20101. + }
  20102. +
  20103. + spin_lock_jnode(node);
  20104. + if (!jnode_page(node))
  20105. + jnode_attach_page(node, page);
  20106. + spin_unlock_jnode(node);
  20107. +
  20108. + put_page(page);
  20109. + assert("zam-894", jnode_page(node) == page);
  20110. + return page;
  20111. +}
  20112. +
  20113. +/* Start read operation for jnode's page if page is not up-to-date. */
  20114. +static int jnode_start_read(jnode * node, struct page *page)
  20115. +{
  20116. + assert("zam-893", PageLocked(page));
  20117. +
  20118. + if (PageUptodate(page)) {
  20119. + unlock_page(page);
  20120. + return 0;
  20121. + }
  20122. + return reiser4_page_io(page, node, READ, reiser4_ctx_gfp_mask_get());
  20123. +}
  20124. +
  20125. +#if REISER4_DEBUG
  20126. +static void check_jload(jnode * node, struct page *page)
  20127. +{
  20128. + if (jnode_is_znode(node)) {
  20129. + znode *z = JZNODE(node);
  20130. +
  20131. + if (znode_is_any_locked(z)) {
  20132. + assert("nikita-3253",
  20133. + z->nr_items ==
  20134. + node_plugin_by_node(z)->num_of_items(z));
  20135. + kunmap(page);
  20136. + }
  20137. + assert("nikita-3565", znode_invariant(z));
  20138. + }
  20139. +}
  20140. +#else
  20141. +#define check_jload(node, page) noop
  20142. +#endif
  20143. +
  20144. +/* prefetch jnode to speed up next call to jload. Call this when you are going
  20145. + * to call jload() shortly. This will bring appropriate portion of jnode into
  20146. + * CPU cache. */
  20147. +void jload_prefetch(jnode * node)
  20148. +{
  20149. + prefetchw(&node->x_count);
  20150. +}
  20151. +
  20152. +/* load jnode's data into memory */
  20153. +int jload_gfp(jnode * node /* node to load */ ,
  20154. + gfp_t gfp_flags /* allocation flags */ ,
  20155. + int do_kmap/* true if page should be kmapped */)
  20156. +{
  20157. + struct page *page;
  20158. + int result = 0;
  20159. + int parsed;
  20160. +
  20161. + assert("nikita-3010", reiser4_schedulable());
  20162. +
  20163. + prefetchw(&node->pg);
  20164. +
  20165. + /* taking d-reference implies taking x-reference. */
  20166. + jref(node);
  20167. +
  20168. + /*
  20169. + * acquiring d-reference to @jnode and check for JNODE_PARSED bit
  20170. + * should be atomic, otherwise there is a race against
  20171. + * reiser4_releasepage().
  20172. + */
  20173. + spin_lock(&(node->load));
  20174. + add_d_ref(node);
  20175. + parsed = jnode_is_parsed(node);
  20176. + spin_unlock(&(node->load));
  20177. +
  20178. + if (unlikely(!parsed)) {
  20179. + page = jnode_get_page_locked(node, gfp_flags);
  20180. + if (unlikely(IS_ERR(page))) {
  20181. + result = PTR_ERR(page);
  20182. + goto failed;
  20183. + }
  20184. +
  20185. + result = jnode_start_read(node, page);
  20186. + if (unlikely(result != 0))
  20187. + goto failed;
  20188. +
  20189. + wait_on_page_locked(page);
  20190. + if (unlikely(!PageUptodate(page))) {
  20191. + result = RETERR(-EIO);
  20192. + goto failed;
  20193. + }
  20194. +
  20195. + if (do_kmap)
  20196. + node->data = kmap(page);
  20197. +
  20198. + result = jparse(node);
  20199. + if (unlikely(result != 0)) {
  20200. + if (do_kmap)
  20201. + kunmap(page);
  20202. + goto failed;
  20203. + }
  20204. + check_jload(node, page);
  20205. + } else {
  20206. + page = jnode_page(node);
  20207. + check_jload(node, page);
  20208. + if (do_kmap)
  20209. + node->data = kmap(page);
  20210. + }
  20211. +
  20212. + if (!is_writeout_mode())
  20213. + /* We do not mark pages active if jload is called as a part of
  20214. + * jnode_flush() or reiser4_write_logs(). Both jnode_flush()
  20215. + * and write_logs() add no value to cached data, there is no
  20216. + * sense to mark pages as active when they go to disk, it just
  20217. + * confuses vm scanning routines because clean page could be
  20218. + * moved out from inactive list as a result of this
  20219. + * mark_page_accessed() call. */
  20220. + mark_page_accessed(page);
  20221. +
  20222. + return 0;
  20223. +
  20224. +failed:
  20225. + jrelse_tail(node);
  20226. + return result;
  20227. +
  20228. +}
  20229. +
  20230. +/* start asynchronous reading for given jnode's page. */
  20231. +int jstartio(jnode * node)
  20232. +{
  20233. + struct page *page;
  20234. +
  20235. + page = jnode_get_page_locked(node, reiser4_ctx_gfp_mask_get());
  20236. + if (IS_ERR(page))
  20237. + return PTR_ERR(page);
  20238. +
  20239. + return jnode_start_read(node, page);
  20240. +}
  20241. +
  20242. +/* Initialize a node by calling appropriate plugin instead of reading
  20243. + * node from disk as in jload(). */
  20244. +int jinit_new(jnode * node, gfp_t gfp_flags)
  20245. +{
  20246. + struct page *page;
  20247. + int result;
  20248. +
  20249. + jref(node);
  20250. + add_d_ref(node);
  20251. +
  20252. + page = jnode_get_page_locked(node, gfp_flags);
  20253. + if (IS_ERR(page)) {
  20254. + result = PTR_ERR(page);
  20255. + goto failed;
  20256. + }
  20257. +
  20258. + SetPageUptodate(page);
  20259. + unlock_page(page);
  20260. +
  20261. + node->data = kmap(page);
  20262. +
  20263. + if (!jnode_is_parsed(node)) {
  20264. + jnode_plugin *jplug = jnode_ops(node);
  20265. + spin_lock_jnode(node);
  20266. + result = jplug->init(node);
  20267. + spin_unlock_jnode(node);
  20268. + if (result) {
  20269. + kunmap(page);
  20270. + goto failed;
  20271. + }
  20272. + JF_SET(node, JNODE_PARSED);
  20273. + }
  20274. +
  20275. + return 0;
  20276. +
  20277. +failed:
  20278. + jrelse(node);
  20279. + return result;
  20280. +}
  20281. +
  20282. +/* release a reference to jnode acquired by jload(), decrement ->d_count */
  20283. +void jrelse_tail(jnode * node/* jnode to release references to */)
  20284. +{
  20285. + assert("nikita-489", atomic_read(&node->d_count) > 0);
  20286. + atomic_dec(&node->d_count);
  20287. + /* release reference acquired in jload_gfp() or jinit_new() */
  20288. + jput(node);
  20289. + if (jnode_is_unformatted(node) || jnode_is_znode(node))
  20290. + LOCK_CNT_DEC(d_refs);
  20291. +}
  20292. +
  20293. +/* drop reference to node data. When last reference is dropped, data are
  20294. + unloaded. */
  20295. +void jrelse(jnode * node/* jnode to release references to */)
  20296. +{
  20297. + struct page *page;
  20298. +
  20299. + assert("nikita-487", node != NULL);
  20300. + assert_spin_not_locked(&(node->guard));
  20301. +
  20302. + page = jnode_page(node);
  20303. + if (likely(page != NULL)) {
  20304. + /*
  20305. + * it is safe not to lock jnode here, because at this point
  20306. + * @node->d_count is greater than zero (if jrelse() is used
  20307. + * correctly, that is). JNODE_PARSED may be not set yet, if,
  20308. + * for example, we got here as a result of error handling path
  20309. + * in jload(). Anyway, page cannot be detached by
  20310. + * reiser4_releasepage(). truncate will invalidate page
  20311. + * regardless, but this should not be a problem.
  20312. + */
  20313. + kunmap(page);
  20314. + }
  20315. + jrelse_tail(node);
  20316. +}
  20317. +
  20318. +/* called from jput() to wait for io completion */
  20319. +static void jnode_finish_io(jnode * node)
  20320. +{
  20321. + struct page *page;
  20322. +
  20323. + assert("nikita-2922", node != NULL);
  20324. +
  20325. + spin_lock_jnode(node);
  20326. + page = jnode_page(node);
  20327. + if (page != NULL) {
  20328. + get_page(page);
  20329. + spin_unlock_jnode(node);
  20330. + wait_on_page_writeback(page);
  20331. + put_page(page);
  20332. + } else
  20333. + spin_unlock_jnode(node);
  20334. +}
  20335. +
  20336. +/*
  20337. + * This is called by jput() when last reference to jnode is released. This is
  20338. + * separate function, because we want fast path of jput() to be inline and,
  20339. + * therefore, small.
  20340. + */
  20341. +void jput_final(jnode * node)
  20342. +{
  20343. + int r_i_p;
  20344. +
  20345. + /* A fast check for keeping node in cache. We always keep node in cache
  20346. + * if its page is present and node was not marked for deletion */
  20347. + if (jnode_page(node) != NULL && !JF_ISSET(node, JNODE_HEARD_BANSHEE)) {
  20348. + rcu_read_unlock();
  20349. + return;
  20350. + }
  20351. + r_i_p = !JF_TEST_AND_SET(node, JNODE_RIP);
  20352. + /*
  20353. + * if r_i_p is true, we were first to set JNODE_RIP on this node. In
  20354. + * this case it is safe to access node after unlock.
  20355. + */
  20356. + rcu_read_unlock();
  20357. + if (r_i_p) {
  20358. + jnode_finish_io(node);
  20359. + if (JF_ISSET(node, JNODE_HEARD_BANSHEE))
  20360. + /* node is removed from the tree. */
  20361. + jdelete(node);
  20362. + else
  20363. + jnode_try_drop(node);
  20364. + }
  20365. + /* if !r_i_p some other thread is already killing it */
  20366. +}
  20367. +
  20368. +int jwait_io(jnode * node, int rw)
  20369. +{
  20370. + struct page *page;
  20371. + int result;
  20372. +
  20373. + assert("zam-448", jnode_page(node) != NULL);
  20374. +
  20375. + page = jnode_page(node);
  20376. +
  20377. + result = 0;
  20378. + if (rw == READ) {
  20379. + wait_on_page_locked(page);
  20380. + } else {
  20381. + assert("nikita-2227", rw == WRITE);
  20382. + wait_on_page_writeback(page);
  20383. + }
  20384. + if (PageError(page))
  20385. + result = RETERR(-EIO);
  20386. +
  20387. + return result;
  20388. +}
  20389. +
  20390. +/*
  20391. + * jnode types and plugins.
  20392. + *
  20393. + * jnode by itself is a "base type". There are several different jnode
  20394. + * flavors, called "jnode types" (see jnode_type for a list). Sometimes code
  20395. + * has to do different things based on jnode type. In the standard reiser4 way
  20396. + * this is done by having jnode plugin (see fs/reiser4/plugin.h:jnode_plugin).
  20397. + *
  20398. + * Functions below deal with jnode types and define methods of jnode plugin.
  20399. + *
  20400. + */
  20401. +
  20402. +/* set jnode type. This is done during jnode initialization. */
  20403. +static void jnode_set_type(jnode * node, jnode_type type)
  20404. +{
  20405. + static unsigned long type_to_mask[] = {
  20406. + [JNODE_UNFORMATTED_BLOCK] = 1,
  20407. + [JNODE_FORMATTED_BLOCK] = 0,
  20408. + [JNODE_BITMAP] = 2,
  20409. + [JNODE_IO_HEAD] = 6,
  20410. + [JNODE_INODE] = 4
  20411. + };
  20412. +
  20413. + assert("zam-647", type < LAST_JNODE_TYPE);
  20414. + assert("nikita-2815", !jnode_is_loaded(node));
  20415. + assert("nikita-3386", node->state == 0);
  20416. +
  20417. + node->state |= (type_to_mask[type] << JNODE_TYPE_1);
  20418. +}
  20419. +
  20420. +/* ->init() method of jnode plugin for jnodes that don't require plugin
  20421. + * specific initialization. */
  20422. +static int init_noinit(jnode * node UNUSED_ARG)
  20423. +{
  20424. + return 0;
  20425. +}
  20426. +
  20427. +/* ->parse() method of jnode plugin for jnodes that don't require plugin
  20428. + * specific parsing. */
  20429. +static int parse_noparse(jnode * node UNUSED_ARG)
  20430. +{
  20431. + return 0;
  20432. +}
  20433. +
  20434. +/* ->mapping() method for unformatted jnode */
  20435. +struct address_space *mapping_jnode(const jnode * node)
  20436. +{
  20437. + struct address_space *map;
  20438. +
  20439. + assert("nikita-2713", node != NULL);
  20440. +
  20441. + /* mapping is stored in jnode */
  20442. +
  20443. + map = node->key.j.mapping;
  20444. + assert("nikita-2714", map != NULL);
  20445. + assert("nikita-2897", is_reiser4_inode(map->host));
  20446. + assert("nikita-2715", get_inode_oid(map->host) == node->key.j.objectid);
  20447. + return map;
  20448. +}
  20449. +
  20450. +/* ->index() method for unformatted jnodes */
  20451. +unsigned long index_jnode(const jnode * node)
  20452. +{
  20453. + /* index is stored in jnode */
  20454. + return node->key.j.index;
  20455. +}
  20456. +
  20457. +/* ->remove() method for unformatted jnodes */
  20458. +static inline void remove_jnode(jnode * node, reiser4_tree * tree)
  20459. +{
  20460. + /* remove jnode from hash table and radix tree */
  20461. + if (node->key.j.mapping)
  20462. + unhash_unformatted_node_nolock(node);
  20463. +}
  20464. +
  20465. +/* ->mapping() method for znodes */
  20466. +static struct address_space *mapping_znode(const jnode * node)
  20467. +{
  20468. + /* all znodes belong to fake inode */
  20469. + return reiser4_get_super_fake(jnode_get_tree(node)->super)->i_mapping;
  20470. +}
  20471. +
  20472. +/* ->index() method for znodes */
  20473. +static unsigned long index_znode(const jnode * node)
  20474. +{
  20475. + unsigned long addr;
  20476. + assert("nikita-3317", (1 << znode_shift_order) < sizeof(znode));
  20477. +
  20478. + /* index of znode is just its address (shifted) */
  20479. + addr = (unsigned long)node;
  20480. + return (addr - PAGE_OFFSET) >> znode_shift_order;
  20481. +}
  20482. +
  20483. +/* ->mapping() method for bitmap jnode */
  20484. +static struct address_space *mapping_bitmap(const jnode * node)
  20485. +{
  20486. + /* all bitmap blocks belong to special bitmap inode */
  20487. + return get_super_private(jnode_get_tree(node)->super)->bitmap->
  20488. + i_mapping;
  20489. +}
  20490. +
  20491. +/* ->index() method for jnodes that are indexed by address */
  20492. +static unsigned long index_is_address(const jnode * node)
  20493. +{
  20494. + unsigned long ind;
  20495. +
  20496. + ind = (unsigned long)node;
  20497. + return ind - PAGE_OFFSET;
  20498. +}
  20499. +
  20500. +/* resolve race with jput */
  20501. +jnode *jnode_rip_sync(reiser4_tree *tree, jnode *node)
  20502. +{
  20503. + /*
  20504. + * This is used as part of RCU-based jnode handling.
  20505. + *
  20506. + * jlookup(), zlook(), zget(), and cbk_cache_scan_slots() have to work
  20507. + * with unreferenced jnodes (ones with ->x_count == 0). Hash table is
  20508. + * not protected during this, so concurrent thread may execute
  20509. + * zget-set-HEARD_BANSHEE-zput, or somehow else cause jnode to be
  20510. + * freed in jput_final(). To avoid such races, jput_final() sets
  20511. + * JNODE_RIP on jnode (under tree lock). All places that work with
  20512. + * unreferenced jnodes call this function. It checks for JNODE_RIP bit
  20513. + * (first without taking tree lock), and if this bit is set, releases
  20514. + * reference acquired by the current thread and returns NULL.
  20515. + *
  20516. + * As a result, if jnode is being concurrently freed, NULL is returned
  20517. + * and caller should pretend that jnode wasn't found in the first
  20518. + * place.
  20519. + *
  20520. + * Otherwise it's safe to release "rcu-read-lock" and continue with
  20521. + * jnode.
  20522. + */
  20523. + if (unlikely(JF_ISSET(node, JNODE_RIP))) {
  20524. + read_lock_tree(tree);
  20525. + if (JF_ISSET(node, JNODE_RIP)) {
  20526. + dec_x_ref(node);
  20527. + node = NULL;
  20528. + }
  20529. + read_unlock_tree(tree);
  20530. + }
  20531. + return node;
  20532. +}
  20533. +
  20534. +reiser4_key *jnode_build_key(const jnode * node, reiser4_key * key)
  20535. +{
  20536. + struct inode *inode;
  20537. + item_plugin *iplug;
  20538. + loff_t off;
  20539. +
  20540. + assert("nikita-3092", node != NULL);
  20541. + assert("nikita-3093", key != NULL);
  20542. + assert("nikita-3094", jnode_is_unformatted(node));
  20543. +
  20544. + off = ((loff_t) index_jnode(node)) << PAGE_SHIFT;
  20545. + inode = mapping_jnode(node)->host;
  20546. +
  20547. + if (node->parent_item_id != 0)
  20548. + iplug = item_plugin_by_id(node->parent_item_id);
  20549. + else
  20550. + iplug = NULL;
  20551. +
  20552. + if (iplug != NULL && iplug->f.key_by_offset)
  20553. + iplug->f.key_by_offset(inode, off, key);
  20554. + else {
  20555. + file_plugin *fplug;
  20556. +
  20557. + fplug = inode_file_plugin(inode);
  20558. + assert("zam-1007", fplug != NULL);
  20559. + assert("zam-1008", fplug->key_by_inode != NULL);
  20560. +
  20561. + fplug->key_by_inode(inode, off, key);
  20562. + }
  20563. +
  20564. + return key;
  20565. +}
  20566. +
  20567. +/* ->parse() method for formatted nodes */
  20568. +static int parse_znode(jnode * node)
  20569. +{
  20570. + return zparse(JZNODE(node));
  20571. +}
  20572. +
  20573. +/* ->delete() method for formatted nodes */
  20574. +static void delete_znode(jnode * node, reiser4_tree * tree)
  20575. +{
  20576. + znode *z;
  20577. +
  20578. + assert_rw_write_locked(&(tree->tree_lock));
  20579. + assert("vs-898", JF_ISSET(node, JNODE_HEARD_BANSHEE));
  20580. +
  20581. + z = JZNODE(node);
  20582. + assert("vs-899", z->c_count == 0);
  20583. +
  20584. + /* delete znode from sibling list. */
  20585. + sibling_list_remove(z);
  20586. +
  20587. + znode_remove(z, tree);
  20588. +}
  20589. +
  20590. +/* ->remove() method for formatted nodes */
  20591. +static int remove_znode(jnode * node, reiser4_tree * tree)
  20592. +{
  20593. + znode *z;
  20594. +
  20595. + assert_rw_write_locked(&(tree->tree_lock));
  20596. + z = JZNODE(node);
  20597. +
  20598. + if (z->c_count == 0) {
  20599. + /* detach znode from sibling list. */
  20600. + sibling_list_drop(z);
  20601. + /* this is called with tree spin-lock held, so call
  20602. + znode_remove() directly (rather than znode_lock_remove()). */
  20603. + znode_remove(z, tree);
  20604. + return 0;
  20605. + }
  20606. + return RETERR(-EBUSY);
  20607. +}
  20608. +
  20609. +/* ->init() method for formatted nodes */
  20610. +int init_znode(jnode * node)
  20611. +{
  20612. + znode *z;
  20613. +
  20614. + z = JZNODE(node);
  20615. + /* call node plugin to do actual initialization */
  20616. + z->nr_items = 0;
  20617. + return z->nplug->init(z);
  20618. +}
  20619. +
  20620. +/* ->clone() method for formatted nodes */
  20621. +static jnode *clone_formatted(jnode * node)
  20622. +{
  20623. + znode *clone;
  20624. +
  20625. + assert("vs-1430", jnode_is_znode(node));
  20626. + clone = zalloc(reiser4_ctx_gfp_mask_get());
  20627. + if (clone == NULL)
  20628. + return ERR_PTR(RETERR(-ENOMEM));
  20629. + zinit(clone, NULL, current_tree);
  20630. + jnode_set_block(ZJNODE(clone), jnode_get_block(node));
  20631. + /* ZJNODE(clone)->key.z is not initialized */
  20632. + clone->level = JZNODE(node)->level;
  20633. +
  20634. + return ZJNODE(clone);
  20635. +}
  20636. +
  20637. +/* jplug->clone for unformatted nodes */
  20638. +static jnode *clone_unformatted(jnode * node)
  20639. +{
  20640. + jnode *clone;
  20641. +
  20642. + assert("vs-1431", jnode_is_unformatted(node));
  20643. + clone = jalloc();
  20644. + if (clone == NULL)
  20645. + return ERR_PTR(RETERR(-ENOMEM));
  20646. +
  20647. + jnode_init(clone, current_tree, JNODE_UNFORMATTED_BLOCK);
  20648. + jnode_set_block(clone, jnode_get_block(node));
  20649. +
  20650. + return clone;
  20651. +
  20652. +}
  20653. +
  20654. +/*
  20655. + * Setup jnode plugin methods for various jnode types.
  20656. + */
  20657. +jnode_plugin jnode_plugins[LAST_JNODE_TYPE] = {
  20658. + [JNODE_UNFORMATTED_BLOCK] = {
  20659. + .h = {
  20660. + .type_id = REISER4_JNODE_PLUGIN_TYPE,
  20661. + .id = JNODE_UNFORMATTED_BLOCK,
  20662. + .pops = NULL,
  20663. + .label = "unformatted",
  20664. + .desc = "unformatted node",
  20665. + .linkage = {NULL, NULL}
  20666. + },
  20667. + .init = init_noinit,
  20668. + .parse = parse_noparse,
  20669. + .mapping = mapping_jnode,
  20670. + .index = index_jnode,
  20671. + .clone = clone_unformatted
  20672. + },
  20673. + [JNODE_FORMATTED_BLOCK] = {
  20674. + .h = {
  20675. + .type_id = REISER4_JNODE_PLUGIN_TYPE,
  20676. + .id = JNODE_FORMATTED_BLOCK,
  20677. + .pops = NULL,
  20678. + .label = "formatted",
  20679. + .desc = "formatted tree node",
  20680. + .linkage = {NULL, NULL}
  20681. + },
  20682. + .init = init_znode,
  20683. + .parse = parse_znode,
  20684. + .mapping = mapping_znode,
  20685. + .index = index_znode,
  20686. + .clone = clone_formatted
  20687. + },
  20688. + [JNODE_BITMAP] = {
  20689. + .h = {
  20690. + .type_id = REISER4_JNODE_PLUGIN_TYPE,
  20691. + .id = JNODE_BITMAP,
  20692. + .pops = NULL,
  20693. + .label = "bitmap",
  20694. + .desc = "bitmap node",
  20695. + .linkage = {NULL, NULL}
  20696. + },
  20697. + .init = init_noinit,
  20698. + .parse = parse_noparse,
  20699. + .mapping = mapping_bitmap,
  20700. + .index = index_is_address,
  20701. + .clone = NULL
  20702. + },
  20703. + [JNODE_IO_HEAD] = {
  20704. + .h = {
  20705. + .type_id = REISER4_JNODE_PLUGIN_TYPE,
  20706. + .id = JNODE_IO_HEAD,
  20707. + .pops = NULL,
  20708. + .label = "io head",
  20709. + .desc = "io head",
  20710. + .linkage = {NULL, NULL}
  20711. + },
  20712. + .init = init_noinit,
  20713. + .parse = parse_noparse,
  20714. + .mapping = mapping_bitmap,
  20715. + .index = index_is_address,
  20716. + .clone = NULL
  20717. + },
  20718. + [JNODE_INODE] = {
  20719. + .h = {
  20720. + .type_id = REISER4_JNODE_PLUGIN_TYPE,
  20721. + .id = JNODE_INODE,
  20722. + .pops = NULL,
  20723. + .label = "inode",
  20724. + .desc = "inode's builtin jnode",
  20725. + .linkage = {NULL, NULL}
  20726. + },
  20727. + .init = NULL,
  20728. + .parse = NULL,
  20729. + .mapping = NULL,
  20730. + .index = NULL,
  20731. + .clone = NULL
  20732. + }
  20733. +};
  20734. +
  20735. +/*
  20736. + * jnode destruction.
  20737. + *
  20738. + * Thread may use a jnode after it acquired a reference to it. References are
  20739. + * counted in ->x_count field. Reference protects jnode from being
  20740. + * recycled. This is different from protecting jnode data (that are stored in
  20741. + * jnode page) from being evicted from memory. Data are protected by jload()
  20742. + * and released by jrelse().
  20743. + *
  20744. + * If thread already possesses a reference to the jnode it can acquire another
  20745. + * one through jref(). Initial reference is obtained (usually) by locating
  20746. + * jnode in some indexing structure that depends on jnode type: formatted
  20747. + * nodes are kept in global hash table, where they are indexed by block
  20748. + * number, and also in the cbk cache. Unformatted jnodes are also kept in hash
  20749. + * table, which is indexed by oid and offset within file, and in per-inode
  20750. + * radix tree.
  20751. + *
  20752. + * Reference to jnode is released by jput(). If last reference is released,
  20753. + * jput_final() is called. This function determines whether jnode has to be
  20754. + * deleted (this happens when corresponding node is removed from the file
  20755. + * system, jnode is marked with JNODE_HEARD_BANSHEE bit in this case), or it
  20756. + * should be just "removed" (deleted from memory).
  20757. + *
  20758. + * Jnode destruction is signally delicate dance because of locking and RCU.
  20759. + */
  20760. +
  20761. +/*
  20762. + * Returns true if jnode cannot be removed right now. This check is called
  20763. + * under tree lock. If it returns true, jnode is irrevocably committed to be
  20764. + * deleted/removed.
  20765. + */
  20766. +static inline int jnode_is_busy(const jnode * node, jnode_type jtype)
  20767. +{
  20768. + /* if other thread managed to acquire a reference to this jnode, don't
  20769. + * free it. */
  20770. + if (atomic_read(&node->x_count) > 0)
  20771. + return 1;
  20772. + /* also, don't free znode that has children in memory */
  20773. + if (jtype == JNODE_FORMATTED_BLOCK && JZNODE(node)->c_count > 0)
  20774. + return 1;
  20775. + return 0;
  20776. +}
  20777. +
  20778. +/*
  20779. + * this is called as part of removing jnode. Based on jnode type, call
  20780. + * corresponding function that removes jnode from indices and returns it back
  20781. + * to the appropriate slab (through RCU).
  20782. + */
  20783. +static inline void
  20784. +jnode_remove(jnode * node, jnode_type jtype, reiser4_tree * tree)
  20785. +{
  20786. + switch (jtype) {
  20787. + case JNODE_UNFORMATTED_BLOCK:
  20788. + remove_jnode(node, tree);
  20789. + break;
  20790. + case JNODE_IO_HEAD:
  20791. + case JNODE_BITMAP:
  20792. + break;
  20793. + case JNODE_INODE:
  20794. + break;
  20795. + case JNODE_FORMATTED_BLOCK:
  20796. + remove_znode(node, tree);
  20797. + break;
  20798. + default:
  20799. + wrong_return_value("nikita-3196", "Wrong jnode type");
  20800. + }
  20801. +}
  20802. +
  20803. +/*
  20804. + * this is called as part of deleting jnode. Based on jnode type, call
  20805. + * corresponding function that removes jnode from indices and returns it back
  20806. + * to the appropriate slab (through RCU).
  20807. + *
  20808. + * This differs from jnode_remove() only for formatted nodes---for them
  20809. + * sibling list handling is different for removal and deletion.
  20810. + */
  20811. +static inline void
  20812. +jnode_delete(jnode * node, jnode_type jtype, reiser4_tree * tree UNUSED_ARG)
  20813. +{
  20814. + switch (jtype) {
  20815. + case JNODE_UNFORMATTED_BLOCK:
  20816. + remove_jnode(node, tree);
  20817. + break;
  20818. + case JNODE_IO_HEAD:
  20819. + case JNODE_BITMAP:
  20820. + break;
  20821. + case JNODE_FORMATTED_BLOCK:
  20822. + delete_znode(node, tree);
  20823. + break;
  20824. + case JNODE_INODE:
  20825. + default:
  20826. + wrong_return_value("nikita-3195", "Wrong jnode type");
  20827. + }
  20828. +}
  20829. +
  20830. +#if REISER4_DEBUG
  20831. +/*
  20832. + * remove jnode from the debugging list of all jnodes hanging off super-block.
  20833. + */
  20834. +void jnode_list_remove(jnode * node)
  20835. +{
  20836. + reiser4_super_info_data *sbinfo;
  20837. +
  20838. + sbinfo = get_super_private(jnode_get_tree(node)->super);
  20839. +
  20840. + spin_lock_irq(&sbinfo->all_guard);
  20841. + assert("nikita-2422", !list_empty(&node->jnodes));
  20842. + list_del_init(&node->jnodes);
  20843. + spin_unlock_irq(&sbinfo->all_guard);
  20844. +}
  20845. +#endif
  20846. +
  20847. +/*
  20848. + * this is called by jput_final() to remove jnode when last reference to it is
  20849. + * released.
  20850. + */
  20851. +static int jnode_try_drop(jnode * node)
  20852. +{
  20853. + int result;
  20854. + reiser4_tree *tree;
  20855. + jnode_type jtype;
  20856. +
  20857. + assert("nikita-2491", node != NULL);
  20858. + assert("nikita-2583", JF_ISSET(node, JNODE_RIP));
  20859. +
  20860. + tree = jnode_get_tree(node);
  20861. + jtype = jnode_get_type(node);
  20862. +
  20863. + spin_lock_jnode(node);
  20864. + write_lock_tree(tree);
  20865. + /*
  20866. + * if jnode has a page---leave it alone. Memory pressure will
  20867. + * eventually kill page and jnode.
  20868. + */
  20869. + if (jnode_page(node) != NULL) {
  20870. + write_unlock_tree(tree);
  20871. + spin_unlock_jnode(node);
  20872. + JF_CLR(node, JNODE_RIP);
  20873. + return RETERR(-EBUSY);
  20874. + }
  20875. +
  20876. + /* re-check ->x_count under tree lock. */
  20877. + result = jnode_is_busy(node, jtype);
  20878. + if (result == 0) {
  20879. + assert("nikita-2582", !JF_ISSET(node, JNODE_HEARD_BANSHEE));
  20880. + assert("jmacd-511/b", atomic_read(&node->d_count) == 0);
  20881. +
  20882. + spin_unlock_jnode(node);
  20883. + /* no page and no references---despatch him. */
  20884. + jnode_remove(node, jtype, tree);
  20885. + write_unlock_tree(tree);
  20886. + jnode_free(node, jtype);
  20887. + } else {
  20888. + /* busy check failed: reference was acquired by concurrent
  20889. + * thread. */
  20890. + write_unlock_tree(tree);
  20891. + spin_unlock_jnode(node);
  20892. + JF_CLR(node, JNODE_RIP);
  20893. + }
  20894. + return result;
  20895. +}
  20896. +
  20897. +/* jdelete() -- Delete jnode from the tree and file system */
  20898. +static int jdelete(jnode * node/* jnode to finish with */)
  20899. +{
  20900. + struct page *page;
  20901. + int result;
  20902. + reiser4_tree *tree;
  20903. + jnode_type jtype;
  20904. +
  20905. + assert("nikita-467", node != NULL);
  20906. + assert("nikita-2531", JF_ISSET(node, JNODE_RIP));
  20907. +
  20908. + jtype = jnode_get_type(node);
  20909. +
  20910. + page = jnode_lock_page(node);
  20911. + assert_spin_locked(&(node->guard));
  20912. +
  20913. + tree = jnode_get_tree(node);
  20914. +
  20915. + write_lock_tree(tree);
  20916. + /* re-check ->x_count under tree lock. */
  20917. + result = jnode_is_busy(node, jtype);
  20918. + if (likely(!result)) {
  20919. + assert("nikita-2123", JF_ISSET(node, JNODE_HEARD_BANSHEE));
  20920. + assert("jmacd-511", atomic_read(&node->d_count) == 0);
  20921. +
  20922. + /* detach page */
  20923. + if (page != NULL) {
  20924. + /*
  20925. + * FIXME this is racy against jnode_extent_write().
  20926. + */
  20927. + page_clear_jnode(page, node);
  20928. + }
  20929. + spin_unlock_jnode(node);
  20930. + /* goodbye */
  20931. + jnode_delete(node, jtype, tree);
  20932. + write_unlock_tree(tree);
  20933. + jnode_free(node, jtype);
  20934. + /* @node is no longer valid pointer */
  20935. + if (page != NULL)
  20936. + reiser4_drop_page(page);
  20937. + } else {
  20938. + /* busy check failed: reference was acquired by concurrent
  20939. + * thread. */
  20940. + JF_CLR(node, JNODE_RIP);
  20941. + write_unlock_tree(tree);
  20942. + spin_unlock_jnode(node);
  20943. + if (page != NULL)
  20944. + unlock_page(page);
  20945. + }
  20946. + return result;
  20947. +}
  20948. +
  20949. +/* drop jnode on the floor.
  20950. +
  20951. + Return value:
  20952. +
  20953. + -EBUSY: failed to drop jnode, because there are still references to it
  20954. +
  20955. + 0: successfully dropped jnode
  20956. +
  20957. +*/
  20958. +static int jdrop_in_tree(jnode * node, reiser4_tree * tree)
  20959. +{
  20960. + struct page *page;
  20961. + jnode_type jtype;
  20962. + int result;
  20963. +
  20964. + assert("zam-602", node != NULL);
  20965. + assert_rw_not_read_locked(&(tree->tree_lock));
  20966. + assert_rw_not_write_locked(&(tree->tree_lock));
  20967. + assert("nikita-2403", !JF_ISSET(node, JNODE_HEARD_BANSHEE));
  20968. +
  20969. + jtype = jnode_get_type(node);
  20970. +
  20971. + page = jnode_lock_page(node);
  20972. + assert_spin_locked(&(node->guard));
  20973. +
  20974. + write_lock_tree(tree);
  20975. +
  20976. + /* re-check ->x_count under tree lock. */
  20977. + result = jnode_is_busy(node, jtype);
  20978. + if (!result) {
  20979. + assert("nikita-2488", page == jnode_page(node));
  20980. + assert("nikita-2533", atomic_read(&node->d_count) == 0);
  20981. + if (page != NULL) {
  20982. + assert("nikita-2126", !PageDirty(page));
  20983. + assert("nikita-2127", PageUptodate(page));
  20984. + assert("nikita-2181", PageLocked(page));
  20985. + page_clear_jnode(page, node);
  20986. + }
  20987. + spin_unlock_jnode(node);
  20988. + jnode_remove(node, jtype, tree);
  20989. + write_unlock_tree(tree);
  20990. + jnode_free(node, jtype);
  20991. + if (page != NULL)
  20992. + reiser4_drop_page(page);
  20993. + } else {
  20994. + /* busy check failed: reference was acquired by concurrent
  20995. + * thread. */
  20996. + JF_CLR(node, JNODE_RIP);
  20997. + write_unlock_tree(tree);
  20998. + spin_unlock_jnode(node);
  20999. + if (page != NULL)
  21000. + unlock_page(page);
  21001. + }
  21002. + return result;
  21003. +}
  21004. +
  21005. +/* This function frees jnode "if possible". In particular, [dcx]_count has to
  21006. + be 0 (where applicable). */
  21007. +void jdrop(jnode * node)
  21008. +{
  21009. + jdrop_in_tree(node, jnode_get_tree(node));
  21010. +}
  21011. +
  21012. +/* IO head jnode implementation; The io heads are simple j-nodes with limited
  21013. + functionality (these j-nodes are not in any hash table) just for reading
  21014. + from and writing to disk. */
  21015. +
  21016. +jnode *reiser4_alloc_io_head(const reiser4_block_nr * block)
  21017. +{
  21018. + jnode *jal = jalloc();
  21019. +
  21020. + if (jal != NULL) {
  21021. + jnode_init(jal, current_tree, JNODE_IO_HEAD);
  21022. + jnode_set_block(jal, block);
  21023. + }
  21024. +
  21025. + jref(jal);
  21026. +
  21027. + return jal;
  21028. +}
  21029. +
  21030. +void reiser4_drop_io_head(jnode * node)
  21031. +{
  21032. + assert("zam-648", jnode_get_type(node) == JNODE_IO_HEAD);
  21033. +
  21034. + jput(node);
  21035. + jdrop(node);
  21036. +}
  21037. +
  21038. +/* protect keep jnode data from reiser4_releasepage() */
  21039. +void pin_jnode_data(jnode * node)
  21040. +{
  21041. + assert("zam-671", jnode_page(node) != NULL);
  21042. + get_page(jnode_page(node));
  21043. +}
  21044. +
  21045. +/* make jnode data free-able again */
  21046. +void unpin_jnode_data(jnode * node)
  21047. +{
  21048. + assert("zam-672", jnode_page(node) != NULL);
  21049. + put_page(jnode_page(node));
  21050. +}
  21051. +
  21052. +struct address_space *jnode_get_mapping(const jnode * node)
  21053. +{
  21054. + return jnode_ops(node)->mapping(node);
  21055. +}
  21056. +
  21057. +#if REISER4_DEBUG
  21058. +/* debugging aid: jnode invariant */
  21059. +int jnode_invariant_f(const jnode * node, char const **msg)
  21060. +{
  21061. +#define _ergo(ant, con) \
  21062. + ((*msg) = "{" #ant "} ergo {" #con "}", ergo((ant), (con)))
  21063. +#define _check(exp) ((*msg) = #exp, (exp))
  21064. +
  21065. + return _check(node != NULL) &&
  21066. + /* [jnode-queued] */
  21067. + /* only relocated node can be queued, except that when znode
  21068. + * is being deleted, its JNODE_RELOC bit is cleared */
  21069. + _ergo(JF_ISSET(node, JNODE_FLUSH_QUEUED),
  21070. + JF_ISSET(node, JNODE_RELOC) ||
  21071. + JF_ISSET(node, JNODE_HEARD_BANSHEE)) &&
  21072. + _check(node->jnodes.prev != NULL) &&
  21073. + _check(node->jnodes.next != NULL) &&
  21074. + /* [jnode-dirty] invariant */
  21075. + /* dirty inode is part of atom */
  21076. + _ergo(JF_ISSET(node, JNODE_DIRTY), node->atom != NULL) &&
  21077. + /* [jnode-oid] invariant */
  21078. + /* for unformatted node ->objectid and ->mapping fields are
  21079. + * consistent */
  21080. + _ergo(jnode_is_unformatted(node) && node->key.j.mapping != NULL,
  21081. + node->key.j.objectid ==
  21082. + get_inode_oid(node->key.j.mapping->host)) &&
  21083. + /* [jnode-atom-valid] invariant */
  21084. + /* node atom has valid state */
  21085. + _ergo(node->atom != NULL, node->atom->stage != ASTAGE_INVALID) &&
  21086. + /* [jnode-page-binding] invariant */
  21087. + /* if node points to page, it points back to node */
  21088. + _ergo(node->pg != NULL, jprivate(node->pg) == node) &&
  21089. + /* [jnode-refs] invariant */
  21090. + /* only referenced jnode can be loaded */
  21091. + _check(atomic_read(&node->x_count) >= atomic_read(&node->d_count));
  21092. +
  21093. +}
  21094. +
  21095. +static const char *jnode_type_name(jnode_type type)
  21096. +{
  21097. + switch (type) {
  21098. + case JNODE_UNFORMATTED_BLOCK:
  21099. + return "unformatted";
  21100. + case JNODE_FORMATTED_BLOCK:
  21101. + return "formatted";
  21102. + case JNODE_BITMAP:
  21103. + return "bitmap";
  21104. + case JNODE_IO_HEAD:
  21105. + return "io head";
  21106. + case JNODE_INODE:
  21107. + return "inode";
  21108. + case LAST_JNODE_TYPE:
  21109. + return "last";
  21110. + default:{
  21111. + static char unknown[30];
  21112. +
  21113. + sprintf(unknown, "unknown %i", type);
  21114. + return unknown;
  21115. + }
  21116. + }
  21117. +}
  21118. +
  21119. +#define jnode_state_name(node, flag) \
  21120. + (JF_ISSET((node), (flag)) ? ((#flag "|")+6) : "")
  21121. +
  21122. +/* debugging aid: output human readable information about @node */
  21123. +static void info_jnode(const char *prefix /* prefix to print */ ,
  21124. + const jnode * node/* node to print */)
  21125. +{
  21126. + assert("umka-068", prefix != NULL);
  21127. +
  21128. + if (node == NULL) {
  21129. + printk("%s: null\n", prefix);
  21130. + return;
  21131. + }
  21132. +
  21133. + printk
  21134. + ("%s: %p: state: %lx: [%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s], level: %i,"
  21135. + " block: %s, d_count: %d, x_count: %d, "
  21136. + "pg: %p, atom: %p, lock: %i:%i, type: %s, ", prefix, node,
  21137. + node->state,
  21138. + jnode_state_name(node, JNODE_PARSED),
  21139. + jnode_state_name(node, JNODE_HEARD_BANSHEE),
  21140. + jnode_state_name(node, JNODE_LEFT_CONNECTED),
  21141. + jnode_state_name(node, JNODE_RIGHT_CONNECTED),
  21142. + jnode_state_name(node, JNODE_ORPHAN),
  21143. + jnode_state_name(node, JNODE_CREATED),
  21144. + jnode_state_name(node, JNODE_RELOC),
  21145. + jnode_state_name(node, JNODE_OVRWR),
  21146. + jnode_state_name(node, JNODE_DIRTY),
  21147. + jnode_state_name(node, JNODE_IS_DYING),
  21148. + jnode_state_name(node, JNODE_RIP),
  21149. + jnode_state_name(node, JNODE_MISSED_IN_CAPTURE),
  21150. + jnode_state_name(node, JNODE_WRITEBACK),
  21151. + jnode_state_name(node, JNODE_DKSET),
  21152. + jnode_state_name(node, JNODE_REPACK),
  21153. + jnode_state_name(node, JNODE_CLUSTER_PAGE),
  21154. + jnode_get_level(node), sprint_address(jnode_get_block(node)),
  21155. + atomic_read(&node->d_count), atomic_read(&node->x_count),
  21156. + jnode_page(node), node->atom, 0, 0,
  21157. + jnode_type_name(jnode_get_type(node)));
  21158. + if (jnode_is_unformatted(node)) {
  21159. + printk("inode: %llu, index: %lu, ",
  21160. + node->key.j.objectid, node->key.j.index);
  21161. + }
  21162. +}
  21163. +
  21164. +/* debugging aid: check znode invariant and panic if it doesn't hold */
  21165. +static int jnode_invariant(jnode * node, int tlocked, int jlocked)
  21166. +{
  21167. + char const *failed_msg;
  21168. + int result;
  21169. + reiser4_tree *tree;
  21170. +
  21171. + tree = jnode_get_tree(node);
  21172. +
  21173. + assert("umka-063312", node != NULL);
  21174. + assert("umka-064321", tree != NULL);
  21175. +
  21176. + if (!jlocked && !tlocked)
  21177. + spin_lock_jnode((jnode *) node);
  21178. + if (!tlocked)
  21179. + read_lock_tree(jnode_get_tree(node));
  21180. + result = jnode_invariant_f(node, &failed_msg);
  21181. + if (!result) {
  21182. + info_jnode("corrupted node", node);
  21183. + warning("jmacd-555", "Condition %s failed", failed_msg);
  21184. + }
  21185. + if (!tlocked)
  21186. + read_unlock_tree(jnode_get_tree(node));
  21187. + if (!jlocked && !tlocked)
  21188. + spin_unlock_jnode((jnode *) node);
  21189. + return result;
  21190. +}
  21191. +
  21192. +#endif /* REISER4_DEBUG */
  21193. +
  21194. +/* Make Linus happy.
  21195. + Local variables:
  21196. + c-indentation-style: "K&R"
  21197. + mode-name: "LC"
  21198. + c-basic-offset: 8
  21199. + tab-width: 8
  21200. + fill-column: 80
  21201. + End:
  21202. +*/
  21203. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/jnode.h linux-4.14.2/fs/reiser4/jnode.h
  21204. --- linux-4.14.2.orig/fs/reiser4/jnode.h 1970-01-01 01:00:00.000000000 +0100
  21205. +++ linux-4.14.2/fs/reiser4/jnode.h 2017-11-26 22:13:09.000000000 +0100
  21206. @@ -0,0 +1,704 @@
  21207. +/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
  21208. + * reiser4/README */
  21209. +
  21210. +/* Declaration of jnode. See jnode.c for details. */
  21211. +
  21212. +#ifndef __JNODE_H__
  21213. +#define __JNODE_H__
  21214. +
  21215. +#include "forward.h"
  21216. +#include "type_safe_hash.h"
  21217. +#include "txnmgr.h"
  21218. +#include "key.h"
  21219. +#include "debug.h"
  21220. +#include "dformat.h"
  21221. +#include "page_cache.h"
  21222. +#include "context.h"
  21223. +
  21224. +#include "plugin/plugin.h"
  21225. +
  21226. +#include <linux/fs.h>
  21227. +#include <linux/mm.h>
  21228. +#include <linux/spinlock.h>
  21229. +#include <asm/atomic.h>
  21230. +#include <linux/bitops.h>
  21231. +#include <linux/list.h>
  21232. +#include <linux/rcupdate.h>
  21233. +
  21234. +/* declare hash table of jnodes (jnodes proper, that is, unformatted
  21235. + nodes) */
  21236. +TYPE_SAFE_HASH_DECLARE(j, jnode);
  21237. +
  21238. +/* declare hash table of znodes */
  21239. +TYPE_SAFE_HASH_DECLARE(z, znode);
  21240. +
  21241. +struct jnode_key {
  21242. + __u64 objectid;
  21243. + unsigned long index;
  21244. + struct address_space *mapping;
  21245. +};
  21246. +
  21247. +/*
  21248. + Jnode is the "base class" of other nodes in reiser4. It is also happens to
  21249. + be exactly the node we use for unformatted tree nodes.
  21250. +
  21251. + Jnode provides following basic functionality:
  21252. +
  21253. + . reference counting and indexing.
  21254. +
  21255. + . integration with page cache. Jnode has ->pg reference to which page can
  21256. + be attached.
  21257. +
  21258. + . interface to transaction manager. It is jnode that is kept in transaction
  21259. + manager lists, attached to atoms, etc. (NOTE-NIKITA one may argue that this
  21260. + means, there should be special type of jnode for inode.)
  21261. +
  21262. + Locking:
  21263. +
  21264. + Spin lock: the following fields are protected by the per-jnode spin lock:
  21265. +
  21266. + ->state
  21267. + ->atom
  21268. + ->capture_link
  21269. +
  21270. + Following fields are protected by the global tree lock:
  21271. +
  21272. + ->link
  21273. + ->key.z (content of ->key.z is only changed in znode_rehash())
  21274. + ->key.j
  21275. +
  21276. + Atomic counters
  21277. +
  21278. + ->x_count
  21279. + ->d_count
  21280. +
  21281. + ->pg, and ->data are protected by spin lock for unused jnode and are
  21282. + immutable for used jnode (one for which fs/reiser4/vfs_ops.c:releasable()
  21283. + is false).
  21284. +
  21285. + ->tree is immutable after creation
  21286. +
  21287. + Unclear
  21288. +
  21289. + ->blocknr: should be under jnode spin-lock, but current interface is based
  21290. + on passing of block address.
  21291. +
  21292. + If you ever need to spin lock two nodes at once, do this in "natural"
  21293. + memory order: lock znode with lower address first. (See lock_two_nodes().)
  21294. +
  21295. + Invariants involving this data-type:
  21296. +
  21297. + [jnode-dirty]
  21298. + [jnode-refs]
  21299. + [jnode-oid]
  21300. + [jnode-queued]
  21301. + [jnode-atom-valid]
  21302. + [jnode-page-binding]
  21303. +*/
  21304. +
  21305. +struct jnode {
  21306. +#if REISER4_DEBUG
  21307. +#define JMAGIC 0x52654973 /* "ReIs" */
  21308. + int magic;
  21309. +#endif
  21310. + /* FIRST CACHE LINE (16 bytes): data used by jload */
  21311. +
  21312. + /* jnode's state: bitwise flags from the reiser4_jnode_state enum. */
  21313. + /* 0 */ unsigned long state;
  21314. +
  21315. + /* lock, protecting jnode's fields. */
  21316. + /* 4 */ spinlock_t load;
  21317. +
  21318. + /* counter of references to jnode itself. Increased on jref().
  21319. + Decreased on jput().
  21320. + */
  21321. + /* 8 */ atomic_t x_count;
  21322. +
  21323. + /* counter of references to jnode's data. Pin data page(s) in
  21324. + memory while this is greater than 0. Increased on jload().
  21325. + Decreased on jrelse().
  21326. + */
  21327. + /* 12 */ atomic_t d_count;
  21328. +
  21329. + /* SECOND CACHE LINE: data used by hash table lookups */
  21330. +
  21331. + /* 16 */ union {
  21332. + /* znodes are hashed by block number */
  21333. + reiser4_block_nr z;
  21334. + /* unformatted nodes are hashed by mapping plus offset */
  21335. + struct jnode_key j;
  21336. + } key;
  21337. +
  21338. + /* THIRD CACHE LINE */
  21339. +
  21340. + /* 32 */ union {
  21341. + /* pointers to maintain hash-table */
  21342. + z_hash_link z;
  21343. + j_hash_link j;
  21344. + } link;
  21345. +
  21346. + /* pointer to jnode page. */
  21347. + /* 36 */ struct page *pg;
  21348. + /* pointer to node itself. This is page_address(node->pg) when page is
  21349. + attached to the jnode
  21350. + */
  21351. + /* 40 */ void *data;
  21352. +
  21353. + /* 44 */ reiser4_tree *tree;
  21354. +
  21355. + /* FOURTH CACHE LINE: atom related fields */
  21356. +
  21357. + /* 48 */ spinlock_t guard;
  21358. +
  21359. + /* atom the block is in, if any */
  21360. + /* 52 */ txn_atom *atom;
  21361. +
  21362. + /* capture list */
  21363. + /* 56 */ struct list_head capture_link;
  21364. +
  21365. + /* FIFTH CACHE LINE */
  21366. +
  21367. + /* 64 */ struct rcu_head rcu;
  21368. + /* crosses cache line */
  21369. +
  21370. + /* SIXTH CACHE LINE */
  21371. +
  21372. + /* the real blocknr (where io is going to/from) */
  21373. + /* 80 */ reiser4_block_nr blocknr;
  21374. + /* Parent item type, unformatted and CRC need it for
  21375. + * offset => key conversion. */
  21376. + /* NOTE: this parent_item_id looks like jnode type. */
  21377. + /* 88 */ reiser4_plugin_id parent_item_id;
  21378. + /* 92 */
  21379. +#if REISER4_DEBUG
  21380. + /* list of all jnodes for debugging purposes. */
  21381. + struct list_head jnodes;
  21382. + /* how many times this jnode was written in one transaction */
  21383. + int written;
  21384. + /* this indicates which atom's list the jnode is on */
  21385. + atom_list list;
  21386. +#endif
  21387. +} __attribute__ ((aligned(16)));
  21388. +
  21389. +/*
  21390. + * jnode types. Enumeration of existing jnode types.
  21391. + */
  21392. +typedef enum {
  21393. + JNODE_UNFORMATTED_BLOCK, /* unformatted block */
  21394. + JNODE_FORMATTED_BLOCK, /* formatted block, znode */
  21395. + JNODE_BITMAP, /* bitmap */
  21396. + JNODE_IO_HEAD, /* jnode representing a block in the
  21397. + * wandering log */
  21398. + JNODE_INODE, /* jnode embedded into inode */
  21399. + LAST_JNODE_TYPE
  21400. +} jnode_type;
  21401. +
  21402. +/* jnode states */
  21403. +typedef enum {
  21404. + /* jnode's page is loaded and data checked */
  21405. + JNODE_PARSED = 0,
  21406. + /* node was deleted, not all locks on it were released. This
  21407. + node is empty and is going to be removed from the tree
  21408. + shortly. */
  21409. + JNODE_HEARD_BANSHEE = 1,
  21410. + /* left sibling pointer is valid */
  21411. + JNODE_LEFT_CONNECTED = 2,
  21412. + /* right sibling pointer is valid */
  21413. + JNODE_RIGHT_CONNECTED = 3,
  21414. +
  21415. + /* znode was just created and doesn't yet have a pointer from
  21416. + its parent */
  21417. + JNODE_ORPHAN = 4,
  21418. +
  21419. + /* this node was created by its transaction and has not been assigned
  21420. + a block address. */
  21421. + JNODE_CREATED = 5,
  21422. +
  21423. + /* this node is currently relocated */
  21424. + JNODE_RELOC = 6,
  21425. + /* this node is currently wandered */
  21426. + JNODE_OVRWR = 7,
  21427. +
  21428. + /* this znode has been modified */
  21429. + JNODE_DIRTY = 8,
  21430. +
  21431. + /* znode lock is being invalidated */
  21432. + JNODE_IS_DYING = 9,
  21433. +
  21434. + /* THIS PLACE IS INTENTIONALLY LEFT BLANK */
  21435. +
  21436. + /* jnode is queued for flushing. */
  21437. + JNODE_FLUSH_QUEUED = 12,
  21438. +
  21439. + /* In the following bits jnode type is encoded. */
  21440. + JNODE_TYPE_1 = 13,
  21441. + JNODE_TYPE_2 = 14,
  21442. + JNODE_TYPE_3 = 15,
  21443. +
  21444. + /* jnode is being destroyed */
  21445. + JNODE_RIP = 16,
  21446. +
  21447. + /* znode was not captured during locking (it might so be because
  21448. + ->level != LEAF_LEVEL and lock_mode == READ_LOCK) */
  21449. + JNODE_MISSED_IN_CAPTURE = 17,
  21450. +
  21451. + /* write is in progress */
  21452. + JNODE_WRITEBACK = 18,
  21453. +
  21454. + /* unused flag */
  21455. + JNODE_NEW = 19,
  21456. +
  21457. + /* delimiting keys are already set for this znode. */
  21458. + JNODE_DKSET = 20,
  21459. +
  21460. + /* when this bit is set page and jnode can not be disconnected */
  21461. + JNODE_WRITE_PREPARED = 21,
  21462. +
  21463. + JNODE_CLUSTER_PAGE = 22,
  21464. + /* Jnode is marked for repacking, that means the reiser4 flush and the
  21465. + * block allocator should process this node special way */
  21466. + JNODE_REPACK = 23,
  21467. + /* node should be converted by flush in squalloc phase */
  21468. + JNODE_CONVERTIBLE = 24,
  21469. + /*
  21470. + * When jnode is dirtied for the first time in given transaction,
  21471. + * do_jnode_make_dirty() checks whether this jnode can possible became
  21472. + * member of overwrite set. If so, this bit is set, and one block is
  21473. + * reserved in the ->flush_reserved space of atom.
  21474. + *
  21475. + * This block is "used" (and JNODE_FLUSH_RESERVED bit is cleared) when
  21476. + *
  21477. + * (1) flush decides that we want this block to go into relocate
  21478. + * set after all.
  21479. + *
  21480. + * (2) wandering log is allocated (by log writer)
  21481. + *
  21482. + * (3) extent is allocated
  21483. + *
  21484. + */
  21485. + JNODE_FLUSH_RESERVED = 29
  21486. +} reiser4_jnode_state;
  21487. +
  21488. +/* Macros for accessing the jnode state. */
  21489. +
  21490. +static inline void JF_CLR(jnode * j, int f)
  21491. +{
  21492. + assert("unknown-1", j->magic == JMAGIC);
  21493. + clear_bit(f, &j->state);
  21494. +}
  21495. +static inline int JF_ISSET(const jnode * j, int f)
  21496. +{
  21497. + assert("unknown-2", j->magic == JMAGIC);
  21498. + return test_bit(f, &((jnode *) j)->state);
  21499. +}
  21500. +static inline void JF_SET(jnode * j, int f)
  21501. +{
  21502. + assert("unknown-3", j->magic == JMAGIC);
  21503. + set_bit(f, &j->state);
  21504. +}
  21505. +
  21506. +static inline int JF_TEST_AND_SET(jnode * j, int f)
  21507. +{
  21508. + assert("unknown-4", j->magic == JMAGIC);
  21509. + return test_and_set_bit(f, &j->state);
  21510. +}
  21511. +
  21512. +static inline void spin_lock_jnode(jnode *node)
  21513. +{
  21514. + /* check that spinlocks of lower priorities are not held */
  21515. + assert("", (LOCK_CNT_NIL(rw_locked_tree) &&
  21516. + LOCK_CNT_NIL(spin_locked_txnh) &&
  21517. + LOCK_CNT_NIL(spin_locked_zlock) &&
  21518. + LOCK_CNT_NIL(rw_locked_dk) &&
  21519. + LOCK_CNT_LT(spin_locked_jnode, 2)));
  21520. +
  21521. + spin_lock(&(node->guard));
  21522. +
  21523. + LOCK_CNT_INC(spin_locked_jnode);
  21524. + LOCK_CNT_INC(spin_locked);
  21525. +}
  21526. +
  21527. +static inline void spin_unlock_jnode(jnode *node)
  21528. +{
  21529. + assert_spin_locked(&(node->guard));
  21530. + assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_jnode));
  21531. + assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
  21532. +
  21533. + LOCK_CNT_DEC(spin_locked_jnode);
  21534. + LOCK_CNT_DEC(spin_locked);
  21535. +
  21536. + spin_unlock(&(node->guard));
  21537. +}
  21538. +
  21539. +static inline int jnode_is_in_deleteset(const jnode * node)
  21540. +{
  21541. + return JF_ISSET(node, JNODE_RELOC);
  21542. +}
  21543. +
  21544. +extern int init_jnodes(void);
  21545. +extern void done_jnodes(void);
  21546. +
  21547. +/* Jnode routines */
  21548. +extern jnode *jalloc(void);
  21549. +extern void jfree(jnode * node) NONNULL;
  21550. +extern jnode *jclone(jnode *);
  21551. +extern jnode *jlookup(reiser4_tree * tree,
  21552. + oid_t objectid, unsigned long ind) NONNULL;
  21553. +extern jnode *jfind(struct address_space *, unsigned long index) NONNULL;
  21554. +extern jnode *jnode_by_page(struct page *pg) NONNULL;
  21555. +extern jnode *jnode_of_page(struct page *pg) NONNULL;
  21556. +void jnode_attach_page(jnode * node, struct page *pg);
  21557. +
  21558. +void unhash_unformatted_jnode(jnode *);
  21559. +extern jnode *page_next_jnode(jnode * node) NONNULL;
  21560. +extern void jnode_init(jnode * node, reiser4_tree * tree, jnode_type) NONNULL;
  21561. +extern void jnode_make_dirty(jnode * node) NONNULL;
  21562. +extern void jnode_make_clean(jnode * node) NONNULL;
  21563. +extern void jnode_make_wander_nolock(jnode * node) NONNULL;
  21564. +extern void jnode_make_wander(jnode *) NONNULL;
  21565. +extern void znode_make_reloc(znode * , flush_queue_t *) NONNULL;
  21566. +extern void unformatted_make_reloc(jnode *, flush_queue_t *) NONNULL;
  21567. +extern struct address_space *jnode_get_mapping(const jnode * node) NONNULL;
  21568. +
  21569. +/**
  21570. + * jnode_get_block
  21571. + * @node: jnode to query
  21572. + *
  21573. + */
  21574. +static inline const reiser4_block_nr *jnode_get_block(const jnode *node)
  21575. +{
  21576. + assert("nikita-528", node != NULL);
  21577. +
  21578. + return &node->blocknr;
  21579. +}
  21580. +
  21581. +/**
  21582. + * jnode_set_block
  21583. + * @node: jnode to update
  21584. + * @blocknr: new block nr
  21585. + */
  21586. +static inline void jnode_set_block(jnode *node, const reiser4_block_nr *blocknr)
  21587. +{
  21588. + assert("nikita-2020", node != NULL);
  21589. + assert("umka-055", blocknr != NULL);
  21590. + node->blocknr = *blocknr;
  21591. +}
  21592. +
  21593. +
  21594. +/* block number for IO. Usually this is the same as jnode_get_block(), unless
  21595. + * jnode was emergency flushed---then block number chosen by eflush is
  21596. + * used. */
  21597. +static inline const reiser4_block_nr *jnode_get_io_block(jnode * node)
  21598. +{
  21599. + assert("nikita-2768", node != NULL);
  21600. + assert_spin_locked(&(node->guard));
  21601. +
  21602. + return jnode_get_block(node);
  21603. +}
  21604. +
  21605. +/* Jnode flush interface. */
  21606. +extern reiser4_blocknr_hint *reiser4_pos_hint(flush_pos_t *pos);
  21607. +extern flush_queue_t *reiser4_pos_fq(flush_pos_t *pos);
  21608. +
  21609. +/* FIXME-VS: these are used in plugin/item/extent.c */
  21610. +
  21611. +/* does extent_get_block have to be called */
  21612. +#define jnode_mapped(node) JF_ISSET (node, JNODE_MAPPED)
  21613. +#define jnode_set_mapped(node) JF_SET (node, JNODE_MAPPED)
  21614. +
  21615. +/* the node should be converted during flush squalloc phase */
  21616. +#define jnode_convertible(node) JF_ISSET (node, JNODE_CONVERTIBLE)
  21617. +#define jnode_set_convertible(node) JF_SET (node, JNODE_CONVERTIBLE)
  21618. +
  21619. +/* Macros to convert from jnode to znode, znode to jnode. These are macros
  21620. + because C doesn't allow overloading of const prototypes. */
  21621. +#define ZJNODE(x) (&(x)->zjnode)
  21622. +#define JZNODE(x) \
  21623. +({ \
  21624. + typeof(x) __tmp_x; \
  21625. + \
  21626. + __tmp_x = (x); \
  21627. + assert("jmacd-1300", jnode_is_znode(__tmp_x)); \
  21628. + (znode*) __tmp_x; \
  21629. +})
  21630. +
  21631. +extern int jnodes_tree_init(reiser4_tree * tree);
  21632. +extern int jnodes_tree_done(reiser4_tree * tree);
  21633. +
  21634. +#if REISER4_DEBUG
  21635. +
  21636. +extern int znode_is_any_locked(const znode * node);
  21637. +extern void jnode_list_remove(jnode * node);
  21638. +
  21639. +#else
  21640. +
  21641. +#define jnode_list_remove(node) noop
  21642. +
  21643. +#endif
  21644. +
  21645. +int znode_is_root(const znode * node) NONNULL;
  21646. +
  21647. +/* bump reference counter on @node */
  21648. +static inline void add_x_ref(jnode * node/* node to increase x_count of */)
  21649. +{
  21650. + assert("nikita-1911", node != NULL);
  21651. +
  21652. + atomic_inc(&node->x_count);
  21653. + LOCK_CNT_INC(x_refs);
  21654. +}
  21655. +
  21656. +static inline void dec_x_ref(jnode * node)
  21657. +{
  21658. + assert("nikita-3215", node != NULL);
  21659. + assert("nikita-3216", atomic_read(&node->x_count) > 0);
  21660. +
  21661. + atomic_dec(&node->x_count);
  21662. + assert("nikita-3217", LOCK_CNT_GTZ(x_refs));
  21663. + LOCK_CNT_DEC(x_refs);
  21664. +}
  21665. +
  21666. +/* jref() - increase counter of references to jnode/znode (x_count) */
  21667. +static inline jnode *jref(jnode * node)
  21668. +{
  21669. + assert("jmacd-508", (node != NULL) && !IS_ERR(node));
  21670. + add_x_ref(node);
  21671. + return node;
  21672. +}
  21673. +
  21674. +/* get the page of jnode */
  21675. +static inline struct page *jnode_page(const jnode * node)
  21676. +{
  21677. + return node->pg;
  21678. +}
  21679. +
  21680. +/* return pointer to jnode data */
  21681. +static inline char *jdata(const jnode * node)
  21682. +{
  21683. + assert("nikita-1415", node != NULL);
  21684. + assert("nikita-3198", jnode_page(node) != NULL);
  21685. + return node->data;
  21686. +}
  21687. +
  21688. +static inline int jnode_is_loaded(const jnode * node)
  21689. +{
  21690. + assert("zam-506", node != NULL);
  21691. + return atomic_read(&node->d_count) > 0;
  21692. +}
  21693. +
  21694. +extern void page_clear_jnode(struct page *page, jnode * node) NONNULL;
  21695. +
  21696. +static inline void jnode_set_reloc(jnode * node)
  21697. +{
  21698. + assert("nikita-2431", node != NULL);
  21699. + assert("nikita-2432", !JF_ISSET(node, JNODE_OVRWR));
  21700. + JF_SET(node, JNODE_RELOC);
  21701. +}
  21702. +
  21703. +/* jload/jwrite/junload give a bread/bwrite/brelse functionality for jnodes */
  21704. +
  21705. +extern int jload_gfp(jnode *, gfp_t, int do_kmap) NONNULL;
  21706. +
  21707. +static inline int jload(jnode *node)
  21708. +{
  21709. + return jload_gfp(node, reiser4_ctx_gfp_mask_get(), 1);
  21710. +}
  21711. +
  21712. +extern int jinit_new(jnode *, gfp_t) NONNULL;
  21713. +extern int jstartio(jnode *) NONNULL;
  21714. +
  21715. +extern void jdrop(jnode *) NONNULL;
  21716. +extern int jwait_io(jnode *, int rw) NONNULL;
  21717. +
  21718. +void jload_prefetch(jnode *);
  21719. +
  21720. +extern jnode *reiser4_alloc_io_head(const reiser4_block_nr * block) NONNULL;
  21721. +extern void reiser4_drop_io_head(jnode * node) NONNULL;
  21722. +
  21723. +static inline reiser4_tree *jnode_get_tree(const jnode * node)
  21724. +{
  21725. + assert("nikita-2691", node != NULL);
  21726. + return node->tree;
  21727. +}
  21728. +
  21729. +extern void pin_jnode_data(jnode *);
  21730. +extern void unpin_jnode_data(jnode *);
  21731. +
  21732. +static inline jnode_type jnode_get_type(const jnode * node)
  21733. +{
  21734. + static const unsigned long state_mask =
  21735. + (1 << JNODE_TYPE_1) | (1 << JNODE_TYPE_2) | (1 << JNODE_TYPE_3);
  21736. +
  21737. + static jnode_type mask_to_type[] = {
  21738. + /* JNODE_TYPE_3 : JNODE_TYPE_2 : JNODE_TYPE_1 */
  21739. +
  21740. + /* 000 */
  21741. + [0] = JNODE_FORMATTED_BLOCK,
  21742. + /* 001 */
  21743. + [1] = JNODE_UNFORMATTED_BLOCK,
  21744. + /* 010 */
  21745. + [2] = JNODE_BITMAP,
  21746. + /* 011 */
  21747. + [3] = LAST_JNODE_TYPE, /*invalid */
  21748. + /* 100 */
  21749. + [4] = JNODE_INODE,
  21750. + /* 101 */
  21751. + [5] = LAST_JNODE_TYPE,
  21752. + /* 110 */
  21753. + [6] = JNODE_IO_HEAD,
  21754. + /* 111 */
  21755. + [7] = LAST_JNODE_TYPE, /* invalid */
  21756. + };
  21757. +
  21758. + return mask_to_type[(node->state & state_mask) >> JNODE_TYPE_1];
  21759. +}
  21760. +
  21761. +/* returns true if node is a znode */
  21762. +static inline int jnode_is_znode(const jnode * node)
  21763. +{
  21764. + return jnode_get_type(node) == JNODE_FORMATTED_BLOCK;
  21765. +}
  21766. +
  21767. +static inline int jnode_is_flushprepped(jnode * node)
  21768. +{
  21769. + assert("jmacd-78212", node != NULL);
  21770. + assert_spin_locked(&(node->guard));
  21771. + return !JF_ISSET(node, JNODE_DIRTY) || JF_ISSET(node, JNODE_RELOC) ||
  21772. + JF_ISSET(node, JNODE_OVRWR);
  21773. +}
  21774. +
  21775. +/* Return true if @node has already been processed by the squeeze and allocate
  21776. + process. This implies the block address has been finalized for the
  21777. + duration of this atom (or it is clean and will remain in place). If this
  21778. + returns true you may use the block number as a hint. */
  21779. +static inline int jnode_check_flushprepped(jnode * node)
  21780. +{
  21781. + int result;
  21782. +
  21783. + /* It must be clean or relocated or wandered. New allocations are set
  21784. + * to relocate. */
  21785. + spin_lock_jnode(node);
  21786. + result = jnode_is_flushprepped(node);
  21787. + spin_unlock_jnode(node);
  21788. + return result;
  21789. +}
  21790. +
  21791. +/* returns true if node is unformatted */
  21792. +static inline int jnode_is_unformatted(const jnode * node)
  21793. +{
  21794. + assert("jmacd-0123", node != NULL);
  21795. + return jnode_get_type(node) == JNODE_UNFORMATTED_BLOCK;
  21796. +}
  21797. +
  21798. +/* returns true if node represents a cluster cache page */
  21799. +static inline int jnode_is_cluster_page(const jnode * node)
  21800. +{
  21801. + assert("edward-50", node != NULL);
  21802. + return (JF_ISSET(node, JNODE_CLUSTER_PAGE));
  21803. +}
  21804. +
  21805. +/* returns true if node is builtin inode's jnode */
  21806. +static inline int jnode_is_inode(const jnode * node)
  21807. +{
  21808. + assert("vs-1240", node != NULL);
  21809. + return jnode_get_type(node) == JNODE_INODE;
  21810. +}
  21811. +
  21812. +static inline jnode_plugin *jnode_ops_of(const jnode_type type)
  21813. +{
  21814. + assert("nikita-2367", type < LAST_JNODE_TYPE);
  21815. + return jnode_plugin_by_id((reiser4_plugin_id) type);
  21816. +}
  21817. +
  21818. +static inline jnode_plugin *jnode_ops(const jnode * node)
  21819. +{
  21820. + assert("nikita-2366", node != NULL);
  21821. +
  21822. + return jnode_ops_of(jnode_get_type(node));
  21823. +}
  21824. +
  21825. +/* Get the index of a block. */
  21826. +static inline unsigned long jnode_get_index(jnode * node)
  21827. +{
  21828. + return jnode_ops(node)->index(node);
  21829. +}
  21830. +
  21831. +/* return true if "node" is the root */
  21832. +static inline int jnode_is_root(const jnode * node)
  21833. +{
  21834. + return jnode_is_znode(node) && znode_is_root(JZNODE(node));
  21835. +}
  21836. +
  21837. +extern struct address_space *mapping_jnode(const jnode * node);
  21838. +extern unsigned long index_jnode(const jnode * node);
  21839. +
  21840. +static inline void jput(jnode * node);
  21841. +extern void jput_final(jnode * node);
  21842. +
  21843. +/* bump data counter on @node */
  21844. +static inline void add_d_ref(jnode * node/* node to increase d_count of */)
  21845. +{
  21846. + assert("nikita-1962", node != NULL);
  21847. +
  21848. + atomic_inc(&node->d_count);
  21849. + if (jnode_is_unformatted(node) || jnode_is_znode(node))
  21850. + LOCK_CNT_INC(d_refs);
  21851. +}
  21852. +
  21853. +/* jput() - decrement x_count reference counter on znode.
  21854. +
  21855. + Count may drop to 0, jnode stays in cache until memory pressure causes the
  21856. + eviction of its page. The c_count variable also ensures that children are
  21857. + pressured out of memory before the parent. The jnode remains hashed as
  21858. + long as the VM allows its page to stay in memory.
  21859. +*/
  21860. +static inline void jput(jnode * node)
  21861. +{
  21862. + assert("jmacd-509", node != NULL);
  21863. + assert("jmacd-510", atomic_read(&node->x_count) > 0);
  21864. + assert("zam-926", reiser4_schedulable());
  21865. + LOCK_CNT_DEC(x_refs);
  21866. +
  21867. + rcu_read_lock();
  21868. + /*
  21869. + * we don't need any kind of lock here--jput_final() uses RCU.
  21870. + */
  21871. + if (unlikely(atomic_dec_and_test(&node->x_count)))
  21872. + jput_final(node);
  21873. + else
  21874. + rcu_read_unlock();
  21875. + assert("nikita-3473", reiser4_schedulable());
  21876. +}
  21877. +
  21878. +extern void jrelse(jnode * node);
  21879. +extern void jrelse_tail(jnode * node);
  21880. +
  21881. +extern jnode *jnode_rip_sync(reiser4_tree * t, jnode * node);
  21882. +
  21883. +/* resolve race with jput */
  21884. +static inline jnode *jnode_rip_check(reiser4_tree * tree, jnode * node)
  21885. +{
  21886. + if (unlikely(JF_ISSET(node, JNODE_RIP)))
  21887. + node = jnode_rip_sync(tree, node);
  21888. + return node;
  21889. +}
  21890. +
  21891. +extern reiser4_key *jnode_build_key(const jnode *node, reiser4_key * key);
  21892. +
  21893. +#if REISER4_DEBUG
  21894. +extern int jnode_invariant_f(const jnode *node, char const **msg);
  21895. +#endif
  21896. +
  21897. +extern jnode_plugin jnode_plugins[LAST_JNODE_TYPE];
  21898. +
  21899. +/* __JNODE_H__ */
  21900. +#endif
  21901. +
  21902. +/* Make Linus happy.
  21903. + Local variables:
  21904. + c-indentation-style: "K&R"
  21905. + mode-name: "LC"
  21906. + c-basic-offset: 8
  21907. + tab-width: 8
  21908. + fill-column: 120
  21909. + End:
  21910. +*/
  21911. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/kassign.c linux-4.14.2/fs/reiser4/kassign.c
  21912. --- linux-4.14.2.orig/fs/reiser4/kassign.c 1970-01-01 01:00:00.000000000 +0100
  21913. +++ linux-4.14.2/fs/reiser4/kassign.c 2017-11-26 22:13:09.000000000 +0100
  21914. @@ -0,0 +1,677 @@
  21915. +/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
  21916. + * reiser4/README */
  21917. +
  21918. +/* Key assignment policy implementation */
  21919. +
  21920. +/*
  21921. + * In reiser4 every piece of file system data and meta-data has a key. Keys
  21922. + * are used to store information in and retrieve it from reiser4 internal
  21923. + * tree. In addition to this, keys define _ordering_ of all file system
  21924. + * information: things having close keys are placed into the same or
  21925. + * neighboring (in the tree order) nodes of the tree. As our block allocator
  21926. + * tries to respect tree order (see flush.c), keys also define order in which
  21927. + * things are laid out on the disk, and hence, affect performance directly.
  21928. + *
  21929. + * Obviously, assignment of keys to data and meta-data should be consistent
  21930. + * across whole file system. Algorithm that calculates a key for a given piece
  21931. + * of data or meta-data is referred to as "key assignment".
  21932. + *
  21933. + * Key assignment is too expensive to be implemented as a plugin (that is,
  21934. + * with an ability to support different key assignment schemas in the same
  21935. + * compiled kernel image). As a compromise, all key-assignment functions and
  21936. + * data-structures are collected in this single file, so that modifications to
  21937. + * key assignment algorithm can be localized. Additional changes may be
  21938. + * required in key.[ch].
  21939. + *
  21940. + * Current default reiser4 key assignment algorithm is dubbed "Plan A". As one
  21941. + * may guess, there is "Plan B" too.
  21942. + *
  21943. + */
  21944. +
  21945. +/*
  21946. + * Additional complication with key assignment implementation is a requirement
  21947. + * to support different key length.
  21948. + */
  21949. +
  21950. +/*
  21951. + * KEY ASSIGNMENT: PLAN A, LONG KEYS.
  21952. + *
  21953. + * DIRECTORY ITEMS
  21954. + *
  21955. + * | 60 | 4 | 7 |1| 56 | 64 | 64 |
  21956. + * +--------------+---+---+-+-------------+------------------+-----------------+
  21957. + * | dirid | 0 | F |H| prefix-1 | prefix-2 | prefix-3/hash |
  21958. + * +--------------+---+---+-+-------------+------------------+-----------------+
  21959. + * | | | | |
  21960. + * | 8 bytes | 8 bytes | 8 bytes | 8 bytes |
  21961. + *
  21962. + * dirid objectid of directory this item is for
  21963. + *
  21964. + * F fibration, see fs/reiser4/plugin/fibration.[ch]
  21965. + *
  21966. + * H 1 if last 8 bytes of the key contain hash,
  21967. + * 0 if last 8 bytes of the key contain prefix-3
  21968. + *
  21969. + * prefix-1 first 7 characters of file name.
  21970. + * Padded by zeroes if name is not long enough.
  21971. + *
  21972. + * prefix-2 next 8 characters of the file name.
  21973. + *
  21974. + * prefix-3 next 8 characters of the file name.
  21975. + *
  21976. + * hash hash of the rest of file name (i.e., portion of file
  21977. + * name not included into prefix-1 and prefix-2).
  21978. + *
  21979. + * File names shorter than 23 (== 7 + 8 + 8) characters are completely encoded
  21980. + * in the key. Such file names are called "short". They are distinguished by H
  21981. + * bit set to 0 in the key.
  21982. + *
  21983. + * Other file names are "long". For long name, H bit is 1, and first 15 (== 7
  21984. + * + 8) characters are encoded in prefix-1 and prefix-2 portions of the
  21985. + * key. Last 8 bytes of the key are occupied by hash of the remaining
  21986. + * characters of the name.
  21987. + *
  21988. + * This key assignment reaches following important goals:
  21989. + *
  21990. + * (1) directory entries are sorted in approximately lexicographical
  21991. + * order.
  21992. + *
  21993. + * (2) collisions (when multiple directory items have the same key), while
  21994. + * principally unavoidable in a tree with fixed length keys, are rare.
  21995. + *
  21996. + * STAT DATA
  21997. + *
  21998. + * | 60 | 4 | 64 | 4 | 60 | 64 |
  21999. + * +--------------+---+-----------------+---+--------------+-----------------+
  22000. + * | locality id | 1 | ordering | 0 | objectid | 0 |
  22001. + * +--------------+---+-----------------+---+--------------+-----------------+
  22002. + * | | | | |
  22003. + * | 8 bytes | 8 bytes | 8 bytes | 8 bytes |
  22004. + *
  22005. + * locality id object id of a directory where first name was created for
  22006. + * the object
  22007. + *
  22008. + * ordering copy of second 8-byte portion of the key of directory
  22009. + * entry for the first name of this object. Ordering has a form
  22010. + * {
  22011. + * fibration :7;
  22012. + * h :1;
  22013. + * prefix1 :56;
  22014. + * }
  22015. + * see description of key for directory entry above.
  22016. + *
  22017. + * objectid object id for this object
  22018. + *
  22019. + * This key assignment policy is designed to keep stat-data in the same order
  22020. + * as corresponding directory items, thus speeding up readdir/stat types of
  22021. + * workload.
  22022. + *
  22023. + * FILE BODY
  22024. + *
  22025. + * | 60 | 4 | 64 | 4 | 60 | 64 |
  22026. + * +--------------+---+-----------------+---+--------------+-----------------+
  22027. + * | locality id | 4 | ordering | 0 | objectid | offset |
  22028. + * +--------------+---+-----------------+---+--------------+-----------------+
  22029. + * | | | | |
  22030. + * | 8 bytes | 8 bytes | 8 bytes | 8 bytes |
  22031. + *
  22032. + * locality id object id of a directory where first name was created for
  22033. + * the object
  22034. + *
  22035. + * ordering the same as in the key of stat-data for this object
  22036. + *
  22037. + * objectid object id for this object
  22038. + *
  22039. + * offset logical offset from the beginning of this file.
  22040. + * Measured in bytes.
  22041. + *
  22042. + *
  22043. + * KEY ASSIGNMENT: PLAN A, SHORT KEYS.
  22044. + *
  22045. + * DIRECTORY ITEMS
  22046. + *
  22047. + * | 60 | 4 | 7 |1| 56 | 64 |
  22048. + * +--------------+---+---+-+-------------+-----------------+
  22049. + * | dirid | 0 | F |H| prefix-1 | prefix-2/hash |
  22050. + * +--------------+---+---+-+-------------+-----------------+
  22051. + * | | | |
  22052. + * | 8 bytes | 8 bytes | 8 bytes |
  22053. + *
  22054. + * dirid objectid of directory this item is for
  22055. + *
  22056. + * F fibration, see fs/reiser4/plugin/fibration.[ch]
  22057. + *
  22058. + * H 1 if last 8 bytes of the key contain hash,
  22059. + * 0 if last 8 bytes of the key contain prefix-2
  22060. + *
  22061. + * prefix-1 first 7 characters of file name.
  22062. + * Padded by zeroes if name is not long enough.
  22063. + *
  22064. + * prefix-2 next 8 characters of the file name.
  22065. + *
  22066. + * hash hash of the rest of file name (i.e., portion of file
  22067. + * name not included into prefix-1).
  22068. + *
  22069. + * File names shorter than 15 (== 7 + 8) characters are completely encoded in
  22070. + * the key. Such file names are called "short". They are distinguished by H
  22071. + * bit set to 0 in the key.
  22072. + *
  22073. + * Other file names are "long". For long name, H bit is 0, and first 7
  22074. + * characters are encoded in prefix-1 portion of the key. Last 8 bytes of the
  22075. + * key are occupied by hash of the remaining characters of the name.
  22076. + *
  22077. + * STAT DATA
  22078. + *
  22079. + * | 60 | 4 | 4 | 60 | 64 |
  22080. + * +--------------+---+---+--------------+-----------------+
  22081. + * | locality id | 1 | 0 | objectid | 0 |
  22082. + * +--------------+---+---+--------------+-----------------+
  22083. + * | | | |
  22084. + * | 8 bytes | 8 bytes | 8 bytes |
  22085. + *
  22086. + * locality id object id of a directory where first name was created for
  22087. + * the object
  22088. + *
  22089. + * objectid object id for this object
  22090. + *
  22091. + * FILE BODY
  22092. + *
  22093. + * | 60 | 4 | 4 | 60 | 64 |
  22094. + * +--------------+---+---+--------------+-----------------+
  22095. + * | locality id | 4 | 0 | objectid | offset |
  22096. + * +--------------+---+---+--------------+-----------------+
  22097. + * | | | |
  22098. + * | 8 bytes | 8 bytes | 8 bytes |
  22099. + *
  22100. + * locality id object id of a directory where first name was created for
  22101. + * the object
  22102. + *
  22103. + * objectid object id for this object
  22104. + *
  22105. + * offset logical offset from the beginning of this file.
  22106. + * Measured in bytes.
  22107. + *
  22108. + *
  22109. + */
  22110. +
  22111. +#include "debug.h"
  22112. +#include "key.h"
  22113. +#include "kassign.h"
  22114. +#include "vfs_ops.h"
  22115. +#include "inode.h"
  22116. +#include "super.h"
  22117. +#include "dscale.h"
  22118. +
  22119. +#include <linux/types.h> /* for __u?? */
  22120. +#include <linux/fs.h> /* for struct super_block, etc */
  22121. +
  22122. +/* bitmask for H bit (see comment at the beginning of this file) */
  22123. +static const __u64 longname_mark = 0x0100000000000000ull;
  22124. +/* bitmask for F and H portions of the key. */
  22125. +static const __u64 fibration_mask = 0xff00000000000000ull;
  22126. +
  22127. +/* return true if name is not completely encoded in @key */
  22128. +int is_longname_key(const reiser4_key * key)
  22129. +{
  22130. + __u64 highpart;
  22131. +
  22132. + assert("nikita-2863", key != NULL);
  22133. + if (get_key_type(key) != KEY_FILE_NAME_MINOR)
  22134. + reiser4_print_key("oops", key);
  22135. + assert("nikita-2864", get_key_type(key) == KEY_FILE_NAME_MINOR);
  22136. +
  22137. + if (REISER4_LARGE_KEY)
  22138. + highpart = get_key_ordering(key);
  22139. + else
  22140. + highpart = get_key_objectid(key);
  22141. +
  22142. + return (highpart & longname_mark) ? 1 : 0;
  22143. +}
  22144. +
  22145. +/* return true if @name is too long to be completely encoded in the key */
  22146. +int is_longname(const char *name UNUSED_ARG, int len)
  22147. +{
  22148. + if (REISER4_LARGE_KEY)
  22149. + return len > 23;
  22150. + else
  22151. + return len > 15;
  22152. +}
  22153. +
  22154. +/* code ascii string into __u64.
  22155. +
  22156. + Put characters of @name into result (@str) one after another starting
  22157. + from @start_idx-th highest (arithmetically) byte. This produces
  22158. + endian-safe encoding. memcpy(2) will not do.
  22159. +
  22160. +*/
  22161. +static __u64 pack_string(const char *name /* string to encode */ ,
  22162. + int start_idx /* highest byte in result from
  22163. + * which to start encoding */ )
  22164. +{
  22165. + unsigned i;
  22166. + __u64 str;
  22167. +
  22168. + str = 0;
  22169. + for (i = 0; (i < sizeof str - start_idx) && name[i]; ++i) {
  22170. + str <<= 8;
  22171. + str |= (unsigned char)name[i];
  22172. + }
  22173. + str <<= (sizeof str - i - start_idx) << 3;
  22174. + return str;
  22175. +}
  22176. +
  22177. +/* opposite to pack_string(). Takes value produced by pack_string(), restores
  22178. + * string encoded in it and stores result in @buf */
  22179. +char *reiser4_unpack_string(__u64 value, char *buf)
  22180. +{
  22181. + do {
  22182. + *buf = value >> (64 - 8);
  22183. + if (*buf)
  22184. + ++buf;
  22185. + value <<= 8;
  22186. + } while (value != 0);
  22187. + *buf = 0;
  22188. + return buf;
  22189. +}
  22190. +
  22191. +/* obtain name encoded in @key and store it in @buf */
  22192. +char *extract_name_from_key(const reiser4_key * key, char *buf)
  22193. +{
  22194. + char *c;
  22195. +
  22196. + assert("nikita-2868", !is_longname_key(key));
  22197. +
  22198. + c = buf;
  22199. + if (REISER4_LARGE_KEY) {
  22200. + c = reiser4_unpack_string(get_key_ordering(key) &
  22201. + ~fibration_mask, c);
  22202. + c = reiser4_unpack_string(get_key_fulloid(key), c);
  22203. + } else
  22204. + c = reiser4_unpack_string(get_key_fulloid(key) &
  22205. + ~fibration_mask, c);
  22206. + reiser4_unpack_string(get_key_offset(key), c);
  22207. + return buf;
  22208. +}
  22209. +
  22210. +/**
  22211. + * complete_entry_key - calculate entry key by name
  22212. + * @dir: directory where entry is (or will be) in
  22213. + * @name: name to calculate key of
  22214. + * @len: length of name
  22215. + * @result: place to store result in
  22216. + *
  22217. + * Sets fields of entry key @result which depend on file name.
  22218. + * When REISER4_LARGE_KEY is defined three fields of @result are set: ordering,
  22219. + * objectid and offset. Otherwise, objectid and offset are set.
  22220. + */
  22221. +void complete_entry_key(const struct inode *dir, const char *name,
  22222. + int len, reiser4_key *result)
  22223. +{
  22224. +#if REISER4_LARGE_KEY
  22225. + __u64 ordering;
  22226. + __u64 objectid;
  22227. + __u64 offset;
  22228. +
  22229. + assert("nikita-1139", dir != NULL);
  22230. + assert("nikita-1142", result != NULL);
  22231. + assert("nikita-2867", strlen(name) == len);
  22232. +
  22233. + /*
  22234. + * key allocation algorithm for directory entries in case of large
  22235. + * keys:
  22236. + *
  22237. + * If name is not longer than 7 + 8 + 8 = 23 characters, put first 7
  22238. + * characters into ordering field of key, next 8 characters (if any)
  22239. + * into objectid field of key and next 8 ones (if any) into offset
  22240. + * field of key
  22241. + *
  22242. + * If file name is longer than 23 characters, put first 7 characters
  22243. + * into key's ordering, next 8 to objectid and hash of remaining
  22244. + * characters into offset field.
  22245. + *
  22246. + * To distinguish above cases, in latter set up unused high bit in
  22247. + * ordering field.
  22248. + */
  22249. +
  22250. + /* [0-6] characters to ordering */
  22251. + ordering = pack_string(name, 1);
  22252. + if (len > 7) {
  22253. + /* [7-14] characters to objectid */
  22254. + objectid = pack_string(name + 7, 0);
  22255. + if (len > 15) {
  22256. + if (len <= 23) {
  22257. + /* [15-23] characters to offset */
  22258. + offset = pack_string(name + 15, 0);
  22259. + } else {
  22260. + /* note in a key the fact that offset contains
  22261. + * hash */
  22262. + ordering |= longname_mark;
  22263. +
  22264. + /* offset is the hash of the file name's tail */
  22265. + offset = inode_hash_plugin(dir)->hash(name + 15,
  22266. + len - 15);
  22267. + }
  22268. + } else {
  22269. + offset = 0ull;
  22270. + }
  22271. + } else {
  22272. + objectid = 0ull;
  22273. + offset = 0ull;
  22274. + }
  22275. +
  22276. + assert("nikita-3480", inode_fibration_plugin(dir) != NULL);
  22277. + ordering |= inode_fibration_plugin(dir)->fibre(dir, name, len);
  22278. +
  22279. + set_key_ordering(result, ordering);
  22280. + set_key_fulloid(result, objectid);
  22281. + set_key_offset(result, offset);
  22282. + return;
  22283. +
  22284. +#else
  22285. + __u64 objectid;
  22286. + __u64 offset;
  22287. +
  22288. + assert("nikita-1139", dir != NULL);
  22289. + assert("nikita-1142", result != NULL);
  22290. + assert("nikita-2867", strlen(name) == len);
  22291. +
  22292. + /*
  22293. + * key allocation algorithm for directory entries in case of not large
  22294. + * keys:
  22295. + *
  22296. + * If name is not longer than 7 + 8 = 15 characters, put first 7
  22297. + * characters into objectid field of key, next 8 characters (if any)
  22298. + * into offset field of key
  22299. + *
  22300. + * If file name is longer than 15 characters, put first 7 characters
  22301. + * into key's objectid, and hash of remaining characters into offset
  22302. + * field.
  22303. + *
  22304. + * To distinguish above cases, in latter set up unused high bit in
  22305. + * objectid field.
  22306. + */
  22307. +
  22308. + /* [0-6] characters to objectid */
  22309. + objectid = pack_string(name, 1);
  22310. + if (len > 7) {
  22311. + if (len <= 15) {
  22312. + /* [7-14] characters to offset */
  22313. + offset = pack_string(name + 7, 0);
  22314. + } else {
  22315. + /* note in a key the fact that offset contains hash. */
  22316. + objectid |= longname_mark;
  22317. +
  22318. + /* offset is the hash of the file name. */
  22319. + offset = inode_hash_plugin(dir)->hash(name + 7,
  22320. + len - 7);
  22321. + }
  22322. + } else
  22323. + offset = 0ull;
  22324. +
  22325. + assert("nikita-3480", inode_fibration_plugin(dir) != NULL);
  22326. + objectid |= inode_fibration_plugin(dir)->fibre(dir, name, len);
  22327. +
  22328. + set_key_fulloid(result, objectid);
  22329. + set_key_offset(result, offset);
  22330. + return;
  22331. +#endif /* ! REISER4_LARGE_KEY */
  22332. +}
  22333. +
  22334. +/* true, if @key is the key of "." */
  22335. +int is_dot_key(const reiser4_key * key/* key to check */)
  22336. +{
  22337. + assert("nikita-1717", key != NULL);
  22338. + assert("nikita-1718", get_key_type(key) == KEY_FILE_NAME_MINOR);
  22339. + return
  22340. + (get_key_ordering(key) == 0ull) &&
  22341. + (get_key_objectid(key) == 0ull) && (get_key_offset(key) == 0ull);
  22342. +}
  22343. +
  22344. +/* build key for stat-data.
  22345. +
  22346. + return key of stat-data of this object. This should become an sd plugin
  22347. + method in the future. For now, let it be here.
  22348. +
  22349. +*/
  22350. +reiser4_key *build_sd_key(const struct inode *target /* inode of an object */ ,
  22351. + reiser4_key * result /* resulting key of @target
  22352. + stat-data */ )
  22353. +{
  22354. + assert("nikita-261", result != NULL);
  22355. +
  22356. + reiser4_key_init(result);
  22357. + set_key_locality(result, reiser4_inode_data(target)->locality_id);
  22358. + set_key_ordering(result, get_inode_ordering(target));
  22359. + set_key_objectid(result, get_inode_oid(target));
  22360. + set_key_type(result, KEY_SD_MINOR);
  22361. + set_key_offset(result, (__u64) 0);
  22362. + return result;
  22363. +}
  22364. +
  22365. +/* encode part of key into &obj_key_id
  22366. +
  22367. + This encodes into @id part of @key sufficient to restore @key later,
  22368. + given that latter is key of object (key of stat-data).
  22369. +
  22370. + See &obj_key_id
  22371. +*/
  22372. +int build_obj_key_id(const reiser4_key * key /* key to encode */ ,
  22373. + obj_key_id * id/* id where key is encoded in */)
  22374. +{
  22375. + assert("nikita-1151", key != NULL);
  22376. + assert("nikita-1152", id != NULL);
  22377. +
  22378. + memcpy(id, key, sizeof *id);
  22379. + return 0;
  22380. +}
  22381. +
  22382. +/* encode reference to @obj in @id.
  22383. +
  22384. + This is like build_obj_key_id() above, but takes inode as parameter. */
  22385. +int build_inode_key_id(const struct inode *obj /* object to build key of */ ,
  22386. + obj_key_id * id/* result */)
  22387. +{
  22388. + reiser4_key sdkey;
  22389. +
  22390. + assert("nikita-1166", obj != NULL);
  22391. + assert("nikita-1167", id != NULL);
  22392. +
  22393. + build_sd_key(obj, &sdkey);
  22394. + build_obj_key_id(&sdkey, id);
  22395. + return 0;
  22396. +}
  22397. +
  22398. +/* decode @id back into @key
  22399. +
  22400. + Restore key of object stat-data from @id. This is dual to
  22401. + build_obj_key_id() above.
  22402. +*/
  22403. +int extract_key_from_id(const obj_key_id * id /* object key id to extract key
  22404. + * from */ ,
  22405. + reiser4_key * key/* result */)
  22406. +{
  22407. + assert("nikita-1153", id != NULL);
  22408. + assert("nikita-1154", key != NULL);
  22409. +
  22410. + reiser4_key_init(key);
  22411. + memcpy(key, id, sizeof *id);
  22412. + return 0;
  22413. +}
  22414. +
  22415. +/* extract objectid of directory from key of directory entry within said
  22416. + directory.
  22417. + */
  22418. +oid_t extract_dir_id_from_key(const reiser4_key * de_key /* key of
  22419. + * directory
  22420. + * entry */ )
  22421. +{
  22422. + assert("nikita-1314", de_key != NULL);
  22423. + return get_key_locality(de_key);
  22424. +}
  22425. +
  22426. +/* encode into @id key of directory entry.
  22427. +
  22428. + Encode into @id information sufficient to later distinguish directory
  22429. + entries within the same directory. This is not whole key, because all
  22430. + directory entries within directory item share locality which is equal
  22431. + to objectid of their directory.
  22432. +
  22433. +*/
  22434. +int build_de_id(const struct inode *dir /* inode of directory */ ,
  22435. + const struct qstr *name /* name to be given to @obj by
  22436. + * directory entry being
  22437. + * constructed */ ,
  22438. + de_id * id/* short key of directory entry */)
  22439. +{
  22440. + reiser4_key key;
  22441. +
  22442. + assert("nikita-1290", dir != NULL);
  22443. + assert("nikita-1292", id != NULL);
  22444. +
  22445. + /* NOTE-NIKITA this is suboptimal. */
  22446. + inode_dir_plugin(dir)->build_entry_key(dir, name, &key);
  22447. + return build_de_id_by_key(&key, id);
  22448. +}
  22449. +
  22450. +/* encode into @id key of directory entry.
  22451. +
  22452. + Encode into @id information sufficient to later distinguish directory
  22453. + entries within the same directory. This is not whole key, because all
  22454. + directory entries within directory item share locality which is equal
  22455. + to objectid of their directory.
  22456. +
  22457. +*/
  22458. +int build_de_id_by_key(const reiser4_key * entry_key /* full key of directory
  22459. + * entry */ ,
  22460. + de_id * id/* short key of directory entry */)
  22461. +{
  22462. + memcpy(id, ((__u64 *) entry_key) + 1, sizeof *id);
  22463. + return 0;
  22464. +}
  22465. +
  22466. +/* restore from @id key of directory entry.
  22467. +
  22468. + Function dual to build_de_id(): given @id and locality, build full
  22469. + key of directory entry within directory item.
  22470. +
  22471. +*/
  22472. +int extract_key_from_de_id(const oid_t locality /* locality of directory
  22473. + * entry */ ,
  22474. + const de_id * id /* directory entry id */ ,
  22475. + reiser4_key * key/* result */)
  22476. +{
  22477. + /* no need to initialise key here: all fields are overwritten */
  22478. + memcpy(((__u64 *) key) + 1, id, sizeof *id);
  22479. + set_key_locality(key, locality);
  22480. + set_key_type(key, KEY_FILE_NAME_MINOR);
  22481. + return 0;
  22482. +}
  22483. +
  22484. +/* compare two &de_id's */
  22485. +cmp_t de_id_cmp(const de_id * id1 /* first &de_id to compare */ ,
  22486. + const de_id * id2/* second &de_id to compare */)
  22487. +{
  22488. + /* NOTE-NIKITA ugly implementation */
  22489. + reiser4_key k1;
  22490. + reiser4_key k2;
  22491. +
  22492. + extract_key_from_de_id((oid_t) 0, id1, &k1);
  22493. + extract_key_from_de_id((oid_t) 0, id2, &k2);
  22494. + return keycmp(&k1, &k2);
  22495. +}
  22496. +
  22497. +/* compare &de_id with key */
  22498. +cmp_t de_id_key_cmp(const de_id * id /* directory entry id to compare */ ,
  22499. + const reiser4_key * key/* key to compare */)
  22500. +{
  22501. + cmp_t result;
  22502. + reiser4_key *k1;
  22503. +
  22504. + k1 = (reiser4_key *) (((unsigned long)id) - sizeof key->el[0]);
  22505. + result = KEY_DIFF_EL(k1, key, 1);
  22506. + if (result == EQUAL_TO) {
  22507. + result = KEY_DIFF_EL(k1, key, 2);
  22508. + if (REISER4_LARGE_KEY && result == EQUAL_TO)
  22509. + result = KEY_DIFF_EL(k1, key, 3);
  22510. + }
  22511. + return result;
  22512. +}
  22513. +
  22514. +/*
  22515. + * return number of bytes necessary to encode @inode identity.
  22516. + */
  22517. +int inode_onwire_size(const struct inode *inode)
  22518. +{
  22519. + int result;
  22520. +
  22521. + result = dscale_bytes_to_write(get_inode_oid(inode));
  22522. + result += dscale_bytes_to_write(get_inode_locality(inode));
  22523. +
  22524. + /*
  22525. + * ordering is large (it usually has highest bits set), so it makes
  22526. + * little sense to dscale it.
  22527. + */
  22528. + if (REISER4_LARGE_KEY)
  22529. + result += sizeof(get_inode_ordering(inode));
  22530. + return result;
  22531. +}
  22532. +
  22533. +/*
  22534. + * encode @inode identity at @start
  22535. + */
  22536. +char *build_inode_onwire(const struct inode *inode, char *start)
  22537. +{
  22538. + start += dscale_write(start, get_inode_locality(inode));
  22539. + start += dscale_write(start, get_inode_oid(inode));
  22540. +
  22541. + if (REISER4_LARGE_KEY) {
  22542. + put_unaligned(cpu_to_le64(get_inode_ordering(inode)), (__le64 *)start);
  22543. + start += sizeof(get_inode_ordering(inode));
  22544. + }
  22545. + return start;
  22546. +}
  22547. +
  22548. +/*
  22549. + * extract key that was previously encoded by build_inode_onwire() at @addr
  22550. + */
  22551. +char *extract_obj_key_id_from_onwire(char *addr, obj_key_id * key_id)
  22552. +{
  22553. + __u64 val;
  22554. +
  22555. + addr += dscale_read(addr, &val);
  22556. + val = (val << KEY_LOCALITY_SHIFT) | KEY_SD_MINOR;
  22557. + put_unaligned(cpu_to_le64(val), (__le64 *)key_id->locality);
  22558. + addr += dscale_read(addr, &val);
  22559. + put_unaligned(cpu_to_le64(val), (__le64 *)key_id->objectid);
  22560. +#if REISER4_LARGE_KEY
  22561. + memcpy(&key_id->ordering, addr, sizeof key_id->ordering);
  22562. + addr += sizeof key_id->ordering;
  22563. +#endif
  22564. + return addr;
  22565. +}
  22566. +
  22567. +/*
  22568. + * skip a key that was previously encoded by build_inode_onwire() at @addr
  22569. + * FIXME: handle IO errors.
  22570. + */
  22571. +char * locate_obj_key_id_onwire(char * addr)
  22572. +{
  22573. + /* locality */
  22574. + addr += dscale_bytes_to_read(addr);
  22575. + /* objectid */
  22576. + addr += dscale_bytes_to_read(addr);
  22577. +#if REISER4_LARGE_KEY
  22578. + addr += sizeof ((obj_key_id *)0)->ordering;
  22579. +#endif
  22580. + return addr;
  22581. +}
  22582. +
  22583. +/* Make Linus happy.
  22584. + Local variables:
  22585. + c-indentation-style: "K&R"
  22586. + mode-name: "LC"
  22587. + c-basic-offset: 8
  22588. + tab-width: 8
  22589. + fill-column: 120
  22590. + End:
  22591. +*/
  22592. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/kassign.h linux-4.14.2/fs/reiser4/kassign.h
  22593. --- linux-4.14.2.orig/fs/reiser4/kassign.h 1970-01-01 01:00:00.000000000 +0100
  22594. +++ linux-4.14.2/fs/reiser4/kassign.h 2017-11-26 22:13:09.000000000 +0100
  22595. @@ -0,0 +1,111 @@
  22596. +/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
  22597. + * reiser4/README */
  22598. +
  22599. +/* Key assignment policy interface. See kassign.c for details. */
  22600. +
  22601. +#if !defined(__KASSIGN_H__)
  22602. +#define __KASSIGN_H__
  22603. +
  22604. +#include "forward.h"
  22605. +#include "key.h"
  22606. +#include "dformat.h"
  22607. +
  22608. +#include <linux/types.h> /* for __u?? */
  22609. +#include <linux/fs.h> /* for struct super_block, etc */
  22610. +#include <linux/dcache.h> /* for struct qstr */
  22611. +
  22612. +/* key assignment functions */
  22613. +
  22614. +/* Information from which key of file stat-data can be uniquely
  22615. + restored. This depends on key assignment policy for
  22616. + stat-data. Currently it's enough to store object id and locality id
  22617. + (60+60==120) bits, because minor packing locality and offset of
  22618. + stat-data key are always known constants: KEY_SD_MINOR and 0
  22619. + respectively. For simplicity 4 bits are wasted in each id, and just
  22620. + two 64 bit integers are stored.
  22621. +
  22622. + This field has to be byte-aligned, because we don't want to waste
  22623. + space in directory entries. There is another side of a coin of
   22624. + course: we waste CPU and bus bandwidth instead, by copying data back
  22625. + and forth.
  22626. +
  22627. + Next optimization: &obj_key_id is mainly used to address stat data from
   22628. + directory entries. Under the assumption that the majority of files have
   22629. + only one name (one hard link) from *the* parent directory it seems reasonable
  22630. + to only store objectid of stat data and take its locality from key of
  22631. + directory item.
  22632. +
  22633. + This requires some flag to be added to the &obj_key_id to distinguish
  22634. + between these two cases. Remaining bits in flag byte are then asking to be
  22635. + used to store file type.
  22636. +
  22637. + This optimization requires changes in directory item handling code.
  22638. +
  22639. +*/
  22640. +typedef struct obj_key_id {
  22641. + d8 locality[sizeof(__u64)];
  22642. + ON_LARGE_KEY(d8 ordering[sizeof(__u64)];
  22643. + )
  22644. + d8 objectid[sizeof(__u64)];
  22645. +}
  22646. +obj_key_id;
  22647. +
  22648. +/* Information sufficient to uniquely identify directory entry within
  22649. + compressed directory item.
  22650. +
  22651. + For alignment issues see &obj_key_id above.
  22652. +*/
  22653. +typedef struct de_id {
  22654. + ON_LARGE_KEY(d8 ordering[sizeof(__u64)];)
  22655. + d8 objectid[sizeof(__u64)];
  22656. + d8 offset[sizeof(__u64)];
  22657. +}
  22658. +de_id;
  22659. +
  22660. +extern int inode_onwire_size(const struct inode *obj);
  22661. +extern char *build_inode_onwire(const struct inode *obj, char *area);
  22662. +extern char *locate_obj_key_id_onwire(char *area);
  22663. +extern char *extract_obj_key_id_from_onwire(char *area, obj_key_id * key_id);
  22664. +
  22665. +extern int build_inode_key_id(const struct inode *obj, obj_key_id * id);
  22666. +extern int extract_key_from_id(const obj_key_id * id, reiser4_key * key);
  22667. +extern int build_obj_key_id(const reiser4_key * key, obj_key_id * id);
  22668. +extern oid_t extract_dir_id_from_key(const reiser4_key * de_key);
  22669. +extern int build_de_id(const struct inode *dir, const struct qstr *name,
  22670. + de_id * id);
  22671. +extern int build_de_id_by_key(const reiser4_key * entry_key, de_id * id);
  22672. +extern int extract_key_from_de_id(const oid_t locality, const de_id * id,
  22673. + reiser4_key * key);
  22674. +extern cmp_t de_id_cmp(const de_id * id1, const de_id * id2);
  22675. +extern cmp_t de_id_key_cmp(const de_id * id, const reiser4_key * key);
  22676. +
  22677. +extern int build_readdir_key_common(struct file *dir, reiser4_key * result);
  22678. +extern void build_entry_key_common(const struct inode *dir,
  22679. + const struct qstr *name,
  22680. + reiser4_key * result);
  22681. +extern void build_entry_key_stable_entry(const struct inode *dir,
  22682. + const struct qstr *name,
  22683. + reiser4_key * result);
  22684. +extern int is_dot_key(const reiser4_key * key);
  22685. +extern reiser4_key *build_sd_key(const struct inode *target,
  22686. + reiser4_key * result);
  22687. +
  22688. +extern int is_longname_key(const reiser4_key * key);
  22689. +extern int is_longname(const char *name, int len);
  22690. +extern char *extract_name_from_key(const reiser4_key * key, char *buf);
  22691. +extern char *reiser4_unpack_string(__u64 value, char *buf);
  22692. +extern void complete_entry_key(const struct inode *dir, const char *name,
  22693. + int len, reiser4_key *result);
  22694. +
  22695. +/* __KASSIGN_H__ */
  22696. +#endif
  22697. +
  22698. +/* Make Linus happy.
  22699. + Local variables:
  22700. + c-indentation-style: "K&R"
  22701. + mode-name: "LC"
  22702. + c-basic-offset: 8
  22703. + tab-width: 8
  22704. + fill-column: 120
  22705. + End:
  22706. +*/
  22707. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/Kconfig linux-4.14.2/fs/reiser4/Kconfig
  22708. --- linux-4.14.2.orig/fs/reiser4/Kconfig 1970-01-01 01:00:00.000000000 +0100
  22709. +++ linux-4.14.2/fs/reiser4/Kconfig 2017-11-26 22:14:44.000000000 +0100
  22710. @@ -0,0 +1,36 @@
  22711. +config REISER4_FS
  22712. + tristate "Reiser4 (EXPERIMENTAL)"
  22713. + select ZLIB_INFLATE
  22714. + select ZLIB_DEFLATE
  22715. + select LZO_COMPRESS
  22716. + select LZO_DECOMPRESS
  22717. + select ZSTD_COMPRESS
  22718. + select ZSTD_DECOMPRESS
  22719. + select CRYPTO
  22720. + select CRYPTO_CRC32C
  22721. + help
  22722. + Reiser4 is a filesystem that performs all filesystem operations
  22723. + as atomic transactions, which means that it either performs a
  22724. + write, or it does not, and in the event of a crash it does not
  22725. + partially perform it or corrupt it.
  22726. +
  22727. + It stores files in dancing trees, which are like balanced trees but
  22728. + faster. It packs small files together so that they share blocks
  22729. + without wasting space. This means you can use it to store really
  22730. + small files. It also means that it saves you disk space. It avoids
  22731. + hassling you with anachronisms like having a maximum number of
  22732. + inodes, and wasting space if you use less than that number.
  22733. +
  22734. + Reiser4 is a distinct filesystem type from reiserfs (V3).
  22735. + It's therefore not possible to use reiserfs file systems
  22736. + with reiser4.
  22737. +
  22738. + To learn more about reiser4, go to http://www.namesys.com
  22739. +
  22740. +config REISER4_DEBUG
  22741. + bool "Enable reiser4 debug mode"
  22742. + depends on REISER4_FS
  22743. + help
  22744. + Don't use this unless you are debugging reiser4.
  22745. +
  22746. + If unsure, say N.
  22747. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/key.c linux-4.14.2/fs/reiser4/key.c
  22748. --- linux-4.14.2.orig/fs/reiser4/key.c 1970-01-01 01:00:00.000000000 +0100
  22749. +++ linux-4.14.2/fs/reiser4/key.c 2017-11-26 22:13:09.000000000 +0100
  22750. @@ -0,0 +1,138 @@
  22751. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  22752. + * reiser4/README */
  22753. +
  22754. +/* Key manipulations. */
  22755. +
  22756. +#include "debug.h"
  22757. +#include "key.h"
  22758. +#include "super.h"
  22759. +#include "reiser4.h"
  22760. +
  22761. +#include <linux/types.h> /* for __u?? */
  22762. +
  22763. +/* Minimal possible key: all components are zero. It is presumed that this is
  22764. + independent of key scheme. */
  22765. +static const reiser4_key MINIMAL_KEY = {
  22766. + .el = {
  22767. + 0ull,
  22768. + ON_LARGE_KEY(0ull,)
  22769. + 0ull,
  22770. + 0ull
  22771. + }
  22772. +};
  22773. +
  22774. +/* Maximal possible key: all components are ~0. It is presumed that this is
  22775. + independent of key scheme. */
  22776. +static const reiser4_key MAXIMAL_KEY = {
  22777. + .el = {
  22778. + __constant_cpu_to_le64(~0ull),
  22779. + ON_LARGE_KEY(__constant_cpu_to_le64(~0ull),)
  22780. + __constant_cpu_to_le64(~0ull),
  22781. + __constant_cpu_to_le64(~0ull)
  22782. + }
  22783. +};
  22784. +
  22785. +/* Initialize key. */
  22786. +void reiser4_key_init(reiser4_key * key/* key to init */)
  22787. +{
  22788. + assert("nikita-1169", key != NULL);
  22789. + memset(key, 0, sizeof *key);
  22790. +}
  22791. +
  22792. +/* minimal possible key in the tree. Return pointer to the static storage. */
  22793. +const reiser4_key * reiser4_min_key(void)
  22794. +{
  22795. + return &MINIMAL_KEY;
  22796. +}
  22797. +
  22798. +/* maximum possible key in the tree. Return pointer to the static storage. */
  22799. +const reiser4_key * reiser4_max_key(void)
  22800. +{
  22801. + return &MAXIMAL_KEY;
  22802. +}
  22803. +
  22804. +#if REISER4_DEBUG
  22805. +/* debugging aid: print symbolic name of key type */
  22806. +static const char *type_name(unsigned int key_type/* key type */)
  22807. +{
  22808. + switch (key_type) {
  22809. + case KEY_FILE_NAME_MINOR:
  22810. + return "file name";
  22811. + case KEY_SD_MINOR:
  22812. + return "stat data";
  22813. + case KEY_ATTR_NAME_MINOR:
  22814. + return "attr name";
  22815. + case KEY_ATTR_BODY_MINOR:
  22816. + return "attr body";
  22817. + case KEY_BODY_MINOR:
  22818. + return "file body";
  22819. + default:
  22820. + return "unknown";
  22821. + }
  22822. +}
  22823. +
  22824. +/* debugging aid: print human readable information about key */
  22825. +void reiser4_print_key(const char *prefix /* prefix to print */ ,
  22826. + const reiser4_key * key/* key to print */)
  22827. +{
  22828. + /* turn bold on */
  22829. + /* printf ("\033[1m"); */
  22830. + if (key == NULL)
  22831. + printk("%s: null key\n", prefix);
  22832. + else {
  22833. + if (REISER4_LARGE_KEY)
  22834. + printk("%s: (%Lx:%x:%Lx:%Lx:%Lx:%Lx)", prefix,
  22835. + get_key_locality(key),
  22836. + get_key_type(key),
  22837. + get_key_ordering(key),
  22838. + get_key_band(key),
  22839. + get_key_objectid(key), get_key_offset(key));
  22840. + else
  22841. + printk("%s: (%Lx:%x:%Lx:%Lx:%Lx)", prefix,
  22842. + get_key_locality(key),
  22843. + get_key_type(key),
  22844. + get_key_band(key),
  22845. + get_key_objectid(key), get_key_offset(key));
  22846. + /*
  22847. + * if this is a key of directory entry, try to decode part of
  22848. + * a name stored in the key, and output it.
  22849. + */
  22850. + if (get_key_type(key) == KEY_FILE_NAME_MINOR) {
  22851. + char buf[DE_NAME_BUF_LEN];
  22852. + char *c;
  22853. +
  22854. + c = buf;
  22855. + c = reiser4_unpack_string(get_key_ordering(key), c);
  22856. + reiser4_unpack_string(get_key_fulloid(key), c);
  22857. + printk("[%s", buf);
  22858. + if (is_longname_key(key))
  22859. + /*
  22860. + * only part of the name is stored in the key.
  22861. + */
  22862. + printk("...]\n");
  22863. + else {
  22864. + /*
  22865. + * whole name is stored in the key.
  22866. + */
  22867. + reiser4_unpack_string(get_key_offset(key), buf);
  22868. + printk("%s]\n", buf);
  22869. + }
  22870. + } else {
  22871. + printk("[%s]\n", type_name(get_key_type(key)));
  22872. + }
  22873. + }
  22874. + /* turn bold off */
  22875. + /* printf ("\033[m\017"); */
  22876. +}
  22877. +
  22878. +#endif
  22879. +
  22880. +/* Make Linus happy.
  22881. + Local variables:
  22882. + c-indentation-style: "K&R"
  22883. + mode-name: "LC"
  22884. + c-basic-offset: 8
  22885. + tab-width: 8
  22886. + fill-column: 120
  22887. + End:
  22888. +*/
  22889. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/key.h linux-4.14.2/fs/reiser4/key.h
  22890. --- linux-4.14.2.orig/fs/reiser4/key.h 1970-01-01 01:00:00.000000000 +0100
  22891. +++ linux-4.14.2/fs/reiser4/key.h 2017-11-26 22:13:09.000000000 +0100
  22892. @@ -0,0 +1,392 @@
  22893. +/* Copyright 2000, 2001, 2002, 2003 by Hans Reiser, licensing governed by
  22894. + * reiser4/README */
  22895. +
  22896. +/* Declarations of key-related data-structures and operations on keys. */
  22897. +
  22898. +#if !defined(__REISER4_KEY_H__)
  22899. +#define __REISER4_KEY_H__
  22900. +
  22901. +#include "dformat.h"
  22902. +#include "forward.h"
  22903. +#include "debug.h"
  22904. +
  22905. +#include <linux/types.h> /* for __u?? */
  22906. +
  22907. +/* Operations on keys in reiser4 tree */
  22908. +
  22909. +/* No access to any of these fields shall be done except via a
  22910. + wrapping macro/function, and that wrapping macro/function shall
  22911. + convert to little endian order. Compare keys will consider cpu byte order. */
  22912. +
  22913. +/* A storage layer implementation difference between a regular unix file body
  22914. + and its attributes is in the typedef below which causes all of the attributes
  22915. + of a file to be near in key to all of the other attributes for all of the
  22916. + files within that directory, and not near to the file itself. It is
  22917. + interesting to consider whether this is the wrong approach, and whether there
  22918. + should be no difference at all. For current usage patterns this choice is
  22919. + probably the right one. */
  22920. +
  22921. +/* possible values for minor packing locality (4 bits required) */
  22922. +typedef enum {
  22923. + /* file name */
  22924. + KEY_FILE_NAME_MINOR = 0,
  22925. + /* stat-data */
  22926. + KEY_SD_MINOR = 1,
  22927. + /* file attribute name */
  22928. + KEY_ATTR_NAME_MINOR = 2,
  22929. + /* file attribute value */
  22930. + KEY_ATTR_BODY_MINOR = 3,
  22931. + /* file body (tail or extent) */
  22932. + KEY_BODY_MINOR = 4,
  22933. +} key_minor_locality;
  22934. +
  22935. +/* Everything stored in the tree has a unique key, which means that the tree is
  22936. + (logically) fully ordered by key. Physical order is determined by dynamic
  22937. + heuristics that attempt to reflect key order when allocating available space,
  22938. + and by the repacker. It is stylistically better to put aggregation
  22939. + information into the key. Thus, if you want to segregate extents from tails,
  22940. + it is better to give them distinct minor packing localities rather than
  22941. + changing block_alloc.c to check the node type when deciding where to allocate
  22942. + the node.
  22943. +
  22944. + The need to randomly displace new directories and large files disturbs this
  22945. + symmetry unfortunately. However, it should be noted that this is a need that
  22946. + is not clearly established given the existence of a repacker. Also, in our
  22947. + current implementation tails have a different minor packing locality from
  22948. + extents, and no files have both extents and tails, so maybe symmetry can be
  22949. + had without performance cost after all. Symmetry is what we ship for now....
  22950. +*/
  22951. +
  22952. +/* Arbitrary major packing localities can be assigned to objects using
  22953. + the reiser4(filenameA/..packing<=some_number) system call.
  22954. +
  22955. + In reiser4, the creat() syscall creates a directory
  22956. +
  22957. + whose default flow (that which is referred to if the directory is
  22958. + read as a file) is the traditional unix file body.
  22959. +
  22960. + whose directory plugin is the 'filedir'
  22961. +
  22962. + whose major packing locality is that of the parent of the object created.
  22963. +
  22964. + The static_stat item is a particular commonly used directory
  22965. + compression (the one for normal unix files).
  22966. +
  22967. + The filedir plugin checks to see if the static_stat item exists.
  22968. + There is a unique key for static_stat. If yes, then it uses the
  22969. + static_stat item for all of the values that it contains. The
  22970. + static_stat item contains a flag for each stat it contains which
  22971. + indicates whether one should look outside the static_stat item for its
  22972. + contents.
  22973. +*/
  22974. +
  22975. +/* offset of fields in reiser4_key. Value of each element of this enum
  22976. + is index within key (thought as array of __u64's) where this field
  22977. + is. */
  22978. +typedef enum {
  22979. + /* major "locale", aka dirid. Sits in 1st element */
  22980. + KEY_LOCALITY_INDEX = 0,
  22981. + /* minor "locale", aka item type. Sits in 1st element */
  22982. + KEY_TYPE_INDEX = 0,
  22983. + ON_LARGE_KEY(KEY_ORDERING_INDEX,)
  22984. + /* "object band". Sits in 2nd element */
  22985. + KEY_BAND_INDEX,
  22986. + /* objectid. Sits in 2nd element */
  22987. + KEY_OBJECTID_INDEX = KEY_BAND_INDEX,
  22988. + /* full objectid. Sits in 2nd element */
  22989. + KEY_FULLOID_INDEX = KEY_BAND_INDEX,
  22990. + /* Offset. Sits in 3rd element */
  22991. + KEY_OFFSET_INDEX,
  22992. + /* Name hash. Sits in 3rd element */
  22993. + KEY_HASH_INDEX = KEY_OFFSET_INDEX,
  22994. + KEY_CACHELINE_END = KEY_OFFSET_INDEX,
  22995. + KEY_LAST_INDEX
  22996. +} reiser4_key_field_index;
  22997. +
  22998. +/* key in reiser4 internal "balanced" tree. It is just array of three
  22999. + 64bit integers in disk byte order (little-endian by default). This
  23000. + array is actually indexed by reiser4_key_field. Each __u64 within
  23001. + this array is called "element". Logical key component encoded within
  23002. + elements are called "fields".
  23003. +
  23004. + We declare this as union with second component dummy to suppress
  23005. + inconvenient array<->pointer casts implied in C. */
  23006. +union reiser4_key {
  23007. + __le64 el[KEY_LAST_INDEX];
  23008. + int pad;
  23009. +};
  23010. +
  23011. +/* bitmasks showing where within reiser4_key particular key is stored. */
  23012. +/* major locality occupies higher 60 bits of the first element */
  23013. +#define KEY_LOCALITY_MASK 0xfffffffffffffff0ull
  23014. +
  23015. +/* minor locality occupies lower 4 bits of the first element */
  23016. +#define KEY_TYPE_MASK 0xfull
  23017. +
  23018. +/* controversial band occupies higher 4 bits of the 2nd element */
  23019. +#define KEY_BAND_MASK 0xf000000000000000ull
  23020. +
  23021. +/* objectid occupies lower 60 bits of the 2nd element */
  23022. +#define KEY_OBJECTID_MASK 0x0fffffffffffffffull
  23023. +
  23024. +/* full 64bit objectid*/
  23025. +#define KEY_FULLOID_MASK 0xffffffffffffffffull
  23026. +
   23027. +/* offset is just the 3rd element itself */
  23028. +#define KEY_OFFSET_MASK 0xffffffffffffffffull
  23029. +
  23030. +/* ordering is whole second element */
  23031. +#define KEY_ORDERING_MASK 0xffffffffffffffffull
  23032. +
  23033. +/* how many bits key element should be shifted to left to get particular field
  23034. + */
  23035. +typedef enum {
  23036. + KEY_LOCALITY_SHIFT = 4,
  23037. + KEY_TYPE_SHIFT = 0,
  23038. + KEY_BAND_SHIFT = 60,
  23039. + KEY_OBJECTID_SHIFT = 0,
  23040. + KEY_FULLOID_SHIFT = 0,
  23041. + KEY_OFFSET_SHIFT = 0,
  23042. + KEY_ORDERING_SHIFT = 0,
  23043. +} reiser4_key_field_shift;
  23044. +
  23045. +static inline __u64
  23046. +get_key_el(const reiser4_key * key, reiser4_key_field_index off)
  23047. +{
  23048. + assert("nikita-753", key != NULL);
  23049. + assert("nikita-754", off < KEY_LAST_INDEX);
  23050. + return le64_to_cpu(get_unaligned(&key->el[off]));
  23051. +}
  23052. +
  23053. +static inline void
  23054. +set_key_el(reiser4_key * key, reiser4_key_field_index off, __u64 value)
  23055. +{
  23056. + assert("nikita-755", key != NULL);
  23057. + assert("nikita-756", off < KEY_LAST_INDEX);
  23058. + put_unaligned(cpu_to_le64(value), &key->el[off]);
  23059. +}
  23060. +
  23061. +/* macro to define getter and setter functions for field F with type T */
  23062. +#define DEFINE_KEY_FIELD(L, U, T) \
  23063. +static inline T get_key_ ## L(const reiser4_key *key) \
  23064. +{ \
  23065. + assert("nikita-750", key != NULL); \
  23066. + return (T) (get_key_el(key, KEY_ ## U ## _INDEX) & \
  23067. + KEY_ ## U ## _MASK) >> KEY_ ## U ## _SHIFT; \
  23068. +} \
  23069. + \
  23070. +static inline void set_key_ ## L(reiser4_key * key, T loc) \
  23071. +{ \
  23072. + __u64 el; \
  23073. + \
  23074. + assert("nikita-752", key != NULL); \
  23075. + \
  23076. + el = get_key_el(key, KEY_ ## U ## _INDEX); \
  23077. + /* clear field bits in the key */ \
  23078. + el &= ~KEY_ ## U ## _MASK; \
  23079. + /* actually it should be \
  23080. + \
  23081. + el |= ( loc << KEY_ ## U ## _SHIFT ) & KEY_ ## U ## _MASK; \
  23082. + \
  23083. + but we trust user to never pass values that wouldn't fit \
  23084. + into field. Clearing extra bits is one operation, but this \
  23085. + function is time-critical. \
  23086. + But check this in assertion. */ \
  23087. + assert("nikita-759", ((loc << KEY_ ## U ## _SHIFT) & \
  23088. + ~KEY_ ## U ## _MASK) == 0); \
  23089. + el |= (loc << KEY_ ## U ## _SHIFT); \
  23090. + set_key_el(key, KEY_ ## U ## _INDEX, el); \
  23091. +}
  23092. +
  23093. +typedef __u64 oid_t;
  23094. +
  23095. +/* define get_key_locality(), set_key_locality() */
  23096. +DEFINE_KEY_FIELD(locality, LOCALITY, oid_t);
  23097. +/* define get_key_type(), set_key_type() */
  23098. +DEFINE_KEY_FIELD(type, TYPE, key_minor_locality);
  23099. +/* define get_key_band(), set_key_band() */
  23100. +DEFINE_KEY_FIELD(band, BAND, __u64);
  23101. +/* define get_key_objectid(), set_key_objectid() */
  23102. +DEFINE_KEY_FIELD(objectid, OBJECTID, oid_t);
  23103. +/* define get_key_fulloid(), set_key_fulloid() */
  23104. +DEFINE_KEY_FIELD(fulloid, FULLOID, oid_t);
  23105. +/* define get_key_offset(), set_key_offset() */
  23106. +DEFINE_KEY_FIELD(offset, OFFSET, __u64);
  23107. +#if (REISER4_LARGE_KEY)
  23108. +/* define get_key_ordering(), set_key_ordering() */
  23109. +DEFINE_KEY_FIELD(ordering, ORDERING, __u64);
  23110. +#else
  23111. +static inline __u64 get_key_ordering(const reiser4_key * key)
  23112. +{
  23113. + return 0;
  23114. +}
  23115. +
  23116. +static inline void set_key_ordering(reiser4_key * key, __u64 val)
  23117. +{
  23118. +}
  23119. +#endif
  23120. +
  23121. +/* key comparison result */
  23122. +typedef enum { LESS_THAN = -1, /* if first key is less than second */
  23123. + EQUAL_TO = 0, /* if keys are equal */
  23124. + GREATER_THAN = +1 /* if first key is greater than second */
  23125. +} cmp_t;
  23126. +
  23127. +void reiser4_key_init(reiser4_key * key);
  23128. +
  23129. +/* minimal possible key in the tree. Return pointer to the static storage. */
  23130. +extern const reiser4_key *reiser4_min_key(void);
  23131. +extern const reiser4_key *reiser4_max_key(void);
  23132. +
  23133. +/* helper macro for keycmp() */
  23134. +#define KEY_DIFF(k1, k2, field) \
  23135. +({ \
  23136. + typeof(get_key_ ## field(k1)) f1; \
  23137. + typeof(get_key_ ## field(k2)) f2; \
  23138. + \
  23139. + f1 = get_key_ ## field(k1); \
  23140. + f2 = get_key_ ## field(k2); \
  23141. + \
  23142. + (f1 < f2) ? LESS_THAN : ((f1 == f2) ? EQUAL_TO : GREATER_THAN); \
  23143. +})
  23144. +
  23145. +/* helper macro for keycmp() */
  23146. +#define KEY_DIFF_EL(k1, k2, off) \
  23147. +({ \
  23148. + __u64 e1; \
  23149. + __u64 e2; \
  23150. + \
  23151. + e1 = get_key_el(k1, off); \
  23152. + e2 = get_key_el(k2, off); \
  23153. + \
  23154. + (e1 < e2) ? LESS_THAN : ((e1 == e2) ? EQUAL_TO : GREATER_THAN); \
  23155. +})
  23156. +
  23157. +/* compare `k1' and `k2'. This function is a heart of "key allocation
  23158. + policy". All you need to implement new policy is to add yet another
  23159. + clause here. */
  23160. +static inline cmp_t keycmp(const reiser4_key * k1 /* first key to compare */ ,
  23161. + const reiser4_key * k2/* second key to compare */)
  23162. +{
  23163. + cmp_t result;
  23164. +
  23165. + /*
  23166. + * This function is the heart of reiser4 tree-routines. Key comparison
  23167. + * is among most heavily used operations in the file system.
  23168. + */
  23169. +
  23170. + assert("nikita-439", k1 != NULL);
  23171. + assert("nikita-440", k2 != NULL);
  23172. +
  23173. + /* there is no actual branch here: condition is compile time constant
  23174. + * and constant folding and propagation ensures that only one branch
  23175. + * is actually compiled in. */
  23176. +
  23177. + if (REISER4_PLANA_KEY_ALLOCATION) {
  23178. + /* if physical order of fields in a key is identical
  23179. + with logical order, we can implement key comparison
  23180. + as three 64bit comparisons. */
  23181. + /* logical order of fields in plan-a:
  23182. + locality->type->objectid->offset. */
  23183. + /* compare locality and type at once */
  23184. + result = KEY_DIFF_EL(k1, k2, 0);
  23185. + if (result == EQUAL_TO) {
  23186. + /* compare objectid (and band if it's there) */
  23187. + result = KEY_DIFF_EL(k1, k2, 1);
  23188. + /* compare offset */
  23189. + if (result == EQUAL_TO) {
  23190. + result = KEY_DIFF_EL(k1, k2, 2);
  23191. + if (REISER4_LARGE_KEY && result == EQUAL_TO)
  23192. + result = KEY_DIFF_EL(k1, k2, 3);
  23193. + }
  23194. + }
  23195. + } else if (REISER4_3_5_KEY_ALLOCATION) {
  23196. + result = KEY_DIFF(k1, k2, locality);
  23197. + if (result == EQUAL_TO) {
  23198. + result = KEY_DIFF(k1, k2, objectid);
  23199. + if (result == EQUAL_TO) {
  23200. + result = KEY_DIFF(k1, k2, type);
  23201. + if (result == EQUAL_TO)
  23202. + result = KEY_DIFF(k1, k2, offset);
  23203. + }
  23204. + }
  23205. + } else
  23206. + impossible("nikita-441", "Unknown key allocation scheme!");
  23207. + return result;
  23208. +}
  23209. +
  23210. +/* true if @k1 equals @k2 */
  23211. +static inline int keyeq(const reiser4_key * k1 /* first key to compare */ ,
  23212. + const reiser4_key * k2/* second key to compare */)
  23213. +{
  23214. + assert("nikita-1879", k1 != NULL);
  23215. + assert("nikita-1880", k2 != NULL);
  23216. + return !memcmp(k1, k2, sizeof *k1);
  23217. +}
  23218. +
  23219. +/* true if @k1 is less than @k2 */
  23220. +static inline int keylt(const reiser4_key * k1 /* first key to compare */ ,
  23221. + const reiser4_key * k2/* second key to compare */)
  23222. +{
  23223. + assert("nikita-1952", k1 != NULL);
  23224. + assert("nikita-1953", k2 != NULL);
  23225. + return keycmp(k1, k2) == LESS_THAN;
  23226. +}
  23227. +
  23228. +/* true if @k1 is less than or equal to @k2 */
  23229. +static inline int keyle(const reiser4_key * k1 /* first key to compare */ ,
  23230. + const reiser4_key * k2/* second key to compare */)
  23231. +{
  23232. + assert("nikita-1954", k1 != NULL);
  23233. + assert("nikita-1955", k2 != NULL);
  23234. + return keycmp(k1, k2) != GREATER_THAN;
  23235. +}
  23236. +
  23237. +/* true if @k1 is greater than @k2 */
  23238. +static inline int keygt(const reiser4_key * k1 /* first key to compare */ ,
  23239. + const reiser4_key * k2/* second key to compare */)
  23240. +{
  23241. + assert("nikita-1959", k1 != NULL);
  23242. + assert("nikita-1960", k2 != NULL);
  23243. + return keycmp(k1, k2) == GREATER_THAN;
  23244. +}
  23245. +
  23246. +/* true if @k1 is greater than or equal to @k2 */
  23247. +static inline int keyge(const reiser4_key * k1 /* first key to compare */ ,
  23248. + const reiser4_key * k2/* second key to compare */)
  23249. +{
  23250. + assert("nikita-1956", k1 != NULL);
  23251. + assert("nikita-1957", k2 != NULL); /* October 4: sputnik launched
  23252. + * November 3: Laika */
  23253. + return keycmp(k1, k2) != LESS_THAN;
  23254. +}
  23255. +
  23256. +static inline void prefetchkey(reiser4_key * key)
  23257. +{
  23258. + prefetch(key);
  23259. + prefetch(&key->el[KEY_CACHELINE_END]);
  23260. +}
  23261. +
  23262. +/* (%Lx:%x:%Lx:%Lx:%Lx:%Lx) =
  23263. + 1 + 16 + 1 + 1 + 1 + 1 + 1 + 16 + 1 + 16 + 1 + 16 + 1 */
  23264. +/* size of a buffer suitable to hold human readable key representation */
  23265. +#define KEY_BUF_LEN (80)
  23266. +
  23267. +#if REISER4_DEBUG
  23268. +extern void reiser4_print_key(const char *prefix, const reiser4_key * key);
  23269. +#else
  23270. +#define reiser4_print_key(p, k) noop
  23271. +#endif
  23272. +
  23273. +/* __FS_REISERFS_KEY_H__ */
  23274. +#endif
  23275. +
  23276. +/* Make Linus happy.
  23277. + Local variables:
  23278. + c-indentation-style: "K&R"
  23279. + mode-name: "LC"
  23280. + c-basic-offset: 8
  23281. + tab-width: 8
  23282. + fill-column: 120
  23283. + End:
  23284. +*/
  23285. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/ktxnmgrd.c linux-4.14.2/fs/reiser4/ktxnmgrd.c
  23286. --- linux-4.14.2.orig/fs/reiser4/ktxnmgrd.c 1970-01-01 01:00:00.000000000 +0100
  23287. +++ linux-4.14.2/fs/reiser4/ktxnmgrd.c 2017-11-26 22:13:09.000000000 +0100
  23288. @@ -0,0 +1,215 @@
  23289. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  23290. +/* Transaction manager daemon. */
  23291. +
  23292. +/*
  23293. + * ktxnmgrd is a kernel daemon responsible for committing transactions. It is
  23294. + * needed/important for the following reasons:
  23295. + *
  23296. + * 1. in reiser4 atom is not committed immediately when last transaction
  23297. + * handle closes, unless atom is either too old or too large (see
  23298. + * atom_should_commit()). This is done to avoid committing too frequently.
  23299. + * because:
  23300. + *
  23301. + * 2. sometimes we don't want to commit atom when closing last transaction
  23302. + * handle even if it is old and fat enough. For example, because we are at
  23303. + * this point under directory semaphore, and committing would stall all
  23304. + * accesses to this directory.
  23305. + *
  23306. + * ktxnmgrd bides its time sleeping on a condition variable. When it awakes
  23307. + * either due to (tunable) timeout or because it was explicitly woken up by
  23308. + * call to ktxnmgrd_kick(), it scans list of all atoms and commits ones
  23309. + * eligible.
  23310. + *
  23311. + */
  23312. +
  23313. +#include "debug.h"
  23314. +#include "txnmgr.h"
  23315. +#include "tree.h"
  23316. +#include "ktxnmgrd.h"
  23317. +#include "super.h"
  23318. +#include "reiser4.h"
  23319. +
  23320. +#include <linux/sched.h> /* for struct task_struct */
  23321. +#include <linux/wait.h>
  23322. +#include <linux/suspend.h>
  23323. +#include <linux/kernel.h>
  23324. +#include <linux/writeback.h>
  23325. +#include <linux/kthread.h>
  23326. +#include <linux/freezer.h>
  23327. +
  23328. +static int scan_mgr(struct super_block *);
  23329. +
  23330. +/*
  23331. + * change current->comm so that ps, top, and friends will see changed
  23332. + * state. This serves no useful purpose whatsoever, but also costs nothing. May
  23333. + * be it will make lonely system administrator feeling less alone at 3 A.M.
  23334. + */
  23335. +#define set_comm(state) \
  23336. + snprintf(current->comm, sizeof(current->comm), \
  23337. + "%s:%s:%s", __FUNCTION__, (super)->s_id, (state))
  23338. +
  23339. +/**
  23340. + * ktxnmgrd - kernel txnmgr daemon
  23341. + * @arg: pointer to super block
  23342. + *
  23343. + * The background transaction manager daemon, started as a kernel thread during
  23344. + * reiser4 initialization.
  23345. + */
  23346. +static int ktxnmgrd(void *arg)
  23347. +{
  23348. + struct super_block *super;
  23349. + ktxnmgrd_context *ctx;
  23350. + txn_mgr *mgr;
  23351. + int done = 0;
  23352. +
  23353. + super = arg;
  23354. + mgr = &get_super_private(super)->tmgr;
  23355. +
  23356. + /*
  23357. + * do_fork() just copies task_struct into the new thread. ->fs_context
  23358. + * shouldn't be copied of course. This shouldn't be a problem for the
  23359. + * rest of the code though.
  23360. + */
  23361. + current->journal_info = NULL;
  23362. + ctx = mgr->daemon;
  23363. + while (1) {
  23364. + try_to_freeze();
  23365. + set_comm("wait");
  23366. + {
  23367. + DEFINE_WAIT(__wait);
  23368. +
  23369. + prepare_to_wait(&ctx->wait, &__wait,
  23370. + TASK_INTERRUPTIBLE);
  23371. + if (kthread_should_stop())
  23372. + done = 1;
  23373. + else
  23374. + schedule_timeout(ctx->timeout);
  23375. + finish_wait(&ctx->wait, &__wait);
  23376. + }
  23377. + if (done)
  23378. + break;
  23379. + set_comm("run");
  23380. + spin_lock(&ctx->guard);
  23381. + /*
  23382. + * Wait timed out or ktxnmgrd was woken up by an explicit request
  23383. + * to commit something. Scan the txnmgr's list of atoms and commit
  23384. + * the ones that are old enough. NOTE(review): ctx->guard is taken
  23385. + * here and again below with no unlock before scan_mgr() - verify. */
  23386. + do {
  23387. + ctx->rescan = 0;
  23388. + scan_mgr(super);
  23389. + spin_lock(&ctx->guard);
  23390. + if (ctx->rescan) {
  23391. + /*
  23392. + * the list could be modified while ctx
  23393. + * spinlock was released, we have to repeat
  23394. + * scanning from the beginning
  23395. + */
  23396. + break;
  23397. + }
  23398. + } while (ctx->rescan);
  23399. + spin_unlock(&ctx->guard);
  23400. + }
  23401. + return 0;
  23402. +}
  23403. +
  23404. +#undef set_comm
  23405. +
  23406. +/**
  23407. + * reiser4_init_ktxnmgrd - initialize ktxnmgrd context and start kernel daemon
  23408. + * @super: pointer to super block
  23409. + *
  23410. + * Allocates and initializes ktxnmgrd_context, attaches it to transaction
  23411. + * manager. Starts kernel txnmgr daemon. This is called on mount.
  23412. + */
  23413. +int reiser4_init_ktxnmgrd(struct super_block *super)
  23414. +{
  23415. + txn_mgr *mgr;
  23416. + ktxnmgrd_context *ctx;
  23417. +
  23418. + mgr = &get_super_private(super)->tmgr;
  23419. +
  23420. + assert("zam-1014", mgr->daemon == NULL);
  23421. +
  23422. + ctx = kzalloc(sizeof(ktxnmgrd_context), reiser4_ctx_gfp_mask_get());
  23423. + if (!ctx)
  23424. + return RETERR(-ENOMEM);
  23425. +
  23426. + assert("nikita-2442", ctx != NULL);
  23427. +
  23428. + init_waitqueue_head(&ctx->wait);
  23429. +
  23430. + /*kcond_init(&ctx->startup);*/
  23431. + spin_lock_init(&ctx->guard);
  23432. + ctx->timeout = REISER4_TXNMGR_TIMEOUT;
  23433. + ctx->rescan = 1;
  23434. + mgr->daemon = ctx;
  23435. +
  23436. + ctx->tsk = kthread_run(ktxnmgrd, super, "ktxnmgrd");
  23437. + if (IS_ERR(ctx->tsk)) {
  23438. + int ret = PTR_ERR(ctx->tsk);
  23439. + mgr->daemon = NULL;
  23440. + kfree(ctx);
  23441. + return RETERR(ret);
  23442. + }
  23443. + return 0;
  23444. +}
  23445. +
  23446. +void ktxnmgrd_kick(txn_mgr *mgr)
  23447. +{
  23448. + assert("nikita-3234", mgr != NULL);
  23449. + assert("nikita-3235", mgr->daemon != NULL);
  23450. + wake_up(&mgr->daemon->wait);
  23451. +}
  23452. +
  23453. +int is_current_ktxnmgrd(void)
  23454. +{
  23455. + return (get_current_super_private()->tmgr.daemon->tsk == current);
  23456. +}
  23457. +
  23458. +/**
  23459. + * scan_mgr - commit atoms which are to be committed
  23460. + * @super: super block to commit atoms of
  23461. + *
  23462. + * Commits old atoms.
  23463. + */
  23464. +static int scan_mgr(struct super_block *super)
  23465. +{
  23466. + int ret;
  23467. + reiser4_context ctx;
  23468. +
  23469. + init_stack_context(&ctx, super);
  23470. +
  23471. + ret = commit_some_atoms(&get_super_private(super)->tmgr);
  23472. +
  23473. + reiser4_exit_context(&ctx);
  23474. + return ret;
  23475. +}
  23476. +
  23477. +/**
  23478. + * reiser4_done_ktxnmgrd - stop kernel thread and free ktxnmgrd context
  23479. + * @super: super block whose ktxnmgrd is to be stopped
  23480. + *
  23481. + * This is called on umount. Stops ktxnmgrd and frees its context.
  23482. + */
  23483. +void reiser4_done_ktxnmgrd(struct super_block *super)
  23484. +{
  23485. + txn_mgr *mgr;
  23486. +
  23487. + mgr = &get_super_private(super)->tmgr;
  23488. + assert("zam-1012", mgr->daemon != NULL);
  23489. +
  23490. + kthread_stop(mgr->daemon->tsk);
  23491. + kfree(mgr->daemon);
  23492. + mgr->daemon = NULL;
  23493. +}
  23494. +
  23495. +/*
  23496. + * Local variables:
  23497. + * c-indentation-style: "K&R"
  23498. + * mode-name: "LC"
  23499. + * c-basic-offset: 8
  23500. + * tab-width: 8
  23501. + * fill-column: 120
  23502. + * End:
  23503. + */
  23504. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/ktxnmgrd.h linux-4.14.2/fs/reiser4/ktxnmgrd.h
  23505. --- linux-4.14.2.orig/fs/reiser4/ktxnmgrd.h 1970-01-01 01:00:00.000000000 +0100
  23506. +++ linux-4.14.2/fs/reiser4/ktxnmgrd.h 2017-11-26 22:13:09.000000000 +0100
  23507. @@ -0,0 +1,52 @@
  23508. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  23509. + * reiser4/README */
  23510. +
  23511. +/* Transaction manager daemon. See ktxnmgrd.c for comments. */
  23512. +
  23513. +#ifndef __KTXNMGRD_H__
  23514. +#define __KTXNMGRD_H__
  23515. +
  23516. +#include "txnmgr.h"
  23517. +
  23518. +#include <linux/fs.h>
  23519. +#include <linux/wait.h>
  23520. +#include <linux/completion.h>
  23521. +#include <linux/spinlock.h>
  23522. +#include <asm/atomic.h>
  23523. +#include <linux/sched.h> /* for struct task_struct */
  23524. +
  23525. +/* in this structure all data necessary to start up, shut down and communicate
  23526. + * with ktxnmgrd are kept. */
  23527. +struct ktxnmgrd_context {
  23528. + /* wait queue head on which ktxnmgrd sleeps */
  23529. + wait_queue_head_t wait;
  23530. + /* spin lock protecting all fields of this structure */
  23531. + spinlock_t guard;
  23532. + /* timeout of sleeping on ->wait */
  23533. + signed long timeout;
  23534. + /* kernel thread running ktxnmgrd */
  23535. + struct task_struct *tsk;
  23536. + /* list of all file systems served by this ktxnmgrd */
  23537. + struct list_head queue;
  23538. + /* should ktxnmgrd repeat scanning of atoms? */
  23539. + unsigned int rescan:1;
  23540. +};
  23541. +
  23542. +extern int reiser4_init_ktxnmgrd(struct super_block *);
  23543. +extern void reiser4_done_ktxnmgrd(struct super_block *);
  23544. +
  23545. +extern void ktxnmgrd_kick(txn_mgr * mgr);
  23546. +extern int is_current_ktxnmgrd(void);
  23547. +
  23548. +/* __KTXNMGRD_H__ */
  23549. +#endif
  23550. +
  23551. +/* Make Linus happy.
  23552. + Local variables:
  23553. + c-indentation-style: "K&R"
  23554. + mode-name: "LC"
  23555. + c-basic-offset: 8
  23556. + tab-width: 8
  23557. + fill-column: 120
  23558. + End:
  23559. +*/
  23560. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/lock.c linux-4.14.2/fs/reiser4/lock.c
  23561. --- linux-4.14.2.orig/fs/reiser4/lock.c 1970-01-01 01:00:00.000000000 +0100
  23562. +++ linux-4.14.2/fs/reiser4/lock.c 2017-11-26 22:13:09.000000000 +0100
  23563. @@ -0,0 +1,1237 @@
  23564. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  23565. + * reiser4/README */
  23566. +
  23567. +/* Traditional deadlock avoidance is achieved by acquiring all locks in a single
  23568. + order. V4 balances the tree from the bottom up, and searches the tree from
  23569. + the top down, and that is really the way we want it, so tradition won't work
  23570. + for us.
  23571. +
  23572. + Instead we have two lock orderings, a high priority lock ordering, and a low
  23573. + priority lock ordering. Each node in the tree has a lock in its znode.
  23574. +
  23575. + Suppose we have a set of processes which lock (R/W) tree nodes. Each process
  23576. + has a set (maybe empty) of already locked nodes ("process locked set"). Each
  23577. + process may have a pending lock request to a node locked by another process.
  23578. + Note: we lock and unlock, but do not transfer locks: it is possible
  23579. + transferring locks instead would save some bus locking....
  23580. +
  23581. + Deadlock occurs when we have a loop constructed from process locked sets and
  23582. + lock request vectors.
  23583. +
  23584. + NOTE: The reiser4 "tree" is a tree on disk, but its cached representation in
  23585. + memory is extended with "znodes" with which we connect nodes with their left
  23586. + and right neighbors using sibling pointers stored in the znodes. When we
  23587. + perform balancing operations we often go from left to right and from right to
  23588. + left.
  23589. +
  23590. + +-P1-+ +-P3-+
  23591. + |+--+| V1 |+--+|
  23592. + ||N1|| -------> ||N3||
  23593. + |+--+| |+--+|
  23594. + +----+ +----+
  23595. + ^ |
  23596. + |V2 |V3
  23597. + | v
  23598. + +---------P2---------+
  23599. + |+--+ +--+|
  23600. + ||N2| -------- |N4||
  23601. + |+--+ +--+|
  23602. + +--------------------+
  23603. +
  23604. + We solve this by ensuring that only low priority processes lock in top to
  23605. + bottom order and from right to left, and high priority processes lock from
  23606. + bottom to top and left to right.
  23607. +
  23608. + ZAM-FIXME-HANS: order not just node locks in this way, order atom locks, and
  23609. + kill those damn busy loops.
  23610. + ANSWER(ZAM): atom locks (which are introduced by ASTAGE_CAPTURE_WAIT atom
  23611. + stage) cannot be ordered that way. There are no rules what nodes can belong
  23612. + to the atom and what nodes cannot. We cannot define what is right or left
  23613. + direction, what is top or bottom. We can take immediate parent or side
  23614. + neighbor of one node, but nobody guarantees that, say, left neighbor node is
  23615. + not a far right neighbor for other nodes from the same atom. It breaks
  23616. + deadlock avoidance rules and hi-low priority locking cannot be applied for
  23617. + atom locks.
  23618. +
  23619. + How does it help to avoid deadlocks ?
  23620. +
  23621. + Suppose we have a deadlock with n processes. Processes from one priority
  23622. + class never deadlock because they take locks in one consistent
  23623. + order.
  23624. +
  23625. + So, any possible deadlock loop must have low priority as well as high
  23626. + priority processes. There are no other lock priority levels except low and
  23627. + high. We know that any deadlock loop contains at least one node locked by a
  23628. + low priority process and requested by a high priority process. If this
  23629. + situation is caught and resolved it is sufficient to avoid deadlocks.
  23630. +
  23631. + V4 DEADLOCK PREVENTION ALGORITHM IMPLEMENTATION.
  23632. +
  23633. + The deadlock prevention algorithm is based on comparing
  23634. + priorities of node owners (processes which keep znode locked) and
  23635. + requesters (processes which want to acquire a lock on znode). We
  23636. + implement a scheme where low-priority owners yield locks to
  23637. + high-priority requesters. We created a signal passing system that
  23638. + is used to ask low-priority processes to yield one or more locked
  23639. + znodes.
  23640. +
  23641. + The condition when a znode needs to change its owners is described by the
  23642. + following formula:
  23643. +
  23644. + #############################################
  23645. + # #
  23646. + # (number of high-priority requesters) > 0 #
  23647. + # AND #
  23648. + # (numbers of high-priority owners) == 0 #
  23649. + # #
  23650. + #############################################
  23651. +
  23652. + Note that a low-priority process delays node releasing if another
  23653. + high-priority process owns this node. So, slightly more strictly speaking,
  23654. + to have a deadlock capable cycle you must have a loop in which a high
  23655. + priority process is waiting on a low priority process to yield a node, which
  23656. + is slightly different from saying a high priority process is waiting on a
  23657. + node owned by a low priority process.
  23658. +
  23659. + It is enough to avoid deadlocks if we prevent any low-priority process from
  23660. + falling asleep if its locked set contains a node which satisfies the
  23661. + deadlock condition.
  23662. +
  23663. + That condition is implicitly or explicitly checked in all places where new
  23664. + high-priority requests may be added or removed from node request queue or
  23665. + high-priority process takes or releases a lock on node. The main
  23666. + goal of these checks is to never lose the moment when node becomes "has
  23667. + wrong owners" and send "must-yield-this-lock" signals to its low-pri owners
  23668. + at that time.
  23669. +
  23670. + The information about received signals is stored in the per-process
  23671. + structure (lock stack) and analyzed before a low-priority process goes to
  23672. + sleep but after a "fast" attempt to lock a node fails. Any signal wakes
  23673. + sleeping process up and forces it to re-check lock status and received
  23674. + signal info. If "must-yield-this-lock" signals were received the locking
  23675. + primitive (longterm_lock_znode()) fails with -E_DEADLOCK error code.
  23676. +
  23677. + V4 LOCKING DRAWBACKS
  23678. +
  23679. + If we have already balanced on one level, and we are propagating our changes
  23680. + upward to a higher level, it could be very messy to surrender all locks on
  23681. + the lower level because we put so much computational work into it, and
  23682. + reverting them to their state before they were locked might be very complex.
  23683. + We also don't want to acquire all locks before performing balancing because
  23684. + that would either be almost as much work as the balancing, or it would be
  23685. + too conservative and lock too much. We want balancing to be done only at
  23686. + high priority. Yet, we might want to go to the left one node and use some
  23687. + of its empty space... So we make one attempt at getting the node to the left
  23688. + using try_lock, and if it fails we do without it, because we didn't really
  23689. + need it, it was only a nice to have.
  23690. +
  23691. + LOCK STRUCTURES DESCRIPTION
  23692. +
  23693. + The following data structures are used in the reiser4 locking
  23694. + implementation:
  23695. +
  23696. + All fields related to long-term locking are stored in znode->lock.
  23697. +
  23698. + The lock stack is a per thread object. It owns all znodes locked by the
  23699. + thread. One znode may be locked by several threads in case of read lock or
  23700. + one znode may be write locked by one thread several times. The special link
  23701. + objects (lock handles) support n<->m relation between znodes and lock
  23702. + owners.
  23703. +
  23704. + <Thread 1> <Thread 2>
  23705. +
  23706. + +---------+ +---------+
  23707. + | LS1 | | LS2 |
  23708. + +---------+ +---------+
  23709. + ^ ^
  23710. + |---------------+ +----------+
  23711. + v v v v
  23712. + +---------+ +---------+ +---------+ +---------+
  23713. + | LH1 | | LH2 | | LH3 | | LH4 |
  23714. + +---------+ +---------+ +---------+ +---------+
  23715. + ^ ^ ^ ^
  23716. + | +------------+ |
  23717. + v v v
  23718. + +---------+ +---------+ +---------+
  23719. + | Z1 | | Z2 | | Z3 |
  23720. + +---------+ +---------+ +---------+
  23721. +
  23722. + Thread 1 locked znodes Z1 and Z2, thread 2 locked znodes Z2 and Z3. The
  23723. + picture above shows that lock stack LS1 has a list of 2 lock handles LH1 and
  23724. + LH2, lock stack LS2 has a list with lock handles LH3 and LH4 on it. Znode
  23725. + Z1 is locked by only one thread, znode has only one lock handle LH1 on its
  23726. + list, similar situation is for Z3 which is locked by the thread 2 only. Z2
  23727. + is locked (for read) twice by different threads and two lock handles are on
  23728. + its list. Each lock handle represents a single relation of a locking of a
  23729. + znode by a thread. Locking of a znode is an establishing of a locking
  23730. + relation between the lock stack and the znode by adding of a new lock handle
  23731. + to a list of lock handles, the lock stack. The lock stack links all lock
  23732. + handles for all znodes locked by the lock stack. The znode list groups all
  23733. + lock handles for all locks stacks which locked the znode.
  23734. +
  23735. + Yet another relation may exist between znode and lock owners. If lock
  23736. + procedure cannot immediately take lock on an object it adds the lock owner
  23737. + onto a special `requestors' list belonging to the znode. That list is a
  23738. + queue of pending lock requests. Because one lock owner may request
  23739. + only one lock object at a time, it is a 1->n relation between lock objects
  23740. + and a lock owner implemented as it is described above. Full information
  23741. + (priority, pointers to lock and link objects) about each lock request is
  23742. + stored in lock owner structure in `request' field.
  23743. +
  23744. + SHORT_TERM LOCKING
  23745. +
  23746. + This is a list of primitive operations over lock stacks / lock handles /
  23747. + znodes and locking descriptions for them.
  23748. +
  23749. + 1. locking / unlocking which is done by two list insertion/deletion, one
  23750. + to/from znode's list of lock handles, another one is to/from lock stack's
  23751. + list of lock handles. The first insertion is protected by
  23752. + znode->lock.guard spinlock. The list owned by the lock stack can be
  23753. + modified only by thread who owns the lock stack and nobody else can
  23754. + modify/read it. There is nothing to be protected by a spinlock or
  23755. + something else.
  23756. +
  23757. + 2. adding/removing a lock request to/from znode requesters list. The rule is
  23758. + that znode->lock.guard spinlock should be taken for this.
  23759. +
  23760. + 3. we can traverse list of lock handles and use references to lock stacks who
  23761. + locked given znode if znode->lock.guard spinlock is taken.
  23762. +
  23763. + 4. If a lock stack is associated with a znode as a lock requestor or lock
  23764. + owner, its existence is guaranteed by znode->lock.guard spinlock. Some of its
  23765. + (lock stack's) fields should be protected from being accessed in parallel
  23766. + by two or more threads. Please look at lock_stack structure definition
  23767. + for the info how those fields are protected. */
  23768. +
  23769. +/* Znode lock and capturing intertwining. */
  23770. +/* In current implementation we capture formatted nodes before locking
  23771. + them. Take a look on longterm lock znode, reiser4_try_capture() request
  23772. + precedes locking requests. The longterm_lock_znode function unconditionally
  23773. + captures znode before even checking of locking conditions.
  23774. +
  23775. + Another variant is to capture znode after locking it. It was not tested, but
  23776. + at least one deadlock condition is supposed to be there. One thread has
  23777. + locked a znode (Node-1) and calls reiser4_try_capture() for it.
  23778. + reiser4_try_capture() sleeps because znode's atom has CAPTURE_WAIT state.
  23779. + Second thread is a flushing thread, its current atom is the atom Node-1
  23780. + belongs to. Second thread wants to lock Node-1 and sleeps because Node-1
  23781. + is locked by the first thread. The described situation is a deadlock. */
  23782. +
  23783. +#include "debug.h"
  23784. +#include "txnmgr.h"
  23785. +#include "znode.h"
  23786. +#include "jnode.h"
  23787. +#include "tree.h"
  23788. +#include "plugin/node/node.h"
  23789. +#include "super.h"
  23790. +
  23791. +#include <linux/spinlock.h>
  23792. +
  23793. +#if REISER4_DEBUG
  23794. +static int request_is_deadlock_safe(znode * , znode_lock_mode,
  23795. + znode_lock_request);
  23796. +#endif
  23797. +
  23798. +/* Returns a lock owner associated with current thread */
  23799. +lock_stack *get_current_lock_stack(void)
  23800. +{
  23801. + return &get_current_context()->stack;
  23802. +}
  23803. +
  23804. +/* Wakes up all low priority owners informing them about possible deadlock */
  23805. +static void wake_up_all_lopri_owners(znode * node)
  23806. +{
  23807. + lock_handle *handle;
  23808. +
  23809. + assert_spin_locked(&(node->lock.guard));
  23810. + list_for_each_entry(handle, &node->lock.owners, owners_link) {
  23811. + assert("nikita-1832", handle->node == node);
  23812. + /* count this signal in owner->nr_signaled */
  23813. + if (!handle->signaled) {
  23814. + handle->signaled = 1;
  23815. + atomic_inc(&handle->owner->nr_signaled);
  23816. + /* Wake up a single process */
  23817. + reiser4_wake_up(handle->owner);
  23818. + }
  23819. + }
  23820. +}
  23821. +
  23822. +/* Adds a lock to a lock owner, which means creating a link to the lock and
  23823. + putting the link into the two lists all links are on (the doubly linked list
  23824. + that forms the lock_stack, and the doubly linked list of links attached
  23825. + to a lock.
  23826. +*/
  23827. +static inline void
  23828. +link_object(lock_handle * handle, lock_stack * owner, znode * node)
  23829. +{
  23830. + assert("jmacd-810", handle->owner == NULL);
  23831. + assert_spin_locked(&(node->lock.guard));
  23832. +
  23833. + handle->owner = owner;
  23834. + handle->node = node;
  23835. +
  23836. + assert("reiser4-4",
  23837. + ergo(list_empty_careful(&owner->locks), owner->nr_locks == 0));
  23838. +
  23839. + /* add lock handle to the end of lock_stack's list of locks */
  23840. + list_add_tail(&handle->locks_link, &owner->locks);
  23841. + ON_DEBUG(owner->nr_locks++);
  23842. + reiser4_ctx_gfp_mask_set();
  23843. +
  23844. + /* add lock handle to the head of znode's list of owners */
  23845. + list_add(&handle->owners_link, &node->lock.owners);
  23846. + handle->signaled = 0;
  23847. +}
  23848. +
  23849. +/* Breaks a relation between a lock and its owner */
  23850. +static inline void unlink_object(lock_handle * handle)
  23851. +{
  23852. + assert("zam-354", handle->owner != NULL);
  23853. + assert("nikita-1608", handle->node != NULL);
  23854. + assert_spin_locked(&(handle->node->lock.guard));
  23855. + assert("nikita-1829", handle->owner == get_current_lock_stack());
  23856. + assert("reiser4-5", handle->owner->nr_locks > 0);
  23857. +
  23858. + /* remove lock handle from lock_stack's list of locks */
  23859. + list_del(&handle->locks_link);
  23860. + ON_DEBUG(handle->owner->nr_locks--);
  23861. + reiser4_ctx_gfp_mask_set();
  23862. + assert("reiser4-6",
  23863. + ergo(list_empty_careful(&handle->owner->locks),
  23864. + handle->owner->nr_locks == 0));
  23865. + /* remove lock handle from znode's list of owners */
  23866. + list_del(&handle->owners_link);
  23867. + /* indicates that lock handle is free now */
  23868. + handle->node = NULL;
  23869. +#if REISER4_DEBUG
  23870. + INIT_LIST_HEAD(&handle->locks_link);
  23871. + INIT_LIST_HEAD(&handle->owners_link);
  23872. + handle->owner = NULL;
  23873. +#endif
  23874. +}
  23875. +
  23876. +/* Actually locks an object knowing that we are able to do this */
  23877. +static void lock_object(lock_stack * owner)
  23878. +{
  23879. + struct lock_request *request;
  23880. + znode *node;
  23881. +
  23882. + request = &owner->request;
  23883. + node = request->node;
  23884. + assert_spin_locked(&(node->lock.guard));
  23885. + if (request->mode == ZNODE_READ_LOCK) {
  23886. + node->lock.nr_readers++;
  23887. + } else {
  23888. + /* check that we did not switch from read to write lock */
  23889. + assert("nikita-1840", node->lock.nr_readers <= 0);
  23890. + /* We allow recursive locking; a node can be locked several
  23891. + times for write by same process */
  23892. + node->lock.nr_readers--;
  23893. + }
  23894. +
  23895. + link_object(request->handle, owner, node);
  23896. +
  23897. + if (owner->curpri)
  23898. + node->lock.nr_hipri_owners++;
  23899. +}
  23900. +
  23901. +/* Check for recursive write locking */
  23902. +static int recursive(lock_stack * owner)
  23903. +{
  23904. + int ret;
  23905. + znode *node;
  23906. + lock_handle *lh;
  23907. +
  23908. + node = owner->request.node;
  23909. +
  23910. + /* Owners list is not empty for a locked node */
  23911. + assert("zam-314", !list_empty_careful(&node->lock.owners));
  23912. + assert("nikita-1841", owner == get_current_lock_stack());
  23913. + assert_spin_locked(&(node->lock.guard));
  23914. +
  23915. + lh = list_entry(node->lock.owners.next, lock_handle, owners_link);
  23916. + ret = (lh->owner == owner);
  23917. +
  23918. + /* Recursive read locking should be done usual way */
  23919. + assert("zam-315", !ret || owner->request.mode == ZNODE_WRITE_LOCK);
  23920. + /* mixing of read/write locks is not allowed */
  23921. + assert("zam-341", !ret || znode_is_wlocked(node));
  23922. +
  23923. + return ret;
  23924. +}
  23925. +
  23926. +#if REISER4_DEBUG
  23927. +/* Returns true if the lock is held by the calling thread. */
  23928. +int znode_is_any_locked(const znode * node)
  23929. +{
  23930. + lock_handle *handle;
  23931. + lock_stack *stack;
  23932. + int ret;
  23933. +
  23934. + if (!znode_is_locked(node))
  23935. + return 0;
  23936. +
  23937. + stack = get_current_lock_stack();
  23938. +
  23939. + spin_lock_stack(stack);
  23940. +
  23941. + ret = 0;
  23942. +
  23943. + list_for_each_entry(handle, &stack->locks, locks_link) {
  23944. + if (handle->node == node) {
  23945. + ret = 1;
  23946. + break;
  23947. + }
  23948. + }
  23949. +
  23950. + spin_unlock_stack(stack);
  23951. +
  23952. + return ret;
  23953. +}
  23954. +
  23955. +#endif
  23956. +
  23957. +/* Returns true if a write lock is held by the calling thread. */
  23958. +int znode_is_write_locked(const znode * node)
  23959. +{
  23960. + lock_stack *stack;
  23961. + lock_handle *handle;
  23962. +
  23963. + assert("jmacd-8765", node != NULL);
  23964. +
  23965. + if (!znode_is_wlocked(node))
  23966. + return 0;
  23967. +
  23968. + stack = get_current_lock_stack();
  23969. +
  23970. + /*
  23971. + * When znode is write locked, all owner handles point to the same lock
  23972. + * stack. Get pointer to lock stack from the first lock handle from
  23973. + * znode's owner list
  23974. + */
  23975. + handle = list_entry(node->lock.owners.next, lock_handle, owners_link);
  23976. +
  23977. + return (handle->owner == stack);
  23978. +}
  23979. +
  23980. +/* This "deadlock" condition is the essential part of reiser4 locking
  23981. + implementation. This condition is checked explicitly by calling
  23982. + check_deadlock_condition() or implicitly in all places where znode lock
  23983. + state (set of owners and request queue) is changed. Locking code is
  23984. + designed to use this condition to trigger procedure of passing object from
  23985. + low priority owner(s) to high priority one(s).
  23986. +
  23987. + The procedure results in passing an event (setting lock_handle->signaled
  23988. + flag) and counting this event in nr_signaled field of owner's lock stack
  23989. + object and wakeup owner's process.
  23990. +*/
  23991. +static inline int check_deadlock_condition(znode * node)
  23992. +{
  23993. + assert_spin_locked(&(node->lock.guard));
  23994. + return node->lock.nr_hipri_requests > 0
  23995. + && node->lock.nr_hipri_owners == 0;
  23996. +}
  23997. +
  23998. +static int check_livelock_condition(znode * node, znode_lock_mode mode)
  23999. +{
  24000. + zlock * lock = &node->lock;
  24001. +
  24002. + return mode == ZNODE_READ_LOCK &&
  24003. + lock->nr_readers >= 0 && lock->nr_hipri_write_requests > 0;
  24004. +}
  24005. +
  24006. +/* checks lock/request compatibility */
  24007. +static int can_lock_object(lock_stack * owner)
  24008. +{
  24009. + znode *node = owner->request.node;
  24010. +
  24011. + assert_spin_locked(&(node->lock.guard));
  24012. +
  24013. + /* See if the node is disconnected. */
  24014. + if (unlikely(ZF_ISSET(node, JNODE_IS_DYING)))
  24015. + return RETERR(-EINVAL);
  24016. +
  24017. + /* Do not ever try to take a lock if we are going in low priority
  24019. + direction and a node has a high priority request without high
  24019. + priority owners. */
  24020. + if (unlikely(!owner->curpri && check_deadlock_condition(node)))
  24021. + return RETERR(-E_REPEAT);
  24022. + if (unlikely(owner->curpri &&
  24023. + check_livelock_condition(node, owner->request.mode)))
  24024. + return RETERR(-E_REPEAT);
  24025. + if (unlikely(!is_lock_compatible(node, owner->request.mode)))
  24026. + return RETERR(-E_REPEAT);
  24027. + return 0;
  24028. +}
  24029. +
  24030. +/* Setting of a high priority to the process. It clears "signaled" flags
  24031. + because znode locked by high-priority process can't satisfy our "deadlock
  24032. + condition". */
  24033. +static void set_high_priority(lock_stack * owner)
  24034. +{
  24035. + assert("nikita-1846", owner == get_current_lock_stack());
  24036. + /* Do nothing if current priority is already high */
  24037. + if (!owner->curpri) {
  24038. + /* We don't need locking for owner->locks list, because, this
  24039. + * function is only called with the lock stack of the current
  24040. + * thread, and no other thread can play with owner->locks list
  24041. + * and/or change ->node pointers of lock handles in this list.
  24042. + *
  24043. + * (Interrupts also are not involved.)
  24044. + */
  24045. + lock_handle *item = list_entry(owner->locks.next, lock_handle,
  24046. + locks_link);
  24047. + while (&owner->locks != &item->locks_link) {
  24048. + znode *node = item->node;
  24049. +
  24050. + spin_lock_zlock(&node->lock);
  24051. +
  24052. + node->lock.nr_hipri_owners++;
  24053. +
  24054. + /* we can safely set signaled to zero, because
  24055. + previous statement (nr_hipri_owners ++) guarantees
  24056. + that signaled will never be set again. */
  24057. + item->signaled = 0;
  24058. + spin_unlock_zlock(&node->lock);
  24059. +
  24060. + item = list_entry(item->locks_link.next, lock_handle,
  24061. + locks_link);
  24062. + }
  24063. + owner->curpri = 1;
  24064. + atomic_set(&owner->nr_signaled, 0);
  24065. + }
  24066. +}
  24067. +
  24068. +/* Sets a low priority to the process. */
  24069. +static void set_low_priority(lock_stack * owner)
  24070. +{
  24071. + assert("nikita-3075", owner == get_current_lock_stack());
  24072. + /* Do nothing if current priority is already low */
  24073. + if (owner->curpri) {
  24074. + /* scan all locks (lock handles) held by @owner, which is
  24075. + actually current thread, and check whether we are reaching
  24076. + deadlock possibility anywhere.
  24077. + */
  24078. + lock_handle *handle = list_entry(owner->locks.next, lock_handle,
  24079. + locks_link);
  24080. + while (&owner->locks != &handle->locks_link) {
  24081. + znode *node = handle->node;
  24082. + spin_lock_zlock(&node->lock);
  24083. + /* this thread just was hipri owner of @node, so
  24084. + nr_hipri_owners has to be greater than zero. */
  24085. + assert("nikita-1835", node->lock.nr_hipri_owners > 0);
  24086. + node->lock.nr_hipri_owners--;
  24087. + /* If we have deadlock condition, adjust a nr_signaled
  24088. + field. It is enough to set "signaled" flag only for
  24089. + current process, other low-pri owners will be
  24090. + signaled and woken up after current process unlocks
  24091. + this object and any high-priority requestor takes
  24092. + control. */
  24093. + if (check_deadlock_condition(node)
  24094. + && !handle->signaled) {
  24095. + handle->signaled = 1;
  24096. + atomic_inc(&owner->nr_signaled);
  24097. + }
  24098. + spin_unlock_zlock(&node->lock);
  24099. + handle = list_entry(handle->locks_link.next,
  24100. + lock_handle, locks_link);
  24101. + }
  24102. + owner->curpri = 0;
  24103. + }
  24104. +}
  24105. +
  24106. +static void remove_lock_request(lock_stack * requestor)
  24107. +{
  24108. + zlock * lock = &requestor->request.node->lock;
  24109. +
  24110. + if (requestor->curpri) {
  24111. + assert("nikita-1838", lock->nr_hipri_requests > 0);
  24112. + lock->nr_hipri_requests--;
  24113. + if (requestor->request.mode == ZNODE_WRITE_LOCK)
  24114. + lock->nr_hipri_write_requests--;
  24115. + }
  24116. + list_del(&requestor->requestors_link);
  24117. +}
  24118. +
  24119. +static void invalidate_all_lock_requests(znode * node)
  24120. +{
  24121. + lock_stack *requestor, *tmp;
  24122. +
  24123. + assert_spin_locked(&(node->lock.guard));
  24124. +
  24125. + list_for_each_entry_safe(requestor, tmp, &node->lock.requestors,
  24126. + requestors_link) {
  24127. + remove_lock_request(requestor);
  24128. + requestor->request.ret_code = -EINVAL;
  24129. + reiser4_wake_up(requestor);
  24130. + requestor->request.mode = ZNODE_NO_LOCK;
  24131. + }
  24132. +}
  24133. +
  24134. +static void dispatch_lock_requests(znode * node)
  24135. +{
  24136. + lock_stack *requestor, *tmp;
  24137. +
  24138. + assert_spin_locked(&(node->lock.guard));
  24139. +
  24140. + list_for_each_entry_safe(requestor, tmp, &node->lock.requestors,
  24141. + requestors_link) {
  24142. + if (znode_is_write_locked(node))
  24143. + break;
  24144. + if (!can_lock_object(requestor)) {
  24145. + lock_object(requestor);
  24146. + remove_lock_request(requestor);
  24147. + requestor->request.ret_code = 0;
  24148. + reiser4_wake_up(requestor);
  24149. + requestor->request.mode = ZNODE_NO_LOCK;
  24150. + }
  24151. + }
  24152. +}
  24153. +
  24154. +/* release long-term lock, acquired by longterm_lock_znode() */
  24155. +void longterm_unlock_znode(lock_handle * handle)
  24156. +{
  24157. + znode *node = handle->node;
  24158. + lock_stack *oldowner = handle->owner;
  24159. + int hipri;
  24160. + int readers;
  24161. + int rdelta;
  24162. + int youdie;
  24163. +
  24164. + /*
  24165. + * this is time-critical and highly optimized code. Modify carefully.
  24166. + */
  24167. +
  24168. + assert("jmacd-1021", handle != NULL);
  24169. + assert("jmacd-1022", handle->owner != NULL);
  24170. + assert("nikita-1392", LOCK_CNT_GTZ(long_term_locked_znode));
  24171. +
  24172. + assert("zam-130", oldowner == get_current_lock_stack());
  24173. +
  24174. + LOCK_CNT_DEC(long_term_locked_znode);
  24175. +
  24176. + /*
  24177. + * to minimize amount of operations performed under lock, pre-compute
  24178. + * all variables used within critical section. This makes code
  24179. + * obscure.
  24180. + */
  24181. +
  24182. + /* was this lock of hi or lo priority */
  24183. + hipri = oldowner->curpri ? 1 : 0;
  24184. + /* number of readers */
  24185. + readers = node->lock.nr_readers;
  24186. + /* +1 if write lock, -1 if read lock */
  24187. + rdelta = (readers > 0) ? -1 : +1;
  24188. + /* true if node is to die and write lock is released */
  24189. + youdie = ZF_ISSET(node, JNODE_HEARD_BANSHEE) && (readers < 0);
  24190. +
  24191. + spin_lock_zlock(&node->lock);
  24192. +
  24193. + assert("zam-101", znode_is_locked(node));
  24194. +
  24195. + /* Adjust a number of high priority owners of this lock */
  24196. + assert("nikita-1836", node->lock.nr_hipri_owners >= hipri);
  24197. + node->lock.nr_hipri_owners -= hipri;
  24198. +
  24199. + /* Handle znode deallocation on last write-lock release. */
  24200. + if (znode_is_wlocked_once(node)) {
  24201. + if (youdie) {
  24202. + forget_znode(handle);
  24203. + assert("nikita-2191", znode_invariant(node));
  24204. + zput(node);
  24205. + return;
  24206. + }
  24207. + }
  24208. +
  24209. + if (handle->signaled)
  24210. + atomic_dec(&oldowner->nr_signaled);
  24211. +
  24212. + /* Unlocking means owner<->object link deletion */
  24213. + unlink_object(handle);
  24214. +
  24215. + /* This is enough to be sure whether an object is completely
  24216. + unlocked. */
  24217. + node->lock.nr_readers += rdelta;
  24218. +
  24219. + /* If the node is locked it must have an owners list. Likewise, if
  24220. + the node is unlocked it must have an empty owners list. */
  24221. + assert("zam-319", equi(znode_is_locked(node),
  24222. + !list_empty_careful(&node->lock.owners)));
  24223. +
  24224. +#if REISER4_DEBUG
  24225. + if (!znode_is_locked(node))
  24226. + ++node->times_locked;
  24227. +#endif
  24228. +
  24229. + /* If there are pending lock requests we wake up a requestor */
  24230. + if (!znode_is_wlocked(node))
  24231. + dispatch_lock_requests(node);
  24232. + if (check_deadlock_condition(node))
  24233. + wake_up_all_lopri_owners(node);
  24234. + spin_unlock_zlock(&node->lock);
  24235. +
  24236. + /* minus one reference from handle->node */
  24237. + assert("nikita-2190", znode_invariant(node));
  24238. + ON_DEBUG(check_lock_data());
  24239. + ON_DEBUG(check_lock_node_data(node));
  24240. + zput(node);
  24241. +}
  24242. +
  24243. +/* final portion of longterm-lock */
  24244. +static int
  24245. +lock_tail(lock_stack * owner, int ok, znode_lock_mode mode)
  24246. +{
  24247. + znode *node = owner->request.node;
  24248. +
  24249. + assert_spin_locked(&(node->lock.guard));
  24250. +
  24251. + /* If we broke with (ok == 0) it means we can_lock, now do it. */
  24252. + if (ok == 0) {
  24253. + lock_object(owner);
  24254. + owner->request.mode = 0;
  24255. + /* count a reference from lockhandle->node
  24256. +
  24257. + znode was already referenced at the entry to this function,
  24258. + hence taking spin-lock here is not necessary (see comment
  24259. + in the zref()).
  24260. + */
  24261. + zref(node);
  24262. +
  24263. + LOCK_CNT_INC(long_term_locked_znode);
  24264. + }
  24265. + spin_unlock_zlock(&node->lock);
  24266. + ON_DEBUG(check_lock_data());
  24267. + ON_DEBUG(check_lock_node_data(node));
  24268. + return ok;
  24269. +}
  24270. +
  24271. +/*
  24272. + * version of longterm_znode_lock() optimized for the most common case: read
  24273. + * lock without any special flags. This is the kind of lock that any tree
  24274. + * traversal takes on the root node of the tree, which is very frequent.
  24275. + */
  24276. +static int longterm_lock_tryfast(lock_stack * owner)
  24277. +{
  24278. + int result;
  24279. + znode *node;
  24280. + zlock *lock;
  24281. +
  24282. + node = owner->request.node;
  24283. + lock = &node->lock;
  24284. +
  24285. + assert("nikita-3340", reiser4_schedulable());
  24286. + assert("nikita-3341", request_is_deadlock_safe(node,
  24287. + ZNODE_READ_LOCK,
  24288. + ZNODE_LOCK_LOPRI));
  24289. + spin_lock_zlock(lock);
  24290. + result = can_lock_object(owner);
  24291. + spin_unlock_zlock(lock);
  24292. +
  24293. + if (likely(result != -EINVAL)) {
  24294. + spin_lock_znode(node);
  24295. + result = reiser4_try_capture(ZJNODE(node), ZNODE_READ_LOCK, 0);
  24296. + spin_unlock_znode(node);
  24297. + spin_lock_zlock(lock);
  24298. + if (unlikely(result != 0)) {
  24299. + owner->request.mode = 0;
  24300. + } else {
  24301. + result = can_lock_object(owner);
  24302. + if (unlikely(result == -E_REPEAT)) {
  24303. + /* fall back to longterm_lock_znode() */
  24304. + spin_unlock_zlock(lock);
  24305. + return 1;
  24306. + }
  24307. + }
  24308. + return lock_tail(owner, result, ZNODE_READ_LOCK);
  24309. + } else
  24310. + return 1;
  24311. +}
  24312. +
  24313. +/* locks given lock object */
  24314. +int longterm_lock_znode(
  24315. + /* local link object (allocated by lock owner
  24316. + * thread, usually on its own stack) */
  24317. + lock_handle * handle,
  24318. + /* znode we want to lock. */
  24319. + znode * node,
  24320. + /* {ZNODE_READ_LOCK, ZNODE_WRITE_LOCK}; */
  24321. + znode_lock_mode mode,
  24322. + /* {0, -EINVAL, -E_DEADLOCK}, see return codes
  24323. + description. */
  24324. + znode_lock_request request) {
  24325. + int ret;
  24326. + int hipri = (request & ZNODE_LOCK_HIPRI) != 0;
  24327. + int non_blocking = 0;
  24328. + int has_atom;
  24329. + txn_capture cap_flags;
  24330. + zlock *lock;
  24331. + txn_handle *txnh;
  24332. + tree_level level;
  24333. +
  24334. + /* Get current process context */
  24335. + lock_stack *owner = get_current_lock_stack();
  24336. +
  24337. + /* Check that the lock handle is initialized and isn't already being
  24338. + * used. */
  24339. + assert("jmacd-808", handle->owner == NULL);
  24340. + assert("nikita-3026", reiser4_schedulable());
  24341. + assert("nikita-3219", request_is_deadlock_safe(node, mode, request));
  24342. + assert("zam-1056", atomic_read(&ZJNODE(node)->x_count) > 0);
  24343. + /* long term locks are not allowed in the VM contexts (->writepage(),
  24344. + * prune_{d,i}cache()).
  24345. + *
  24346. + * FIXME this doesn't work due to unused-dentry-with-unlinked-inode
  24347. + * bug caused by d_splice_alias() only working for directories.
  24348. + */
  24349. + assert("nikita-3547", 1 || ((current->flags & PF_MEMALLOC) == 0));
  24350. + assert("zam-1055", mode != ZNODE_NO_LOCK);
  24351. +
  24352. + cap_flags = 0;
  24353. + if (request & ZNODE_LOCK_NONBLOCK) {
  24354. + cap_flags |= TXN_CAPTURE_NONBLOCKING;
  24355. + non_blocking = 1;
  24356. + }
  24357. +
  24358. + if (request & ZNODE_LOCK_DONT_FUSE)
  24359. + cap_flags |= TXN_CAPTURE_DONT_FUSE;
  24360. +
  24361. + /* If we are changing our process priority we must adjust a number
  24362. + of high priority owners for each znode that we already lock */
  24363. + if (hipri) {
  24364. + set_high_priority(owner);
  24365. + } else {
  24366. + set_low_priority(owner);
  24367. + }
  24368. +
  24369. + level = znode_get_level(node);
  24370. +
  24371. + /* Fill request structure with our values. */
  24372. + owner->request.mode = mode;
  24373. + owner->request.handle = handle;
  24374. + owner->request.node = node;
  24375. +
  24376. + txnh = get_current_context()->trans;
  24377. + lock = &node->lock;
  24378. +
  24379. + if (mode == ZNODE_READ_LOCK && request == 0) {
  24380. + ret = longterm_lock_tryfast(owner);
  24381. + if (ret <= 0)
  24382. + return ret;
  24383. + }
  24384. +
  24385. + has_atom = (txnh->atom != NULL);
  24386. +
  24387. + /* Synchronize on node's zlock guard lock. */
  24388. + spin_lock_zlock(lock);
  24389. +
  24390. + if (znode_is_locked(node) &&
  24391. + mode == ZNODE_WRITE_LOCK && recursive(owner))
  24392. + return lock_tail(owner, 0, mode);
  24393. +
  24394. + for (;;) {
  24395. + /* Check the lock's availability: if it is unavailable we get
  24396. + E_REPEAT, 0 indicates "can_lock", otherwise the node is
  24397. + invalid. */
  24398. + ret = can_lock_object(owner);
  24399. +
  24400. + if (unlikely(ret == -EINVAL)) {
  24401. + /* @node is dying. Leave it alone. */
  24402. + break;
  24403. + }
  24404. +
  24405. + if (unlikely(ret == -E_REPEAT && non_blocking)) {
  24406. + /* either locking of @node by the current thread will
  24407. + * lead to the deadlock, or lock modes are
  24408. + * incompatible. */
  24409. + break;
  24410. + }
  24411. +
  24412. + assert("nikita-1844", (ret == 0)
  24413. + || ((ret == -E_REPEAT) && !non_blocking));
  24414. + /* If we can get the lock... Try to capture first before
  24415. + taking the lock. */
  24416. +
  24417. + /* first handle commonest case where node and txnh are already
  24418. + * in the same atom. */
  24419. + /* safe to do without taking locks, because:
  24420. + *
  24421. + * 1. read of aligned word is atomic with respect to writes to
  24422. + * this word
  24423. + *
  24424. + * 2. false negatives are handled in reiser4_try_capture().
  24425. + *
  24426. + * 3. false positives are impossible.
  24427. + *
  24428. + * PROOF: left as an exercise to the curious reader.
  24429. + *
  24430. + * Just kidding. Here is one:
  24431. + *
  24432. + * At the time T0 txnh->atom is stored in txnh_atom.
  24433. + *
  24434. + * At the time T1 node->atom is stored in node_atom.
  24435. + *
  24436. + * At the time T2 we observe that
  24437. + *
  24438. + * txnh_atom != NULL && node_atom == txnh_atom.
  24439. + *
  24440. + * Imagine that at this moment we acquire node and txnh spin
  24441. + * lock in this order. Suppose that under spin lock we have
  24442. + *
  24443. + * node->atom != txnh->atom, (S1)
  24444. + *
  24445. + * at the time T3.
  24446. + *
  24447. + * txnh->atom != NULL still, because txnh is open by the
  24448. + * current thread.
  24449. + *
  24450. + * Suppose node->atom == NULL, that is, node was un-captured
  24451. + * between T1, and T3. But un-capturing of formatted node is
  24452. + * always preceded by the call to reiser4_invalidate_lock(),
  24453. + * which marks znode as JNODE_IS_DYING under zlock spin
  24454. + * lock. Contradiction, because can_lock_object() above checks
  24455. + * for JNODE_IS_DYING. Hence, node->atom != NULL at T3.
  24456. + *
  24457. + * Suppose that node->atom != node_atom, that is, atom, node
  24458. + * belongs to was fused into another atom: node_atom was fused
  24459. + * into node->atom. Atom of txnh was equal to node_atom at T2,
  24460. + * which means that under spin lock, txnh->atom == node->atom,
  24461. + * because txnh->atom can only follow fusion
  24462. + * chain. Contradicts S1.
  24463. + *
  24464. + * The same for hypothesis txnh->atom != txnh_atom. Hence,
  24465. + * node->atom == node_atom == txnh_atom == txnh->atom. Again
  24466. + * contradicts S1. Hence S1 is false. QED.
  24467. + *
  24468. + */
  24469. +
  24470. + if (likely(has_atom && ZJNODE(node)->atom == txnh->atom)) {
  24471. + ;
  24472. + } else {
  24473. + /*
  24474. + * unlock zlock spin lock here. It is possible for
  24475. + * longterm_unlock_znode() to sneak in here, but there
  24476. + * is no harm: reiser4_invalidate_lock() will mark znode
  24477. + * as JNODE_IS_DYING and this will be noted by
  24478. + * can_lock_object() below.
  24479. + */
  24480. + spin_unlock_zlock(lock);
  24481. + spin_lock_znode(node);
  24482. + ret = reiser4_try_capture(ZJNODE(node), mode,
  24483. + cap_flags);
  24484. + spin_unlock_znode(node);
  24485. + spin_lock_zlock(lock);
  24486. + if (unlikely(ret != 0)) {
  24487. + /* In the failure case, the txnmgr releases
  24488. + the znode's lock (or in some cases, it was
  24489. + released a while ago). There's no need to
  24490. + reacquire it so we should return here,
  24491. + avoiding release of the lock. */
  24492. + owner->request.mode = 0;
  24493. + break;
  24494. + }
  24495. +
  24496. + /* Check the lock's availability again -- this is
  24497. + because under some circumstances the capture code
  24498. + has to release and reacquire the znode spinlock. */
  24499. + ret = can_lock_object(owner);
  24500. + }
  24501. +
  24502. + /* This time, a return of (ret == 0) means we can lock, so we
  24503. + should break out of the loop. */
  24504. + if (likely(ret != -E_REPEAT || non_blocking))
  24505. + break;
  24506. +
  24507. + /* Lock is unavailable, we have to wait. */
  24508. + ret = reiser4_prepare_to_sleep(owner);
  24509. + if (unlikely(ret != 0))
  24510. + break;
  24511. +
  24512. + assert_spin_locked(&(node->lock.guard));
  24513. + if (hipri) {
  24514. + /* If we are going in high priority direction then
  24515. + increase high priority requests counter for the
  24516. + node */
  24517. + lock->nr_hipri_requests++;
  24518. + if (mode == ZNODE_WRITE_LOCK)
  24519. + lock->nr_hipri_write_requests++;
  24520. + /* If there are no high priority owners for a node,
  24521. + then immediately wake up low priority owners, so
  24522. + they can detect possible deadlock */
  24523. + if (lock->nr_hipri_owners == 0)
  24524. + wake_up_all_lopri_owners(node);
  24525. + }
  24526. + list_add_tail(&owner->requestors_link, &lock->requestors);
  24527. +
  24528. + /* Ok, here we have prepared a lock request, so unlock
  24529. + a znode ... */
  24530. + spin_unlock_zlock(lock);
  24531. + /* ... and sleep */
  24532. + reiser4_go_to_sleep(owner);
  24533. + if (owner->request.mode == ZNODE_NO_LOCK)
  24534. + goto request_is_done;
  24535. + spin_lock_zlock(lock);
  24536. + if (owner->request.mode == ZNODE_NO_LOCK) {
  24537. + spin_unlock_zlock(lock);
  24538. +request_is_done:
  24539. + if (owner->request.ret_code == 0) {
  24540. + LOCK_CNT_INC(long_term_locked_znode);
  24541. + zref(node);
  24542. + }
  24543. + return owner->request.ret_code;
  24544. + }
  24545. + remove_lock_request(owner);
  24546. + }
  24547. +
  24548. + return lock_tail(owner, ret, mode);
  24549. +}
  24550. +
  24551. +/* lock object invalidation means changing of lock object state to `INVALID'
  24552. + and waiting for all other processes to cancel theirs lock requests. */
  24553. +void reiser4_invalidate_lock(lock_handle * handle /* path to lock
  24554. + * owner and lock
  24555. + * object is being
  24556. + * invalidated. */ )
  24557. +{
  24558. + znode *node = handle->node;
  24559. + lock_stack *owner = handle->owner;
  24560. +
  24561. + assert("zam-325", owner == get_current_lock_stack());
  24562. + assert("zam-103", znode_is_write_locked(node));
  24563. + assert("nikita-1393", !ZF_ISSET(node, JNODE_LEFT_CONNECTED));
  24564. + assert("nikita-1793", !ZF_ISSET(node, JNODE_RIGHT_CONNECTED));
  24565. + assert("nikita-1394", ZF_ISSET(node, JNODE_HEARD_BANSHEE));
  24566. + assert("nikita-3097", znode_is_wlocked_once(node));
  24567. + assert_spin_locked(&(node->lock.guard));
  24568. +
  24569. + if (handle->signaled)
  24570. + atomic_dec(&owner->nr_signaled);
  24571. +
  24572. + ZF_SET(node, JNODE_IS_DYING);
  24573. + unlink_object(handle);
  24574. + node->lock.nr_readers = 0;
  24575. +
  24576. + invalidate_all_lock_requests(node);
  24577. + spin_unlock_zlock(&node->lock);
  24578. +}
  24579. +
  24580. +/* Initializes lock_stack. */
  24581. +void init_lock_stack(lock_stack * owner /* pointer to
  24582. + * allocated
  24583. + * structure. */ )
  24584. +{
  24585. + INIT_LIST_HEAD(&owner->locks);
  24586. + INIT_LIST_HEAD(&owner->requestors_link);
  24587. + spin_lock_init(&owner->sguard);
  24588. + owner->curpri = 1;
  24589. + init_waitqueue_head(&owner->wait);
  24590. +}
  24591. +
  24592. +/* Initializes lock object. */
  24593. +void reiser4_init_lock(zlock * lock /* pointer on allocated
  24594. + * uninitialized lock object
  24595. + * structure. */ )
  24596. +{
  24597. + memset(lock, 0, sizeof(zlock));
  24598. + spin_lock_init(&lock->guard);
  24599. + INIT_LIST_HEAD(&lock->requestors);
  24600. + INIT_LIST_HEAD(&lock->owners);
  24601. +}
  24602. +
  24603. +/* Transfer a lock handle (presumably so that variables can be moved between
  24604. + stack and heap locations). */
  24605. +static void
  24606. +move_lh_internal(lock_handle * new, lock_handle * old, int unlink_old)
  24607. +{
  24608. + znode *node = old->node;
  24609. + lock_stack *owner = old->owner;
  24610. + int signaled;
  24611. +
  24612. + /* locks_list, modified by link_object() is not protected by
  24613. + anything. This is valid because only current thread ever modifies
  24614. + locks_list of its lock_stack.
  24615. + */
  24616. + assert("nikita-1827", owner == get_current_lock_stack());
  24617. + assert("nikita-1831", new->owner == NULL);
  24618. +
  24619. + spin_lock_zlock(&node->lock);
  24620. +
  24621. + signaled = old->signaled;
  24622. + if (unlink_old) {
  24623. + unlink_object(old);
  24624. + } else {
  24625. + if (node->lock.nr_readers > 0) {
  24626. + node->lock.nr_readers += 1;
  24627. + } else {
  24628. + node->lock.nr_readers -= 1;
  24629. + }
  24630. + if (signaled)
  24631. + atomic_inc(&owner->nr_signaled);
  24632. + if (owner->curpri)
  24633. + node->lock.nr_hipri_owners += 1;
  24634. + LOCK_CNT_INC(long_term_locked_znode);
  24635. +
  24636. + zref(node);
  24637. + }
  24638. + link_object(new, owner, node);
  24639. + new->signaled = signaled;
  24640. +
  24641. + spin_unlock_zlock(&node->lock);
  24642. +}
  24643. +
  24644. +void move_lh(lock_handle * new, lock_handle * old)
  24645. +{
  24646. + move_lh_internal(new, old, /*unlink_old */ 1);
  24647. +}
  24648. +
  24649. +void copy_lh(lock_handle * new, lock_handle * old)
  24650. +{
  24651. + move_lh_internal(new, old, /*unlink_old */ 0);
  24652. +}
  24653. +
  24654. +/* after getting -E_DEADLOCK we unlock znodes until this function returns false
  24655. + */
  24656. +int reiser4_check_deadlock(void)
  24657. +{
  24658. + lock_stack *owner = get_current_lock_stack();
  24659. + return atomic_read(&owner->nr_signaled) != 0;
  24660. +}
  24661. +
  24662. +/* Before going to sleep we re-check "release lock" requests which might come
  24663. + from threads with hi-pri lock priorities. */
  24664. +int reiser4_prepare_to_sleep(lock_stack * owner)
  24665. +{
  24666. + assert("nikita-1847", owner == get_current_lock_stack());
  24667. +
  24668. + /* We return -E_DEADLOCK if one or more "give me the lock" messages are
  24669. + * counted in nr_signaled */
  24670. + if (unlikely(atomic_read(&owner->nr_signaled) != 0)) {
  24671. + assert("zam-959", !owner->curpri);
  24672. + return RETERR(-E_DEADLOCK);
  24673. + }
  24674. + return 0;
  24675. +}
  24676. +
  24677. +/* Wakes up a single thread */
  24678. +void __reiser4_wake_up(lock_stack * owner)
  24679. +{
  24680. + atomic_set(&owner->wakeup, 1);
  24681. + wake_up(&owner->wait);
  24682. +}
  24683. +
  24684. +/* Puts a thread to sleep */
  24685. +void reiser4_go_to_sleep(lock_stack * owner)
  24686. +{
  24687. + /* Well, we might sleep here, so holding of any spinlocks is no-no */
  24688. + assert("nikita-3027", reiser4_schedulable());
  24689. +
  24690. + wait_event(owner->wait, atomic_read(&owner->wakeup));
  24691. + atomic_set(&owner->wakeup, 0);
  24692. +}
  24693. +
  24694. +int lock_stack_isclean(lock_stack * owner)
  24695. +{
  24696. + if (list_empty_careful(&owner->locks)) {
  24697. + assert("zam-353", atomic_read(&owner->nr_signaled) == 0);
  24698. + return 1;
  24699. + }
  24700. +
  24701. + return 0;
  24702. +}
  24703. +
  24704. +#if REISER4_DEBUG
  24705. +
  24706. +/*
  24707. + * debugging functions
  24708. + */
  24709. +
  24710. +static void list_check(struct list_head *head)
  24711. +{
  24712. + struct list_head *pos;
  24713. +
  24714. + list_for_each(pos, head)
  24715. + assert("", (pos->prev != NULL && pos->next != NULL &&
  24716. + pos->prev->next == pos && pos->next->prev == pos));
  24717. +}
  24718. +
  24719. +/* check consistency of locking data-structures hanging of the @stack */
  24720. +static void check_lock_stack(lock_stack * stack)
  24721. +{
  24722. + spin_lock_stack(stack);
  24723. + /* check that stack->locks is not corrupted */
  24724. + list_check(&stack->locks);
  24725. + spin_unlock_stack(stack);
  24726. +}
  24727. +
  24728. +/* check consistency of locking data structures */
  24729. +void check_lock_data(void)
  24730. +{
  24731. + check_lock_stack(&get_current_context()->stack);
  24732. +}
  24733. +
  24734. +/* check consistency of locking data structures for @node */
  24735. +void check_lock_node_data(znode * node)
  24736. +{
  24737. + spin_lock_zlock(&node->lock);
  24738. + list_check(&node->lock.owners);
  24739. + list_check(&node->lock.requestors);
  24740. + spin_unlock_zlock(&node->lock);
  24741. +}
  24742. +
  24743. +/* check that given lock request is dead lock safe. This check is, of course,
  24744. + * not exhaustive. */
  24745. +static int
  24746. +request_is_deadlock_safe(znode * node, znode_lock_mode mode,
  24747. + znode_lock_request request)
  24748. +{
  24749. + lock_stack *owner;
  24750. +
  24751. + owner = get_current_lock_stack();
  24752. + /*
  24753. + * check that hipri lock request is not issued when there are locked
  24754. + * nodes at the higher levels.
  24755. + */
  24756. + if (request & ZNODE_LOCK_HIPRI && !(request & ZNODE_LOCK_NONBLOCK) &&
  24757. + znode_get_level(node) != 0) {
  24758. + lock_handle *item;
  24759. +
  24760. + list_for_each_entry(item, &owner->locks, locks_link) {
  24761. + znode *other;
  24762. +
  24763. + other = item->node;
  24764. +
  24765. + if (znode_get_level(other) == 0)
  24766. + continue;
  24767. + if (znode_get_level(other) > znode_get_level(node))
  24768. + return 0;
  24769. + }
  24770. + }
  24771. + return 1;
  24772. +}
  24773. +
  24774. +#endif
  24775. +
  24776. +/* return pointer to static storage with name of lock_mode. For
  24777. + debugging */
  24778. +const char *lock_mode_name(znode_lock_mode lock/* lock mode to get name of */)
  24779. +{
  24780. + if (lock == ZNODE_READ_LOCK)
  24781. + return "read";
  24782. + else if (lock == ZNODE_WRITE_LOCK)
  24783. + return "write";
  24784. + else {
  24785. + static char buf[30];
  24786. +
  24787. + sprintf(buf, "unknown: %i", lock);
  24788. + return buf;
  24789. + }
  24790. +}
  24791. +
  24792. +/* Make Linus happy.
  24793. + Local variables:
  24794. + c-indentation-style: "K&R"
  24795. + mode-name: "LC"
  24796. + c-basic-offset: 8
  24797. + tab-width: 8
  24798. + fill-column: 79
  24799. + End:
  24800. +*/
  24801. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/lock.h linux-4.14.2/fs/reiser4/lock.h
  24802. --- linux-4.14.2.orig/fs/reiser4/lock.h 1970-01-01 01:00:00.000000000 +0100
  24803. +++ linux-4.14.2/fs/reiser4/lock.h 2017-11-26 22:13:09.000000000 +0100
  24804. @@ -0,0 +1,250 @@
  24805. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  24806. + * reiser4/README */
  24807. +
  24808. +/* Long term locking data structures. See lock.c for details. */
  24809. +
  24810. +#ifndef __LOCK_H__
  24811. +#define __LOCK_H__
  24812. +
  24813. +#include "forward.h"
  24814. +#include "debug.h"
  24815. +#include "dformat.h"
  24816. +#include "key.h"
  24817. +#include "coord.h"
  24818. +#include "plugin/node/node.h"
  24819. +#include "txnmgr.h"
  24820. +#include "readahead.h"
  24821. +
  24822. +#include <linux/types.h>
  24823. +#include <linux/spinlock.h>
  24824. +#include <linux/pagemap.h> /* for PAGE_CACHE_SIZE */
  24825. +#include <asm/atomic.h>
  24826. +#include <linux/wait.h>
  24827. +
  24828. +/* Per-znode lock object */
  24829. +struct zlock {
  24830. + spinlock_t guard;
  24831. + /* The number of readers if positive; the number of recursively taken
  24832. + write locks if negative. Protected by zlock spin lock. */
  24833. + int nr_readers;
  24834. + /* A number of processes (lock_stacks) that have this object
  24835. + locked with high priority */
  24836. + unsigned nr_hipri_owners;
  24837. + /* A number of attempts to lock znode in high priority direction */
  24838. + unsigned nr_hipri_requests;
  24839. + /* A linked list of lock_handle objects that contains pointers
  24840. + for all lock_stacks which have this lock object locked */
  24841. + unsigned nr_hipri_write_requests;
  24842. + struct list_head owners;
  24843. + /* A linked list of lock_stacks that wait for this lock */
  24844. + struct list_head requestors;
  24845. +};
  24846. +
  24847. +static inline void spin_lock_zlock(zlock *lock)
  24848. +{
  24849. + /* check that zlock is not locked */
  24850. + assert("", LOCK_CNT_NIL(spin_locked_zlock));
  24851. + /* check that spinlocks of lower priorities are not held */
  24852. + assert("", LOCK_CNT_NIL(spin_locked_stack));
  24853. +
  24854. + spin_lock(&lock->guard);
  24855. +
  24856. + LOCK_CNT_INC(spin_locked_zlock);
  24857. + LOCK_CNT_INC(spin_locked);
  24858. +}
  24859. +
  24860. +static inline void spin_unlock_zlock(zlock *lock)
  24861. +{
  24862. + assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_zlock));
  24863. + assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
  24864. +
  24865. + LOCK_CNT_DEC(spin_locked_zlock);
  24866. + LOCK_CNT_DEC(spin_locked);
  24867. +
  24868. + spin_unlock(&lock->guard);
  24869. +}
  24870. +
  24871. +#define lock_is_locked(lock) ((lock)->nr_readers != 0)
  24872. +#define lock_is_rlocked(lock) ((lock)->nr_readers > 0)
  24873. +#define lock_is_wlocked(lock) ((lock)->nr_readers < 0)
  24874. +#define lock_is_wlocked_once(lock) ((lock)->nr_readers == -1)
  24875. +#define lock_can_be_rlocked(lock) ((lock)->nr_readers >= 0)
  24876. +#define lock_mode_compatible(lock, mode) \
  24877. + (((mode) == ZNODE_WRITE_LOCK && !lock_is_locked(lock)) || \
  24878. + ((mode) == ZNODE_READ_LOCK && lock_can_be_rlocked(lock)))
  24879. +
  24880. +/* Since we have R/W znode locks we need additional bidirectional `link'
  24881. + objects to implement n<->m relationship between lock owners and lock
  24882. + objects. We call them `lock handles'.
  24883. +
  24884. + Locking: see lock.c/"SHORT-TERM LOCKING"
  24885. +*/
  24886. +struct lock_handle {
  24887. + /* This flag indicates that a signal to yield a lock was passed to
  24888. + lock owner and counted in owner->nr_signalled
  24889. +
  24890. + Locking: this is accessed under spin lock on ->node.
  24891. + */
  24892. + int signaled;
  24893. + /* A link to owner of a lock */
  24894. + lock_stack *owner;
  24895. + /* A link to znode locked */
  24896. + znode *node;
  24897. + /* A list of all locks for a process */
  24898. + struct list_head locks_link;
  24899. + /* A list of all owners for a znode */
  24900. + struct list_head owners_link;
  24901. +};
  24902. +
  24903. +struct lock_request {
  24904. + /* A pointer to uninitialized link object */
  24905. + lock_handle *handle;
  24906. + /* A pointer to the object we want to lock */
  24907. + znode *node;
  24908. + /* Lock mode (ZNODE_READ_LOCK or ZNODE_WRITE_LOCK) */
  24909. + znode_lock_mode mode;
  24910. + /* how dispatch_lock_requests() returns lock request result code */
  24911. + int ret_code;
  24912. +};
  24913. +
  24914. +/* A lock stack structure for accumulating locks owned by a process */
  24915. +struct lock_stack {
  24916. + /* A guard lock protecting a lock stack */
  24917. + spinlock_t sguard;
  24918. + /* number of znodes which were requested by high priority processes */
  24919. + atomic_t nr_signaled;
  24920. + /* Current priority of a process
  24921. +
  24922. + This is only accessed by the current thread and thus requires no
  24923. + locking.
  24924. + */
  24925. + int curpri;
  24926. + /* A list of all locks owned by this process. Elements can be added to
  24927. + * this list only by the current thread. ->node pointers in this list
  24928. + * can be only changed by the current thread. */
  24929. + struct list_head locks;
  24930. + /* When lock_stack waits for the lock, it puts itself on double-linked
  24931. + requestors list of that lock */
  24932. + struct list_head requestors_link;
  24933. + /* Current lock request info.
  24934. +
  24935. + This is only accessed by the current thread and thus requires no
  24936. + locking.
  24937. + */
  24938. + struct lock_request request;
  24939. + /* the following two fields are the lock stack's
  24940. + * synchronization object to use with the standard linux/wait.h
  24941. + * interface. See reiser4_go_to_sleep and __reiser4_wake_up for
  24942. + * usage details. */
  24943. + wait_queue_head_t wait;
  24944. + atomic_t wakeup;
  24945. +#if REISER4_DEBUG
  24946. + int nr_locks; /* number of lock handles in the above list */
  24947. +#endif
  24948. +};
  24949. +
  24950. +/*
  24951. + User-visible znode locking functions
  24952. +*/
  24953. +
  24954. +extern int longterm_lock_znode(lock_handle * handle,
  24955. + znode * node,
  24956. + znode_lock_mode mode,
  24957. + znode_lock_request request);
  24958. +
  24959. +extern void longterm_unlock_znode(lock_handle * handle);
  24960. +
  24961. +extern int reiser4_check_deadlock(void);
  24962. +
  24963. +extern lock_stack *get_current_lock_stack(void);
  24964. +
  24965. +extern void init_lock_stack(lock_stack * owner);
  24966. +extern void reiser4_init_lock(zlock * lock);
  24967. +
  24968. +static inline void init_lh(lock_handle *lh)
  24969. +{
  24970. +#if REISER4_DEBUG
  24971. + memset(lh, 0, sizeof *lh);
  24972. + INIT_LIST_HEAD(&lh->locks_link);
  24973. + INIT_LIST_HEAD(&lh->owners_link);
  24974. +#else
  24975. + lh->node = NULL;
  24976. +#endif
  24977. +}
  24978. +
  24979. +static inline void done_lh(lock_handle *lh)
  24980. +{
  24981. + assert("zam-342", lh != NULL);
  24982. + if (lh->node != NULL)
  24983. + longterm_unlock_znode(lh);
  24984. +}
  24985. +
  24986. +extern void move_lh(lock_handle * new, lock_handle * old);
  24987. +extern void copy_lh(lock_handle * new, lock_handle * old);
  24988. +
  24989. +extern int reiser4_prepare_to_sleep(lock_stack * owner);
  24990. +extern void reiser4_go_to_sleep(lock_stack * owner);
  24991. +extern void __reiser4_wake_up(lock_stack * owner);
  24992. +
  24993. +extern int lock_stack_isclean(lock_stack * owner);
  24994. +
  24995. +/* zlock object state check macros: only used in assertions. Both forms imply
  24996. + that the lock is held by the current thread. */
  24997. +extern int znode_is_write_locked(const znode *);
  24998. +extern void reiser4_invalidate_lock(lock_handle *);
  24999. +
  25000. +/* lock ordering is: first take zlock spin lock, then lock stack spin lock */
  25001. +#define spin_ordering_pred_stack(stack) \
  25002. + (LOCK_CNT_NIL(spin_locked_stack) && \
  25003. + LOCK_CNT_NIL(spin_locked_txnmgr) && \
  25004. + LOCK_CNT_NIL(spin_locked_inode) && \
  25005. + LOCK_CNT_NIL(rw_locked_cbk_cache) && \
  25006. + LOCK_CNT_NIL(spin_locked_super_eflush))
  25007. +
  25008. +static inline void spin_lock_stack(lock_stack *stack)
  25009. +{
  25010. + assert("", spin_ordering_pred_stack(stack));
  25011. + spin_lock(&(stack->sguard));
  25012. + LOCK_CNT_INC(spin_locked_stack);
  25013. + LOCK_CNT_INC(spin_locked);
  25014. +}
  25015. +
  25016. +static inline void spin_unlock_stack(lock_stack *stack)
  25017. +{
  25018. + assert_spin_locked(&(stack->sguard));
  25019. + assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_stack));
  25020. + assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
  25021. + LOCK_CNT_DEC(spin_locked_stack);
  25022. + LOCK_CNT_DEC(spin_locked);
  25023. + spin_unlock(&(stack->sguard));
  25024. +}
  25025. +
  25026. +static inline void reiser4_wake_up(lock_stack * owner)
  25027. +{
  25028. + spin_lock_stack(owner);
  25029. + __reiser4_wake_up(owner);
  25030. + spin_unlock_stack(owner);
  25031. +}
  25032. +
  25033. +const char *lock_mode_name(znode_lock_mode lock);
  25034. +
  25035. +#if REISER4_DEBUG
  25036. +extern void check_lock_data(void);
  25037. +extern void check_lock_node_data(znode * node);
  25038. +#else
  25039. +#define check_lock_data() noop
  25040. +#define check_lock_node_data() noop
  25041. +#endif
  25042. +
  25043. +/* __LOCK_H__ */
  25044. +#endif
  25045. +
  25046. +/* Make Linus happy.
  25047. + Local variables:
  25048. + c-indentation-style: "K&R"
  25049. + mode-name: "LC"
  25050. + c-basic-offset: 8
  25051. + tab-width: 8
  25052. + fill-column: 120
  25053. + End:
  25054. +*/
  25055. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/Makefile linux-4.14.2/fs/reiser4/Makefile
  25056. --- linux-4.14.2.orig/fs/reiser4/Makefile 1970-01-01 01:00:00.000000000 +0100
  25057. +++ linux-4.14.2/fs/reiser4/Makefile 2017-11-26 22:13:09.000000000 +0100
  25058. @@ -0,0 +1,103 @@
  25059. +#
  25060. +# reiser4/Makefile
  25061. +#
  25062. +
  25063. +obj-$(CONFIG_REISER4_FS) += reiser4.o
  25064. +
  25065. +reiser4-y := \
  25066. + debug.o \
  25067. + jnode.o \
  25068. + znode.o \
  25069. + key.o \
  25070. + pool.o \
  25071. + tree_mod.o \
  25072. + estimate.o \
  25073. + carry.o \
  25074. + carry_ops.o \
  25075. + lock.o \
  25076. + tree.o \
  25077. + context.o \
  25078. + tap.o \
  25079. + coord.o \
  25080. + block_alloc.o \
  25081. + txnmgr.o \
  25082. + kassign.o \
  25083. + flush.o \
  25084. + wander.o \
  25085. + eottl.o \
  25086. + search.o \
  25087. + page_cache.o \
  25088. + seal.o \
  25089. + dscale.o \
  25090. + flush_queue.o \
  25091. + ktxnmgrd.o \
  25092. + blocknrset.o \
  25093. + super.o \
  25094. + super_ops.o \
  25095. + fsdata.o \
  25096. + export_ops.o \
  25097. + oid.o \
  25098. + tree_walk.o \
  25099. + inode.o \
  25100. + vfs_ops.o \
  25101. + as_ops.o \
  25102. + entd.o\
  25103. + readahead.o \
  25104. + status_flags.o \
  25105. + init_super.o \
  25106. + safe_link.o \
  25107. + blocknrlist.o \
  25108. + discard.o \
  25109. + checksum.o \
  25110. + \
  25111. + plugin/plugin.o \
  25112. + plugin/plugin_set.o \
  25113. + plugin/node/node.o \
  25114. + plugin/object.o \
  25115. + plugin/cluster.o \
  25116. + plugin/txmod.o \
  25117. + plugin/inode_ops.o \
  25118. + plugin/inode_ops_rename.o \
  25119. + plugin/file_ops.o \
  25120. + plugin/file_ops_readdir.o \
  25121. + plugin/file_plugin_common.o \
  25122. + plugin/file/file.o \
  25123. + plugin/file/tail_conversion.o \
  25124. + plugin/file/file_conversion.o \
  25125. + plugin/file/symlink.o \
  25126. + plugin/file/cryptcompress.o \
  25127. + plugin/dir_plugin_common.o \
  25128. + plugin/dir/hashed_dir.o \
  25129. + plugin/dir/seekable_dir.o \
  25130. + plugin/node/node40.o \
  25131. + plugin/node/node41.o \
  25132. + \
  25133. + plugin/crypto/cipher.o \
  25134. + plugin/crypto/digest.o \
  25135. + \
  25136. + plugin/compress/compress.o \
  25137. + plugin/compress/compress_mode.o \
  25138. + \
  25139. + plugin/item/static_stat.o \
  25140. + plugin/item/sde.o \
  25141. + plugin/item/cde.o \
  25142. + plugin/item/blackbox.o \
  25143. + plugin/item/internal.o \
  25144. + plugin/item/tail.o \
  25145. + plugin/item/ctail.o \
  25146. + plugin/item/extent.o \
  25147. + plugin/item/extent_item_ops.o \
  25148. + plugin/item/extent_file_ops.o \
  25149. + plugin/item/extent_flush_ops.o \
  25150. + \
  25151. + plugin/hash.o \
  25152. + plugin/fibration.o \
  25153. + plugin/tail_policy.o \
  25154. + plugin/item/item.o \
  25155. + \
  25156. + plugin/security/perm.o \
  25157. + plugin/space/bitmap.o \
  25158. + \
  25159. + plugin/disk_format/disk_format40.o \
  25160. + plugin/disk_format/disk_format.o
  25161. +
  25162. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/oid.c linux-4.14.2/fs/reiser4/oid.c
  25163. --- linux-4.14.2.orig/fs/reiser4/oid.c 1970-01-01 01:00:00.000000000 +0100
  25164. +++ linux-4.14.2/fs/reiser4/oid.c 2017-11-26 22:13:09.000000000 +0100
  25165. @@ -0,0 +1,141 @@
  25166. +/* Copyright 2003 by Hans Reiser, licensing governed by reiser4/README */
  25167. +
  25168. +#include "debug.h"
  25169. +#include "super.h"
  25170. +#include "txnmgr.h"
  25171. +
  25172. +/* we used to have oid allocation plugin. It was removed because it
  25173. + was recognized as providing unneeded level of abstraction. If one
  25174. + ever will find it useful - look at yet_unneeded_abstractions/oid
  25175. +*/
  25176. +
  25177. +/*
  25178. + * initialize in-memory data for oid allocator at @super. @nr_files and @next
  25179. + * are provided by disk format plugin that reads them from the disk during
  25180. + * mount.
  25181. + */
  25182. +int oid_init_allocator(struct super_block *super, oid_t nr_files, oid_t next)
  25183. +{
  25184. + reiser4_super_info_data *sbinfo;
  25185. +
  25186. + sbinfo = get_super_private(super);
  25187. +
  25188. + sbinfo->next_to_use = next;
  25189. + sbinfo->oids_in_use = nr_files;
  25190. + return 0;
  25191. +}
  25192. +
  25193. +/*
  25194. + * allocate oid and return it. ABSOLUTE_MAX_OID is returned when allocator
  25195. + * runs out of oids.
  25196. + */
  25197. +oid_t oid_allocate(struct super_block *super)
  25198. +{
  25199. + reiser4_super_info_data *sbinfo;
  25200. + oid_t oid;
  25201. +
  25202. + sbinfo = get_super_private(super);
  25203. +
  25204. + spin_lock_reiser4_super(sbinfo);
  25205. + if (sbinfo->next_to_use != ABSOLUTE_MAX_OID) {
  25206. + oid = sbinfo->next_to_use++;
  25207. + sbinfo->oids_in_use++;
  25208. + } else
  25209. + oid = ABSOLUTE_MAX_OID;
  25210. + spin_unlock_reiser4_super(sbinfo);
  25211. + return oid;
  25212. +}
  25213. +
  25214. +/*
  25215. + * Tell oid allocator that @oid is now free.
  25216. + */
  25217. +int oid_release(struct super_block *super, oid_t oid UNUSED_ARG)
  25218. +{
  25219. + reiser4_super_info_data *sbinfo;
  25220. +
  25221. + sbinfo = get_super_private(super);
  25222. +
  25223. + spin_lock_reiser4_super(sbinfo);
  25224. + sbinfo->oids_in_use--;
  25225. + spin_unlock_reiser4_super(sbinfo);
  25226. + return 0;
  25227. +}
  25228. +
  25229. +/*
  25230. + * return next @oid that would be allocated (i.e., returned by oid_allocate())
  25231. + * without actually allocating it. This is used by disk format plugin to save
  25232. + * oid allocator state on the disk.
  25233. + */
  25234. +oid_t oid_next(const struct super_block *super)
  25235. +{
  25236. + reiser4_super_info_data *sbinfo;
  25237. + oid_t oid;
  25238. +
  25239. + sbinfo = get_super_private(super);
  25240. +
  25241. + spin_lock_reiser4_super(sbinfo);
  25242. + oid = sbinfo->next_to_use;
  25243. + spin_unlock_reiser4_super(sbinfo);
  25244. + return oid;
  25245. +}
  25246. +
  25247. +/*
  25248. + * returns number of currently used oids. This is used by statfs(2) to report
  25249. + * number of "inodes" and by disk format plugin to save oid allocator state on
  25250. + * the disk.
  25251. + */
  25252. +long oids_used(const struct super_block *super)
  25253. +{
  25254. + reiser4_super_info_data *sbinfo;
  25255. + oid_t used;
  25256. +
  25257. + sbinfo = get_super_private(super);
  25258. +
  25259. + spin_lock_reiser4_super(sbinfo);
  25260. + used = sbinfo->oids_in_use;
  25261. + spin_unlock_reiser4_super(sbinfo);
  25262. + if (used < (__u64) ((long)~0) >> 1)
  25263. + return (long)used;
  25264. + else
  25265. + return (long)-1;
  25266. +}
  25267. +
  25268. +/*
  25269. + * Count oid as allocated in atom. This is done after call to oid_allocate()
  25270. + * at the point when we are irrevocably committed to creation of the new file
  25271. + * (i.e., when oid allocation cannot be any longer rolled back due to some
  25272. + * error).
  25273. + */
  25274. +void oid_count_allocated(void)
  25275. +{
  25276. + txn_atom *atom;
  25277. +
  25278. + atom = get_current_atom_locked();
  25279. + atom->nr_objects_created++;
  25280. + spin_unlock_atom(atom);
  25281. +}
  25282. +
  25283. +/*
  25284. + * Count oid as free in atom. This is done after call to oid_release() at the
  25285. + * point when we are irrevocably committed to the deletion of the file (i.e.,
  25286. + * when oid release cannot be any longer rolled back due to some error).
  25287. + */
  25288. +void oid_count_released(void)
  25289. +{
  25290. + txn_atom *atom;
  25291. +
  25292. + atom = get_current_atom_locked();
  25293. + atom->nr_objects_deleted++;
  25294. + spin_unlock_atom(atom);
  25295. +}
  25296. +
  25297. +/*
  25298. + Local variables:
  25299. + c-indentation-style: "K&R"
  25300. + mode-name: "LC"
  25301. + c-basic-offset: 8
  25302. + tab-width: 8
  25303. + fill-column: 120
  25304. + scroll-step: 1
  25305. + End:
  25306. +*/
  25307. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/page_cache.c linux-4.14.2/fs/reiser4/page_cache.c
  25308. --- linux-4.14.2.orig/fs/reiser4/page_cache.c 1970-01-01 01:00:00.000000000 +0100
  25309. +++ linux-4.14.2/fs/reiser4/page_cache.c 2017-11-26 22:14:18.000000000 +0100
  25310. @@ -0,0 +1,691 @@
  25311. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  25312. + * reiser4/README */
  25313. +
  25314. +/* Memory pressure hooks. Fake inodes handling. */
  25315. +
  25316. +/* GLOSSARY
  25317. +
  25318. + . Formatted and unformatted nodes.
  25319. + Elements of reiser4 balanced tree to store data and metadata.
  25320. + Unformatted nodes are pointed to by extent pointers. Such nodes
  25321. + are used to store data of large objects. Unlike unformatted nodes,
  25322. + formatted ones have associated format described by node4X plugin.
  25323. +
  25324. + . Jnode (or journal node)
  25325. + The in-memory header which is used to track formatted and unformatted
  25326. + nodes, bitmap nodes, etc. In particular, jnodes are used to track
  25327. + transactional information associated with each block(see reiser4/jnode.c
  25328. + for details).
  25329. +
  25330. + . Znode
  25331. + The in-memory header which is used to track formatted nodes. Contains
  25332. + embedded jnode (see reiser4/znode.c for details).
  25333. +*/
  25334. +
  25335. +/* We store all file system meta data (and data, of course) in the page cache.
  25336. +
  25337. + What does this mean? In stead of using bread/brelse we create special
  25338. + "fake" inode (one per super block) and store content of formatted nodes
  25339. + into pages bound to this inode in the page cache. In newer kernels bread()
  25340. + already uses inode attached to block device (bd_inode). Advantage of having
  25341. + our own fake inode is that we can install appropriate methods in its
  25342. + address_space operations. Such methods are called by VM on memory pressure
  25343. + (or during background page flushing) and we can use them to react
  25344. + appropriately.
  25345. +
  25346. + In initial version we only support one block per page. Support for multiple
  25347. + blocks per page is complicated by relocation.
  25348. +
  25349. + To each page, used by reiser4, jnode is attached. jnode is analogous to
  25350. + buffer head. Difference is that jnode is bound to the page permanently:
  25351. + jnode cannot be removed from memory until its backing page is.
  25352. +
  25353. + jnode contain pointer to page (->pg field) and page contain pointer to
  25354. + jnode in ->private field. Pointer from jnode to page is protected to by
  25355. + jnode's spinlock and pointer from page to jnode is protected by page lock
  25356. + (PG_locked bit). Lock ordering is: first take page lock, then jnode spin
  25357. + lock. To go into reverse direction use jnode_lock_page() function that uses
  25358. + standard try-lock-and-release device.
  25359. +
  25360. + Properties:
  25361. +
  25362. + 1. when jnode-to-page mapping is established (by jnode_attach_page()), page
  25363. + reference counter is increased.
  25364. +
  25365. + 2. when jnode-to-page mapping is destroyed (by page_clear_jnode(), page
  25366. + reference counter is decreased.
  25367. +
  25368. + 3. on jload() reference counter on jnode page is increased, page is
  25369. + kmapped and `referenced'.
  25370. +
  25371. + 4. on jrelse() inverse operations are performed.
  25372. +
  25373. + 5. kmapping/kunmapping of unformatted pages is done by read/write methods.
  25374. +
  25375. + DEADLOCKS RELATED TO MEMORY PRESSURE. [OUTDATED. Only interesting
  25376. + historically.]
  25377. +
  25378. + [In the following discussion, `lock' invariably means long term lock on
  25379. + znode.] (What about page locks?)
  25380. +
  25381. + There is some special class of deadlock possibilities related to memory
  25382. + pressure. Locks acquired by other reiser4 threads are accounted for in
  25383. + deadlock prevention mechanism (lock.c), but when ->vm_writeback() is
  25384. + invoked additional hidden arc is added to the locking graph: thread that
  25385. + tries to allocate memory waits for ->vm_writeback() to finish. If this
  25386. + thread keeps lock and ->vm_writeback() tries to acquire this lock, deadlock
  25387. + prevention is useless.
  25388. +
  25389. + Another related problem is possibility for ->vm_writeback() to run out of
  25390. + memory itself. This is not a problem for ext2 and friends, because their
  25391. + ->vm_writeback() don't allocate much memory, but reiser4 flush is
  25392. + definitely able to allocate huge amounts of memory.
  25393. +
  25394. + It seems that there is no reliable way to cope with the problems above. In
  25395. + stead it was decided that ->vm_writeback() (as invoked in the kswapd
  25396. + context) wouldn't perform any flushing itself, but rather should just wake
  25397. + up some auxiliary thread dedicated for this purpose (or, the same thread
  25398. + that does periodic commit of old atoms (ktxnmgrd.c)).
  25399. +
  25400. + Details:
  25401. +
  25402. + 1. Page is called `reclaimable' against particular reiser4 mount F if this
  25403. + page can be ultimately released by try_to_free_pages() under presumptions
  25404. + that:
  25405. +
  25406. + a. ->vm_writeback() for F is no-op, and
  25407. +
  25408. + b. none of the threads accessing F are making any progress, and
  25409. +
  25410. + c. other reiser4 mounts obey the same memory reservation protocol as F
  25411. + (described below).
  25412. +
  25413. + For example, clean un-pinned page, or page occupied by ext2 data are
  25414. + reclaimable against any reiser4 mount.
  25415. +
  25416. + When there is more than one reiser4 mount in a system, condition (c) makes
  25417. + reclaim-ability not easily verifiable beyond trivial cases mentioned above.
  25418. +
  25419. + THIS COMMENT IS VALID FOR "MANY BLOCKS ON PAGE" CASE
  25420. +
  25421. + Fake inode is used to bound formatted nodes and each node is indexed within
  25422. + fake inode by its block number. If block size of smaller than page size, it
  25423. + may so happen that block mapped to the page with formatted node is occupied
  25424. + by unformatted node or is unallocated. This lead to some complications,
  25425. + because flushing whole page can lead to an incorrect overwrite of
  25426. + unformatted node that is moreover, can be cached in some other place as
  25427. + part of the file body. To avoid this, buffers for unformatted nodes are
  25428. + never marked dirty. Also pages in the fake are never marked dirty. This
  25429. + rules out usage of ->writepage() as memory pressure hook. In stead
  25430. + ->releasepage() is used.
  25431. +
  25432. + Josh is concerned that page->buffer is going to die. This should not pose
  25433. + significant problem though, because we need to add some data structures to
  25434. + the page anyway (jnode) and all necessary book keeping can be put there.
  25435. +
  25436. +*/
  25437. +
  25438. +/* Life cycle of pages/nodes.
  25439. +
  25440. + jnode contains reference to page and page contains reference back to
  25441. + jnode. This reference is counted in page ->count. Thus, page bound to jnode
  25442. + cannot be released back into free pool.
  25443. +
  25444. + 1. Formatted nodes.
  25445. +
  25446. + 1. formatted node is represented by znode. When new znode is created its
  25447. + ->pg pointer is NULL initially.
  25448. +
  25449. + 2. when node content is loaded into znode (by call to zload()) for the
  25450. + first time following happens (in call to ->read_node() or
  25451. + ->allocate_node()):
  25452. +
  25453. + 1. new page is added to the page cache.
  25454. +
  25455. + 2. this page is attached to znode and its ->count is increased.
  25456. +
  25457. + 3. page is kmapped.
  25458. +
  25459. + 3. if more calls to zload() follow (without corresponding zrelses), page
  25460. + counter is left intact and in its stead ->d_count is increased in znode.
  25461. +
  25462. + 4. each call to zrelse decreases ->d_count. When ->d_count drops to zero
  25463. + ->release_node() is called and page is kunmapped as result.
  25464. +
  25465. + 5. at some moment node can be captured by a transaction. Its ->x_count
  25466. + is then increased by transaction manager.
  25467. +
  25468. + 6. if node is removed from the tree (empty node with JNODE_HEARD_BANSHEE
  25469. + bit set) following will happen (also see comment at the top of znode.c):
  25470. +
  25471. + 1. when last lock is released, node will be uncaptured from
  25472. + transaction. This released reference that transaction manager acquired
  25473. + at the step 5.
  25474. +
  25475. + 2. when last reference is released, zput() detects that node is
  25476. + actually deleted and calls ->delete_node()
  25477. + operation. page_cache_delete_node() implementation detaches jnode from
  25478. + page and releases page.
  25479. +
  25480. + 7. otherwise (node wasn't removed from the tree), last reference to
  25481. + znode will be released after transaction manager committed transaction
  25482. + node was in. This implies squallocing of this node (see
  25483. + flush.c). Nothing special happens at this point. Znode is still in the
  25484. + hash table and page is still attached to it.
  25485. +
  25486. + 8. znode is actually removed from the memory because of the memory
  25487. + pressure, or during umount (znodes_tree_done()). Anyway, znode is
  25488. + removed by the call to zdrop(). At this moment, page is detached from
  25489. + znode and removed from the inode address space.
  25490. +
  25491. +*/
  25492. +
  25493. +#include "debug.h"
  25494. +#include "dformat.h"
  25495. +#include "key.h"
  25496. +#include "txnmgr.h"
  25497. +#include "jnode.h"
  25498. +#include "znode.h"
  25499. +#include "block_alloc.h"
  25500. +#include "tree.h"
  25501. +#include "vfs_ops.h"
  25502. +#include "inode.h"
  25503. +#include "super.h"
  25504. +#include "entd.h"
  25505. +#include "page_cache.h"
  25506. +#include "ktxnmgrd.h"
  25507. +
  25508. +#include <linux/types.h>
  25509. +#include <linux/fs.h>
  25510. +#include <linux/mm.h> /* for struct page */
  25511. +#include <linux/swap.h> /* for struct page */
  25512. +#include <linux/pagemap.h>
  25513. +#include <linux/bio.h>
  25514. +#include <linux/writeback.h>
  25515. +#include <linux/blkdev.h>
  25516. +
  25517. +static struct bio *page_bio(struct page *, jnode * , int rw, gfp_t gfp);
  25518. +
  25519. +static struct address_space_operations formatted_fake_as_ops;
  25520. +
  25521. +static const oid_t fake_ino = 0x1;
  25522. +static const oid_t bitmap_ino = 0x2;
  25523. +static const oid_t cc_ino = 0x3;
  25524. +
  25525. +static void
  25526. +init_fake_inode(struct super_block *super, struct inode *fake,
  25527. + struct inode **pfake)
  25528. +{
  25529. + assert("nikita-2168", fake->i_state & I_NEW);
  25530. + fake->i_mapping->a_ops = &formatted_fake_as_ops;
  25531. + inode_attach_wb(fake, NULL);
  25532. + *pfake = fake;
  25533. + /* NOTE-NIKITA something else? */
  25534. + unlock_new_inode(fake);
  25535. +}
  25536. +
  25537. +/**
  25538. + * reiser4_init_formatted_fake - iget inodes for formatted nodes and bitmaps
  25539. + * @super: super block to init fake inode for
  25540. + *
  25541. + * Initializes fake inode to which formatted nodes are bound in the page cache
  25542. + * and inode for bitmaps.
  25543. + */
  25544. +int reiser4_init_formatted_fake(struct super_block *super)
  25545. +{
  25546. + struct inode *fake;
  25547. + struct inode *bitmap;
  25548. + struct inode *cc;
  25549. + reiser4_super_info_data *sinfo;
  25550. +
  25551. + assert("nikita-1703", super != NULL);
  25552. +
  25553. + sinfo = get_super_private_nocheck(super);
  25554. + fake = iget_locked(super, oid_to_ino(fake_ino));
  25555. +
  25556. + if (fake != NULL) {
  25557. + init_fake_inode(super, fake, &sinfo->fake);
  25558. +
  25559. + bitmap = iget_locked(super, oid_to_ino(bitmap_ino));
  25560. + if (bitmap != NULL) {
  25561. + init_fake_inode(super, bitmap, &sinfo->bitmap);
  25562. +
  25563. + cc = iget_locked(super, oid_to_ino(cc_ino));
  25564. + if (cc != NULL) {
  25565. + init_fake_inode(super, cc, &sinfo->cc);
  25566. + return 0;
  25567. + } else {
  25568. + iput(sinfo->fake);
  25569. + iput(sinfo->bitmap);
  25570. + sinfo->fake = NULL;
  25571. + sinfo->bitmap = NULL;
  25572. + }
  25573. + } else {
  25574. + iput(sinfo->fake);
  25575. + sinfo->fake = NULL;
  25576. + }
  25577. + }
  25578. + return RETERR(-ENOMEM);
  25579. +}
  25580. +
  25581. +/**
  25582. + * reiser4_done_formatted_fake - release inode used by formatted nodes and bitmaps
  25583. + * @super: super block to init fake inode for
  25584. + *
  25585. + * Releases inodes which were used as address spaces of bitmap and formatted
  25586. + * nodes.
  25587. + */
  25588. +void reiser4_done_formatted_fake(struct super_block *super)
  25589. +{
  25590. + reiser4_super_info_data *sinfo;
  25591. +
  25592. + sinfo = get_super_private_nocheck(super);
  25593. +
  25594. + if (sinfo->fake != NULL) {
  25595. + iput(sinfo->fake);
  25596. + sinfo->fake = NULL;
  25597. + }
  25598. +
  25599. + if (sinfo->bitmap != NULL) {
  25600. + iput(sinfo->bitmap);
  25601. + sinfo->bitmap = NULL;
  25602. + }
  25603. +
  25604. + if (sinfo->cc != NULL) {
  25605. + iput(sinfo->cc);
  25606. + sinfo->cc = NULL;
  25607. + }
  25608. + return;
  25609. +}
  25610. +
  25611. +void reiser4_wait_page_writeback(struct page *page)
  25612. +{
  25613. + assert("zam-783", PageLocked(page));
  25614. +
  25615. + do {
  25616. + unlock_page(page);
  25617. + wait_on_page_writeback(page);
  25618. + lock_page(page);
  25619. + } while (PageWriteback(page));
  25620. +}
  25621. +
  25622. +/* return tree @page is in */
  25623. +reiser4_tree *reiser4_tree_by_page(const struct page *page/* page to query */)
  25624. +{
  25625. + assert("nikita-2461", page != NULL);
  25626. + return &get_super_private(page->mapping->host->i_sb)->tree;
  25627. +}
  25628. +
  25629. +/* completion handler for single page bio-based read.
  25630. +
  25631. + mpage_end_io_read() would also do. But it's static.
  25632. +
  25633. +*/
  25634. +static void end_bio_single_page_read(struct bio *bio)
  25635. +{
  25636. + struct page *page;
  25637. +
  25638. + page = bio->bi_io_vec[0].bv_page;
  25639. +
  25640. + if (!bio->bi_status)
  25641. + SetPageUptodate(page);
  25642. + else {
  25643. + ClearPageUptodate(page);
  25644. + SetPageError(page);
  25645. + }
  25646. + unlock_page(page);
  25647. + bio_put(bio);
  25648. +}
  25649. +
  25650. +/* completion handler for single page bio-based write.
  25651. +
  25652. + mpage_end_io_write() would also do. But it's static.
  25653. +
  25654. +*/
  25655. +static void end_bio_single_page_write(struct bio *bio)
  25656. +{
  25657. + struct page *page;
  25658. +
  25659. + page = bio->bi_io_vec[0].bv_page;
  25660. +
  25661. + if (bio->bi_status)
  25662. + SetPageError(page);
  25663. + end_page_writeback(page);
  25664. + bio_put(bio);
  25665. +}
  25666. +
  25667. +/* ->readpage() method for formatted nodes */
  25668. +static int formatted_readpage(struct file *f UNUSED_ARG,
  25669. + struct page *page/* page to read */)
  25670. +{
  25671. + assert("nikita-2412", PagePrivate(page) && jprivate(page));
  25672. + return reiser4_page_io(page, jprivate(page), READ,
  25673. + reiser4_ctx_gfp_mask_get());
  25674. +}
  25675. +
  25676. +/**
  25677. + * reiser4_page_io - submit single-page bio request
  25678. + * @page: page to perform io for
  25679. + * @node: jnode of page
  25680. + * @rw: read or write
  25681. + * @gfp: gfp mask for bio allocation
  25682. + *
  25683. + * Submits single page read or write.
  25684. + */
  25685. +int reiser4_page_io(struct page *page, jnode *node, int rw, gfp_t gfp)
  25686. +{
  25687. + struct bio *bio;
  25688. + int result;
  25689. +
  25690. + assert("nikita-2094", page != NULL);
  25691. + assert("nikita-2226", PageLocked(page));
  25692. + assert("nikita-2634", node != NULL);
  25693. + assert("nikita-2893", rw == READ || rw == WRITE);
  25694. +
  25695. + if (rw) {
  25696. + if (unlikely(page->mapping->host->i_sb->s_flags & MS_RDONLY)) {
  25697. + unlock_page(page);
  25698. + return 0;
  25699. + }
  25700. + }
  25701. +
  25702. + bio = page_bio(page, node, rw, gfp);
  25703. + if (!IS_ERR(bio)) {
  25704. + if (rw == WRITE) {
  25705. + set_page_writeback(page);
  25706. + unlock_page(page);
  25707. + }
  25708. + bio_set_op_attrs(bio, rw, 0);
  25709. + submit_bio(bio);
  25710. + result = 0;
  25711. + } else {
  25712. + unlock_page(page);
  25713. + result = PTR_ERR(bio);
  25714. + }
  25715. +
  25716. + return result;
  25717. +}
  25718. +
  25719. +/* helper function to construct bio for page */
  25720. +static struct bio *page_bio(struct page *page, jnode * node, int rw, gfp_t gfp)
  25721. +{
  25722. + struct bio *bio;
  25723. + assert("nikita-2092", page != NULL);
  25724. + assert("nikita-2633", node != NULL);
  25725. +
  25726. + /* Simple implementation in the assumption that blocksize == pagesize.
  25727. +
  25728. + We only have to submit one block, but submit_bh() will allocate bio
  25729. + anyway, so lets use all the bells-and-whistles of bio code.
  25730. + */
  25731. +
  25732. + bio = bio_alloc(gfp, 1);
  25733. + if (bio != NULL) {
  25734. + int blksz;
  25735. + struct super_block *super;
  25736. + reiser4_block_nr blocknr;
  25737. +
  25738. + super = page->mapping->host->i_sb;
  25739. + assert("nikita-2029", super != NULL);
  25740. + blksz = super->s_blocksize;
  25741. + assert("nikita-2028", blksz == (int)PAGE_SIZE);
  25742. +
  25743. + spin_lock_jnode(node);
  25744. + blocknr = *jnode_get_io_block(node);
  25745. + spin_unlock_jnode(node);
  25746. +
  25747. + assert("nikita-2275", blocknr != (reiser4_block_nr) 0);
  25748. + assert("nikita-2276", !reiser4_blocknr_is_fake(&blocknr));
  25749. +
  25750. + bio_set_dev(bio, super->s_bdev);
  25751. + /* fill bio->bi_iter.bi_sector before calling bio_add_page(), because
  25752. + * q->merge_bvec_fn may want to inspect it (see
  25753. + * drivers/md/linear.c:linear_mergeable_bvec() for example). */
  25754. + bio->bi_iter.bi_sector = blocknr * (blksz >> 9);
  25755. +
  25756. + if (!bio_add_page(bio, page, blksz, 0)) {
  25757. + warning("nikita-3452",
  25758. + "Single page bio cannot be constructed");
  25759. + return ERR_PTR(RETERR(-EINVAL));
  25760. + }
  25761. +
  25762. + /* bio -> bi_idx is filled by bio_init() */
  25763. + bio->bi_end_io = (rw == READ) ?
  25764. + end_bio_single_page_read : end_bio_single_page_write;
  25765. +
  25766. + return bio;
  25767. + } else
  25768. + return ERR_PTR(RETERR(-ENOMEM));
  25769. +}
  25770. +
  25771. +#if 0
  25772. +static int can_hit_entd(reiser4_context *ctx, struct super_block *s)
  25773. +{
  25774. + if (ctx == NULL || ((unsigned long)ctx->magic) != context_magic)
  25775. + return 1;
  25776. + if (ctx->super != s)
  25777. + return 1;
  25778. + if (get_super_private(s)->entd.tsk == current)
  25779. + return 0;
  25780. + if (!lock_stack_isclean(&ctx->stack))
  25781. + return 0;
  25782. + if (ctx->trans->atom != NULL)
  25783. + return 0;
  25784. + return 1;
  25785. +}
  25786. +#endif
  25787. +
  25788. +/**
  25789. + * reiser4_writepage - writepage of struct address_space_operations
  25790. + * @page: page to write
  25791. + * @wbc:
  25792. + *
  25793. + *
  25794. + */
  25795. +/* Common memory pressure notification. */
  25796. +int reiser4_writepage(struct page *page,
  25797. + struct writeback_control *wbc)
  25798. +{
  25799. + /*
  25800. + * assert("edward-1562",
  25801. + * can_hit_entd(get_current_context_check(), sb));
  25802. + */
  25803. + assert("vs-828", PageLocked(page));
  25804. +
  25805. + return write_page_by_ent(page, wbc);
  25806. +}
  25807. +
  25808. +/* ->set_page_dirty() method of formatted address_space */
  25809. +static int formatted_set_page_dirty(struct page *page)
  25810. +{
  25811. + assert("nikita-2173", page != NULL);
  25812. + BUG();
  25813. + return __set_page_dirty_nobuffers(page);
  25814. +}
  25815. +
  25816. +/* writepages method of address space operations in reiser4 is used to involve
  25817. + into transactions pages which are dirtied via mmap. Only regular files can
  25818. + have such pages. Fake inode is used to access formatted nodes via page
  25819. + cache. As formatted nodes can never be mmaped, fake inode's writepages has
  25820. + nothing to do */
  25821. +static int
  25822. +writepages_fake(struct address_space *mapping, struct writeback_control *wbc)
  25823. +{
  25824. + return 0;
  25825. +}
  25826. +
  25827. +/* address space operations for the fake inode */
  25828. +static struct address_space_operations formatted_fake_as_ops = {
  25829. + /* Perform a writeback of a single page as a memory-freeing
  25830. + * operation. */
  25831. + .writepage = reiser4_writepage,
  25832. + /* this is called to read formatted node */
  25833. + .readpage = formatted_readpage,
  25834. + /* ->sync_page() method of fake inode address space operations. Called
  25835. + from wait_on_page() and lock_page().
  25836. +
  25837. + This is a most annoyingly misnamed method. Actually it is called
  25838. + from wait_on_page_bit() and lock_page() and its purpose is to
  25839. + actually start io by jabbing device drivers.
  25840. + .sync_page = block_sync_page,
  25841. + */
  25842. + /* Write back some dirty pages from this mapping. Called from sync.
  25843. + called during sync (pdflush) */
  25844. + .writepages = writepages_fake,
  25845. + /* Set a page dirty */
  25846. + .set_page_dirty = formatted_set_page_dirty,
  25847. + /* used for read-ahead. Not applicable */
  25848. + .readpages = NULL,
  25849. + .write_begin = NULL,
  25850. + .write_end = NULL,
  25851. + .bmap = NULL,
  25852. + /* called just before page is being detached from inode mapping and
  25853. + removed from memory. Called on truncate, cut/squeeze, and
  25854. + umount. */
  25855. + .invalidatepage = reiser4_invalidatepage,
  25856. + /* this is called by shrink_cache() so that file system can try to
  25857. + release objects (jnodes, buffers, journal heads) attached to page
  25858. + and, may be made page itself free-able.
  25859. + */
  25860. + .releasepage = reiser4_releasepage,
  25861. + .direct_IO = NULL,
  25862. + .migratepage = reiser4_migratepage
  25863. +};
  25864. +
  25865. +/* called just before page is released (no longer used by reiser4). Callers:
  25866. + jdelete() and extent2tail(). */
  25867. +void reiser4_drop_page(struct page *page)
  25868. +{
  25869. + assert("nikita-2181", PageLocked(page));
  25870. + clear_page_dirty_for_io(page);
  25871. + ClearPageUptodate(page);
  25872. +#if defined(PG_skipped)
  25873. + ClearPageSkipped(page);
  25874. +#endif
  25875. + unlock_page(page);
  25876. +}
  25877. +
  25878. +#define JNODE_GANG_SIZE (16)
  25879. +
  25880. +/* find all jnodes from range specified and invalidate them */
  25881. +static int
  25882. +truncate_jnodes_range(struct inode *inode, pgoff_t from, pgoff_t count)
  25883. +{
  25884. + reiser4_inode *info;
  25885. + int truncated_jnodes;
  25886. + reiser4_tree *tree;
  25887. + unsigned long index;
  25888. + unsigned long end;
  25889. +
  25890. + if (inode_file_plugin(inode) ==
  25891. + file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID))
  25892. + /*
  25893. + * No need to get rid of jnodes here: if the single jnode of
  25894. + * page cluster did not have page, then it was found and killed
  25895. + * before in
  25896. + * truncate_complete_page_cluster()->jput()->jput_final(),
  25897. + * otherwise it will be dropped by reiser4_invalidatepage()
  25898. + */
  25899. + return 0;
  25900. + truncated_jnodes = 0;
  25901. +
  25902. + info = reiser4_inode_data(inode);
  25903. + tree = reiser4_tree_by_inode(inode);
  25904. +
  25905. + index = from;
  25906. + end = from + count;
  25907. +
  25908. + while (1) {
  25909. + jnode *gang[JNODE_GANG_SIZE];
  25910. + int taken;
  25911. + int i;
  25912. + jnode *node;
  25913. +
  25914. + assert("nikita-3466", index <= end);
  25915. +
  25916. + read_lock_tree(tree);
  25917. + taken =
  25918. + radix_tree_gang_lookup(jnode_tree_by_reiser4_inode(info),
  25919. + (void **)gang, index,
  25920. + JNODE_GANG_SIZE);
  25921. + for (i = 0; i < taken; ++i) {
  25922. + node = gang[i];
  25923. + if (index_jnode(node) < end)
  25924. + jref(node);
  25925. + else
  25926. + gang[i] = NULL;
  25927. + }
  25928. + read_unlock_tree(tree);
  25929. +
  25930. + for (i = 0; i < taken; ++i) {
  25931. + node = gang[i];
  25932. + if (node != NULL) {
  25933. + index = max(index, index_jnode(node));
  25934. + spin_lock_jnode(node);
  25935. + assert("edward-1457", node->pg == NULL);
  25936. + /* this is always called after
  25937. + truncate_inode_pages_range(). Therefore, here
  25938. + jnode can not have page. New pages can not be
  25939. + created because truncate_jnodes_range goes
  25940. + under exclusive access on file obtained,
  25941. + where as new page creation requires
  25942. + non-exclusive access obtained */
  25943. + JF_SET(node, JNODE_HEARD_BANSHEE);
  25944. + reiser4_uncapture_jnode(node);
  25945. + unhash_unformatted_jnode(node);
  25946. + truncated_jnodes++;
  25947. + jput(node);
  25948. + } else
  25949. + break;
  25950. + }
  25951. + if (i != taken || taken == 0)
  25952. + break;
  25953. + }
  25954. + return truncated_jnodes;
  25955. +}
  25956. +
  25957. +/* Truncating files in reiser4: problems and solutions.
  25958. +
  25959. + VFS calls fs's truncate after it has called truncate_inode_pages()
  25960. + to get rid of pages corresponding to part of file being truncated.
  25961. + In reiser4 it may cause existence of unallocated extents which do
  25962. + not have jnodes. Flush code does not expect that. Solution of this
  25963. + problem is straightforward. As vfs's truncate is implemented using
  25964. + setattr operation, it seems reasonable to have ->setattr() that
  25965. + will cut file body. However, flush code also does not expect dirty
  25966. + pages without parent items, so it is impossible to cut all items,
  25967. + then truncate all pages in two steps. We resolve this problem by
  25968. + cutting items one-by-one. Each such fine-grained step performed
  25969. + under longterm znode lock calls at the end ->kill_hook() method of
  25970. + a killed item to remove its bound pages and jnodes.
  25971. +
  25972. + The following function is a common part of mentioned kill hooks.
  25973. + Also, this is called before tail-to-extent conversion (to avoid
  25974. + managing multiple copies of the data).
  25975. +*/
  25976. +void reiser4_invalidate_pages(struct address_space *mapping, pgoff_t from,
  25977. + unsigned long count, int even_cows)
  25978. +{
  25979. + loff_t from_bytes, count_bytes;
  25980. +
  25981. + if (count == 0)
  25982. + return;
  25983. + from_bytes = ((loff_t) from) << PAGE_SHIFT;
  25984. + count_bytes = ((loff_t) count) << PAGE_SHIFT;
  25985. +
  25986. + unmap_mapping_range(mapping, from_bytes, count_bytes, even_cows);
  25987. + truncate_inode_pages_range(mapping, from_bytes,
  25988. + from_bytes + count_bytes - 1);
  25989. + truncate_jnodes_range(mapping->host, from, count);
  25990. +}
  25991. +
  25992. +/*
  25993. + * Local variables:
  25994. + * c-indentation-style: "K&R"
  25995. + * mode-name: "LC"
  25996. + * c-basic-offset: 8
  25997. + * tab-width: 8
  25998. + * fill-column: 120
  25999. + * scroll-step: 1
  26000. + * End:
  26001. + */
  26002. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/page_cache.h linux-4.14.2/fs/reiser4/page_cache.h
  26003. --- linux-4.14.2.orig/fs/reiser4/page_cache.h 1970-01-01 01:00:00.000000000 +0100
  26004. +++ linux-4.14.2/fs/reiser4/page_cache.h 2017-11-26 22:13:09.000000000 +0100
  26005. @@ -0,0 +1,64 @@
  26006. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  26007. + * reiser4/README */
  26008. +/* Memory pressure hooks. Fake inodes handling. See page_cache.c. */
  26009. +
  26010. +#if !defined(__REISER4_PAGE_CACHE_H__)
  26011. +#define __REISER4_PAGE_CACHE_H__
  26012. +
  26013. +#include "forward.h"
  26014. +#include "context.h" /* for reiser4_ctx_gfp_mask_get() */
  26015. +
  26016. +#include <linux/fs.h> /* for struct super_block, address_space */
  26017. +#include <linux/mm.h> /* for struct page */
  26018. +#include <linux/pagemap.h> /* for lock_page() */
  26019. +#include <linux/vmalloc.h> /* for __vmalloc() */
  26020. +
  26021. +extern int reiser4_init_formatted_fake(struct super_block *);
  26022. +extern void reiser4_done_formatted_fake(struct super_block *);
  26023. +
  26024. +extern reiser4_tree *reiser4_tree_by_page(const struct page *);
  26025. +
  26026. +extern void reiser4_wait_page_writeback(struct page *);
  26027. +static inline void lock_and_wait_page_writeback(struct page *page)
  26028. +{
  26029. + lock_page(page);
  26030. + if (unlikely(PageWriteback(page)))
  26031. + reiser4_wait_page_writeback(page);
  26032. +}
  26033. +
  26034. +#define jprivate(page) ((jnode *)page_private(page))
  26035. +
  26036. +extern int reiser4_page_io(struct page *, jnode *, int rw, gfp_t);
  26037. +extern void reiser4_drop_page(struct page *);
  26038. +extern void reiser4_invalidate_pages(struct address_space *, pgoff_t from,
  26039. + unsigned long count, int even_cows);
  26040. +extern void capture_reiser4_inodes(struct super_block *,
  26041. + struct writeback_control *);
  26042. +static inline void *reiser4_vmalloc(unsigned long size)
  26043. +{
  26044. + return __vmalloc(size,
  26045. + reiser4_ctx_gfp_mask_get() | __GFP_HIGHMEM,
  26046. + PAGE_KERNEL);
  26047. +}
  26048. +
  26049. +#define PAGECACHE_TAG_REISER4_MOVED PAGECACHE_TAG_DIRTY
  26050. +
  26051. +#if REISER4_DEBUG
  26052. +extern void print_page(const char *prefix, struct page *page);
  26053. +#else
  26054. +#define print_page(prf, p) noop
  26055. +#endif
  26056. +
  26057. +/* __REISER4_PAGE_CACHE_H__ */
  26058. +#endif
  26059. +
  26060. +/* Make Linus happy.
  26061. + Local variables:
  26062. + c-indentation-style: "K&R"
  26063. + mode-name: "LC"
  26064. + c-basic-offset: 8
  26065. + tab-width: 8
  26066. + fill-column: 120
  26067. + scroll-step: 1
  26068. + End:
  26069. +*/
  26070. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/cluster.c linux-4.14.2/fs/reiser4/plugin/cluster.c
  26071. --- linux-4.14.2.orig/fs/reiser4/plugin/cluster.c 1970-01-01 01:00:00.000000000 +0100
  26072. +++ linux-4.14.2/fs/reiser4/plugin/cluster.c 2017-11-26 22:13:09.000000000 +0100
  26073. @@ -0,0 +1,72 @@
  26074. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  26075. + * reiser4/README */
  26076. +
  26077. +/* Contains reiser4 cluster plugins (see
  26078. + http://www.namesys.com/cryptcompress_design.html
  26079. + "Concepts of clustering" for details). */
  26080. +
  26081. +#include "plugin_header.h"
  26082. +#include "plugin.h"
  26083. +#include "../inode.h"
  26084. +
  26085. +static int change_cluster(struct inode *inode,
  26086. + reiser4_plugin * plugin,
  26087. + pset_member memb)
  26088. +{
  26089. + assert("edward-1324", inode != NULL);
  26090. + assert("edward-1325", plugin != NULL);
  26091. + assert("edward-1326", is_reiser4_inode(inode));
  26092. + assert("edward-1327", plugin->h.type_id == REISER4_CLUSTER_PLUGIN_TYPE);
  26093. +
  26094. + /* Can't change the cluster plugin for already existing regular files */
  26095. + if (!plugin_of_group(inode_file_plugin(inode), REISER4_DIRECTORY_FILE))
  26096. + return RETERR(-EINVAL);
  26097. +
  26098. + /* If matches, nothing to change. NOTE(review): the check below reads
  inode_hash_plugin(), but this function changes the CLUSTER plugin --
  likely should be inode_cluster_plugin(); confirm. */
  26099. + if (inode_hash_plugin(inode) != NULL &&
  26100. + inode_hash_plugin(inode)->h.id == plugin->h.id)
  26101. + return 0;
  26102. +
  26103. + return aset_set_unsafe(&reiser4_inode_data(inode)->pset,
  26104. + PSET_CLUSTER, plugin);
  26105. +}
  26106. +
  26107. +static reiser4_plugin_ops cluster_plugin_ops = {
  26108. + .init = NULL,
  26109. + .load = NULL,
  26110. + .save_len = NULL,
  26111. + .save = NULL,
  26112. + .change = &change_cluster
  26113. +};
  26114. +
  26115. +#define SUPPORT_CLUSTER(SHIFT, ID, LABEL, DESC) \
  26116. + [CLUSTER_ ## ID ## _ID] = { \
  26117. + .h = { \
  26118. + .type_id = REISER4_CLUSTER_PLUGIN_TYPE, \
  26119. + .id = CLUSTER_ ## ID ## _ID, \
  26120. + .pops = &cluster_plugin_ops, \
  26121. + .label = LABEL, \
  26122. + .desc = DESC, \
  26123. + .linkage = {NULL, NULL} \
  26124. + }, \
  26125. + .shift = SHIFT \
  26126. + }
  26127. +
  26128. +cluster_plugin cluster_plugins[LAST_CLUSTER_ID] = {
  26129. + SUPPORT_CLUSTER(16, 64K, "64K", "Large"),
  26130. + SUPPORT_CLUSTER(15, 32K, "32K", "Big"),
  26131. + SUPPORT_CLUSTER(14, 16K, "16K", "Average"),
  26132. + SUPPORT_CLUSTER(13, 8K, "8K", "Small"),
  26133. + SUPPORT_CLUSTER(12, 4K, "4K", "Minimal")
  26134. +};
  26135. +
  26136. +/*
  26137. + Local variables:
  26138. + c-indentation-style: "K&R"
  26139. + mode-name: "LC"
  26140. + c-basic-offset: 8
  26141. + tab-width: 8
  26142. + fill-column: 120
  26143. + scroll-step: 1
  26144. + End:
  26145. +*/
  26146. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/cluster.h linux-4.14.2/fs/reiser4/plugin/cluster.h
  26147. --- linux-4.14.2.orig/fs/reiser4/plugin/cluster.h 1970-01-01 01:00:00.000000000 +0100
  26148. +++ linux-4.14.2/fs/reiser4/plugin/cluster.h 2017-11-26 22:13:09.000000000 +0100
  26149. @@ -0,0 +1,410 @@
  26150. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  26151. +
  26152. +/* This file contains size/offset translators, modulators
  26153. + and other helper functions. */
  26154. +
  26155. +#if !defined(__FS_REISER4_CLUSTER_H__)
  26156. +#define __FS_REISER4_CLUSTER_H__
  26157. +
  26158. +#include "../inode.h"
  26159. +
  26160. +static inline int inode_cluster_shift(struct inode *inode)
  26161. +{
  26162. + assert("edward-92", inode != NULL);
  26163. + assert("edward-93", reiser4_inode_data(inode) != NULL);
  26164. +
  26165. + return inode_cluster_plugin(inode)->shift;
  26166. +}
  26167. +
  26168. +static inline unsigned cluster_nrpages_shift(struct inode *inode)
  26169. +{
  26170. + return inode_cluster_shift(inode) - PAGE_SHIFT;
  26171. +}
  26172. +
  26173. +/* cluster size in page units */
  26174. +static inline unsigned cluster_nrpages(struct inode *inode)
  26175. +{
  26176. + return 1U << cluster_nrpages_shift(inode);
  26177. +}
  26178. +
  26179. +static inline size_t inode_cluster_size(struct inode *inode)
  26180. +{
  26181. + assert("edward-96", inode != NULL);
  26182. +
  26183. + return 1U << inode_cluster_shift(inode);
  26184. +}
  26185. +
  26186. +static inline cloff_t pg_to_clust(pgoff_t idx, struct inode *inode)
  26187. +{
  26188. + return idx >> cluster_nrpages_shift(inode);
  26189. +}
  26190. +
  26191. +static inline pgoff_t clust_to_pg(cloff_t idx, struct inode *inode)
  26192. +{
  26193. + return idx << cluster_nrpages_shift(inode);
  26194. +}
  26195. +
  26196. +static inline pgoff_t pg_to_clust_to_pg(pgoff_t idx, struct inode *inode)
  26197. +{
  26198. + return clust_to_pg(pg_to_clust(idx, inode), inode);
  26199. +}
  26200. +
  26201. +static inline pgoff_t off_to_pg(loff_t off)
  26202. +{
  26203. + return (off >> PAGE_SHIFT);
  26204. +}
  26205. +
  26206. +static inline loff_t pg_to_off(pgoff_t idx)
  26207. +{
  26208. + return ((loff_t) (idx) << PAGE_SHIFT);
  26209. +}
  26210. +
  26211. +static inline cloff_t off_to_clust(loff_t off, struct inode *inode)
  26212. +{
  26213. + return off >> inode_cluster_shift(inode);
  26214. +}
  26215. +
  26216. +static inline loff_t clust_to_off(cloff_t idx, struct inode *inode)
  26217. +{
  26218. + return (loff_t) idx << inode_cluster_shift(inode);
  26219. +}
  26220. +
  26221. +static inline loff_t off_to_clust_to_off(loff_t off, struct inode *inode)
  26222. +{
  26223. + return clust_to_off(off_to_clust(off, inode), inode);
  26224. +}
  26225. +
  26226. +static inline pgoff_t off_to_clust_to_pg(loff_t off, struct inode *inode)
  26227. +{
  26228. + return clust_to_pg(off_to_clust(off, inode), inode);
  26229. +}
  26230. +
  26231. +static inline unsigned off_to_pgoff(loff_t off)
  26232. +{
  26233. + return off & (PAGE_SIZE - 1);
  26234. +}
  26235. +
  26236. +static inline unsigned off_to_cloff(loff_t off, struct inode *inode)
  26237. +{
  26238. + return off & ((loff_t) (inode_cluster_size(inode)) - 1);
  26239. +}
  26240. +
  26241. +static inline pgoff_t offset_in_clust(struct page *page)
  26242. +{
  26243. + assert("edward-1488", page != NULL);
  26244. + assert("edward-1489", page->mapping != NULL);
  26245. +
  26246. + return page_index(page) & ((cluster_nrpages(page->mapping->host)) - 1);
  26247. +}
  26248. +
  26249. +static inline int first_page_in_cluster(struct page *page)
  26250. +{
  26251. + return offset_in_clust(page) == 0;
  26252. +}
  26253. +
  26254. +static inline int last_page_in_cluster(struct page *page)
  26255. +{
  26256. + return offset_in_clust(page) ==
  26257. + cluster_nrpages(page->mapping->host) - 1;
  26258. +}
  26259. +
  26260. +static inline unsigned
  26261. +pg_to_off_to_cloff(unsigned long idx, struct inode *inode)
  26262. +{
  26263. + return off_to_cloff(pg_to_off(idx), inode);
  26264. +}
  26265. +
  26266. +/*********************** Size translators **************************/
  26267. +
  26268. +/* Translate linear size.
  26269. + * New units are (1 << @blkbits) times larger than old ones.
  26270. + * In other words, calculate number of logical blocks, occupied
  26271. + * by @count elements
  26272. + */
  26273. +static inline unsigned long size_in_blocks(loff_t count, unsigned blkbits)
  26274. +{
  26275. + return (count + (1UL << blkbits) - 1) >> blkbits;
  26276. +}
  26277. +
  26278. +/* size in pages */
  26279. +static inline pgoff_t size_in_pages(loff_t size)
  26280. +{
  26281. + return size_in_blocks(size, PAGE_SHIFT);
  26282. +}
  26283. +
  26284. +/* size in logical clusters */
  26285. +static inline cloff_t size_in_lc(loff_t size, struct inode *inode)
  26286. +{
  26287. + return size_in_blocks(size, inode_cluster_shift(inode));
  26288. +}
  26289. +
  26290. +/* size in pages to the size in page clusters */
  26291. +static inline cloff_t sp_to_spcl(pgoff_t size, struct inode *inode)
  26292. +{
  26293. + return size_in_blocks(size, cluster_nrpages_shift(inode));
  26294. +}
  26295. +
  26296. +/*********************** Size modulators ***************************/
  26297. +
  26298. +/*
  26299. + Modulate linear size by nominated block size and offset.
  26300. +
  26301. + The "finite" function (which is zero almost everywhere).
  26302. + How much is a height of the figure at a position @pos,
  26303. + when trying to construct rectangle of height (1 << @blkbits),
  26304. + and square @size.
  26305. +
  26306. + ******
  26307. + *******
  26308. + *******
  26309. + *******
  26310. + ----------> pos
  26311. +*/
  26312. +static inline unsigned __mbb(loff_t size, unsigned long pos, int blkbits)
  26313. +{
  26314. + unsigned end = size >> blkbits;
  26315. + if (pos < end)
  26316. + return 1U << blkbits;
  26317. + if (unlikely(pos > end))
  26318. + return 0;
  26319. + return size & ~(~0ull << blkbits);
  26320. +}
  26321. +
  26322. +/* the same as above, but block size is page size */
  26323. +static inline unsigned __mbp(loff_t size, pgoff_t pos)
  26324. +{
  26325. + return __mbb(size, pos, PAGE_SHIFT);
  26326. +}
  26327. +
  26328. +/* number of file's bytes in the nominated logical cluster */
  26329. +static inline unsigned lbytes(cloff_t index, struct inode *inode)
  26330. +{
  26331. + return __mbb(i_size_read(inode), index, inode_cluster_shift(inode));
  26332. +}
  26333. +
  26334. +/* number of file's bytes in the nominated page */
  26335. +static inline unsigned pbytes(pgoff_t index, struct inode *inode)
  26336. +{
  26337. + return __mbp(i_size_read(inode), index);
  26338. +}
  26339. +
  26340. +/**
  26341. + * number of pages occupied by @win->count bytes starting from
  26342. + * @win->off at logical cluster defined by @win. This is exactly
  26343. + * a number of pages to be modified and dirtied in any cluster operation.
  26344. + */
  26345. +static inline pgoff_t win_count_to_nrpages(struct reiser4_slide * win)
  26346. +{
  26347. + return ((win->off + win->count +
  26348. + (1UL << PAGE_SHIFT) - 1) >> PAGE_SHIFT) -
  26349. + off_to_pg(win->off);
  26350. +}
  26351. +
  26352. +/* return true, if logical cluster is not occupied by the file */
  26353. +static inline int new_logical_cluster(struct cluster_handle *clust,
  26354. + struct inode *inode)
  26355. +{
  26356. + return clust_to_off(clust->index, inode) >= i_size_read(inode);
  26357. +}
  26358. +
  26359. +/* return true, if pages @p1 and @p2 are of the same page cluster */
  26360. +static inline int same_page_cluster(struct page *p1, struct page *p2)
  26361. +{
  26362. + assert("edward-1490", p1 != NULL);
  26363. + assert("edward-1491", p2 != NULL);
  26364. + assert("edward-1492", p1->mapping != NULL);
  26365. + assert("edward-1493", p2->mapping != NULL);
  26366. +
  26367. + return (pg_to_clust(page_index(p1), p1->mapping->host) ==
  26368. + pg_to_clust(page_index(p2), p2->mapping->host));
  26369. +}
  26370. +
  26371. +static inline int cluster_is_complete(struct cluster_handle *clust,
  26372. + struct inode *inode)
  26373. +{
  26374. + return clust->tc.lsize == inode_cluster_size(inode);
  26375. +}
  26376. +
  26377. +static inline void reiser4_slide_init(struct reiser4_slide *win)
  26378. +{
  26379. + assert("edward-1084", win != NULL);
  26380. + memset(win, 0, sizeof *win);
  26381. +}
  26382. +
  26383. +static inline tfm_action
  26384. +cluster_get_tfm_act(struct tfm_cluster *tc)
  26385. +{
  26386. + assert("edward-1356", tc != NULL);
  26387. + return tc->act;
  26388. +}
  26389. +
  26390. +static inline void
  26391. +cluster_set_tfm_act(struct tfm_cluster *tc, tfm_action act)
  26392. +{
  26393. + assert("edward-1356", tc != NULL);
  26394. + tc->act = act;
  26395. +}
  26396. +
  26397. +static inline void cluster_init_act(struct cluster_handle *clust,
  26398. + tfm_action act,
  26399. + struct reiser4_slide *window)
  26400. +{
  26401. + assert("edward-84", clust != NULL);
  26402. + memset(clust, 0, sizeof *clust);
  26403. + cluster_set_tfm_act(&clust->tc, act);
  26404. + clust->dstat = INVAL_DISK_CLUSTER;
  26405. + clust->win = window;
  26406. +}
  26407. +
  26408. +static inline void cluster_init_read(struct cluster_handle *clust,
  26409. + struct reiser4_slide *window)
  26410. +{
  26411. + cluster_init_act(clust, TFMA_READ, window);
  26412. +}
  26413. +
  26414. +static inline void cluster_init_write(struct cluster_handle *clust,
  26415. + struct reiser4_slide *window)
  26416. +{
  26417. + cluster_init_act(clust, TFMA_WRITE, window);
  26418. +}
  26419. +
  26420. +/* true if @p1 and @p2 are items of the same disk cluster */
  26421. +static inline int same_disk_cluster(const coord_t *p1, const coord_t *p2)
  26422. +{
  26423. + /* drop this if you have other items to aggregate */
  26424. + assert("edward-1494", item_id_by_coord(p1) == CTAIL_ID);
  26425. +
  26426. + return item_plugin_by_coord(p1)->b.mergeable(p1, p2);
  26427. +}
  26428. +
  26429. +static inline int dclust_get_extension_dsize(hint_t *hint)
  26430. +{
  26431. + return hint->ext_coord.extension.ctail.dsize;
  26432. +}
  26433. +
  26434. +static inline void dclust_set_extension_dsize(hint_t *hint, int dsize)
  26435. +{
  26436. + hint->ext_coord.extension.ctail.dsize = dsize;
  26437. +}
  26438. +
  26439. +static inline int dclust_get_extension_shift(hint_t *hint)
  26440. +{
  26441. + return hint->ext_coord.extension.ctail.shift;
  26442. +}
  26443. +
  26444. +static inline int dclust_get_extension_ncount(hint_t *hint)
  26445. +{
  26446. + return hint->ext_coord.extension.ctail.ncount;
  26447. +}
  26448. +
  26449. +static inline void dclust_inc_extension_ncount(hint_t *hint)
  26450. +{
  26451. + hint->ext_coord.extension.ctail.ncount++;
  26452. +}
  26453. +
  26454. +static inline void dclust_init_extension(hint_t *hint)
  26455. +{
  26456. + memset(&hint->ext_coord.extension.ctail, 0,
  26457. + sizeof(hint->ext_coord.extension.ctail));
  26458. +}
  26459. +
  26460. +static inline int hint_is_unprepped_dclust(hint_t *hint)
  26461. +{
  26462. + assert("edward-1451", hint_is_valid(hint));
  26463. + return dclust_get_extension_shift(hint) == (int)UCTAIL_SHIFT;
  26464. +}
  26465. +
  26466. +static inline void coord_set_between_clusters(coord_t *coord)
  26467. +{
  26468. +#if REISER4_DEBUG
  26469. + int result;
  26470. + result = zload(coord->node);
  26471. + assert("edward-1296", !result);
  26472. +#endif
  26473. + if (!coord_is_between_items(coord)) {
  26474. + coord->between = AFTER_ITEM;
  26475. + coord->unit_pos = 0;
  26476. + }
  26477. +#if REISER4_DEBUG
  26478. + zrelse(coord->node);
  26479. +#endif
  26480. +}
  26481. +
  26482. +int reiser4_inflate_cluster(struct cluster_handle *, struct inode *);
  26483. +int find_disk_cluster(struct cluster_handle *, struct inode *, int read,
  26484. + znode_lock_mode mode);
  26485. +int checkout_logical_cluster(struct cluster_handle *, jnode * , struct inode *);
  26486. +int reiser4_deflate_cluster(struct cluster_handle *, struct inode *);
  26487. +void truncate_complete_page_cluster(struct inode *inode, cloff_t start,
  26488. + int even_cows);
  26489. +void invalidate_hint_cluster(struct cluster_handle *clust);
  26490. +int get_disk_cluster_locked(struct cluster_handle *clust, struct inode *inode,
  26491. + znode_lock_mode lock_mode);
  26492. +void reset_cluster_params(struct cluster_handle *clust);
  26493. +int set_cluster_by_page(struct cluster_handle *clust, struct page *page,
  26494. + int count);
  26495. +int prepare_page_cluster(struct inode *inode, struct cluster_handle *clust,
  26496. + rw_op rw);
  26497. +void __put_page_cluster(int from, int count, struct page **pages,
  26498. + struct inode *inode);
  26499. +void put_page_cluster(struct cluster_handle *clust,
  26500. + struct inode *inode, rw_op rw);
  26501. +void put_cluster_handle(struct cluster_handle *clust);
  26502. +int grab_tfm_stream(struct inode *inode, struct tfm_cluster *tc,
  26503. + tfm_stream_id id);
  26504. +int tfm_cluster_is_uptodate(struct tfm_cluster *tc);
  26505. +void tfm_cluster_set_uptodate(struct tfm_cluster *tc);
  26506. +void tfm_cluster_clr_uptodate(struct tfm_cluster *tc);
  26507. +
  26508. +/* move cluster handle to the target position
  26509. + specified by the page of index @pgidx */
  26510. +static inline void move_cluster_forward(struct cluster_handle *clust,
  26511. + struct inode *inode,
  26512. + pgoff_t pgidx)
  26513. +{
  26514. + assert("edward-1297", clust != NULL);
  26515. + assert("edward-1298", inode != NULL);
  26516. +
  26517. + reset_cluster_params(clust);
  26518. + if (clust->index_valid &&
  26519. + /* Hole in the indices. Hint became invalid and can not be
  26520. + used by find_cluster_item() even if seal/node versions
  26521. + will coincide */
  26522. + pg_to_clust(pgidx, inode) != clust->index + 1) {
  26523. + reiser4_unset_hint(clust->hint);
  26524. + invalidate_hint_cluster(clust);
  26525. + }
  26526. + clust->index = pg_to_clust(pgidx, inode);
  26527. + clust->index_valid = 1;
  26528. +}
  26529. +
  26530. +static inline int alloc_clust_pages(struct cluster_handle *clust,
  26531. + struct inode *inode) /* returns 0, or -ENOMEM on allocation failure */
  26532. +{
  26533. + assert("edward-791", clust != NULL);
  26534. + assert("edward-792", inode != NULL);
  26535. + clust->pages =
  26536. + kmalloc(sizeof(*clust->pages) << inode_cluster_shift(inode), /* one page pointer per page of the cluster */
  26537. + reiser4_ctx_gfp_mask_get());
  26538. + if (!clust->pages)
  26539. + return -ENOMEM;
  26540. + return 0;
  26541. +}
  26542. +
  26543. +static inline void free_clust_pages(struct cluster_handle *clust)
  26544. +{
  26545. + kfree(clust->pages);
  26546. +}
  26547. +
  26548. +#endif /* __FS_REISER4_CLUSTER_H__ */
  26549. +
  26550. +/* Make Linus happy.
  26551. + Local variables:
  26552. + c-indentation-style: "K&R"
  26553. + mode-name: "LC"
  26554. + c-basic-offset: 8
  26555. + tab-width: 8
  26556. + fill-column: 120
  26557. + scroll-step: 1
  26558. + End:
  26559. +*/
  26560. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/compress/compress.c linux-4.14.2/fs/reiser4/plugin/compress/compress.c
  26561. --- linux-4.14.2.orig/fs/reiser4/plugin/compress/compress.c 1970-01-01 01:00:00.000000000 +0100
  26562. +++ linux-4.14.2/fs/reiser4/plugin/compress/compress.c 2017-11-26 22:14:44.000000000 +0100
  26563. @@ -0,0 +1,521 @@
  26564. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  26565. +/* reiser4 compression transform plugins */
  26566. +
  26567. +#include "../../debug.h"
  26568. +#include "../../inode.h"
  26569. +#include "../plugin.h"
  26570. +
  26571. +#include <linux/lzo.h>
  26572. +#include <linux/zstd.h>
  26573. +#include <linux/zlib.h>
  26574. +#include <linux/types.h>
  26575. +#include <linux/hardirq.h>
  26576. +
  26577. +static int change_compression(struct inode *inode,
  26578. + reiser4_plugin * plugin,
  26579. + pset_member memb)
  26580. +{
  26581. + assert("edward-1316", inode != NULL);
  26582. + assert("edward-1317", plugin != NULL);
  26583. + assert("edward-1318", is_reiser4_inode(inode));
  26584. + assert("edward-1319",
  26585. + plugin->h.type_id == REISER4_COMPRESSION_PLUGIN_TYPE);
  26586. +
  26587. + /* cannot change compression plugin of already existing regular object */
  26588. + if (!plugin_of_group(inode_file_plugin(inode), REISER4_DIRECTORY_FILE))
  26589. + return RETERR(-EINVAL);
  26590. +
  26591. + /* If the installed compression plugin already matches, nothing to change. */
  26592. + if (inode_compression_plugin(inode) != NULL &&
  26593. + inode_compression_plugin(inode)->h.id == plugin->h.id)
  26594. + return 0;
  26595. +
  26596. + return aset_set_unsafe(&reiser4_inode_data(inode)->pset,
  26597. + PSET_COMPRESSION, plugin);
  26598. +}
  26599. +
  26600. +static reiser4_plugin_ops compression_plugin_ops = {
  26601. + .init = NULL,
  26602. + .load = NULL,
  26603. + .save_len = NULL,
  26604. + .save = NULL,
  26605. + .change = &change_compression
  26606. +};
  26607. +
  26608. +/******************************************************************************/
  26609. +/* gzip1 compression */
  26610. +/******************************************************************************/
  26611. +
  26612. +#define GZIP1_DEF_LEVEL Z_BEST_SPEED
  26613. +#define GZIP1_DEF_WINBITS 15
  26614. +#define GZIP1_DEF_MEMLEVEL MAX_MEM_LEVEL
  26615. +
  26616. +static int gzip1_init(void)
  26617. +{
  26618. + return 0;
  26619. +}
  26620. +
  26621. +static int gzip1_overrun(unsigned src_len UNUSED_ARG)
  26622. +{
  26623. + return 0; /* no extra output space reserved: expansion is caught by the rollback path in gzip1_compress() */
  26624. +}
  26625. +
  26626. +static coa_t gzip1_alloc(tfm_action act)
  26627. +{
  26628. + coa_t coa = NULL;
  26629. + int ret = 0;
  26630. + switch (act) {
  26631. + case TFMA_WRITE: /* compress */
  26632. + coa = reiser4_vmalloc(zlib_deflate_workspacesize(MAX_WBITS,
  26633. + MAX_MEM_LEVEL));
  26634. + if (!coa) {
  26635. + ret = -ENOMEM;
  26636. + break;
  26637. + }
  26638. + break;
  26639. + case TFMA_READ: /* decompress */
  26640. + coa = reiser4_vmalloc(zlib_inflate_workspacesize());
  26641. + if (!coa) {
  26642. + ret = -ENOMEM;
  26643. + break;
  26644. + }
  26645. + break;
  26646. + default:
  26647. + impossible("edward-767", "unknown tfm action");
  26648. + }
  26649. + if (ret)
  26650. + return ERR_PTR(ret);
  26651. + return coa;
  26652. +}
  26653. +
  26654. +static void gzip1_free(coa_t coa, tfm_action act)
  26655. +{
  26656. + assert("edward-769", coa != NULL);
  26657. +
  26658. + switch (act) {
  26659. + case TFMA_WRITE: /* compress */
  26660. + vfree(coa);
  26661. + break;
  26662. + case TFMA_READ: /* decompress */
  26663. + vfree(coa);
  26664. + break;
  26665. + default:
  26666. + impossible("edward-770", "unknown tfm action");
  26667. + }
  26668. + return;
  26669. +}
  26670. +
  26671. +static int gzip1_min_size_deflate(void)
  26672. +{
  26673. + return 64;
  26674. +}
  26675. +
  26676. +static void
  26677. +gzip1_compress(coa_t coa, __u8 * src_first, size_t src_len,
  26678. + __u8 * dst_first, size_t *dst_len)
  26679. +{
  26680. + int ret = 0;
  26681. + struct z_stream_s stream;
  26682. +
  26683. + assert("edward-842", coa != NULL);
  26684. + assert("edward-875", src_len != 0);
  26685. +
  26686. + stream.workspace = coa;
  26687. + ret = zlib_deflateInit2(&stream, GZIP1_DEF_LEVEL, Z_DEFLATED,
  26688. + -GZIP1_DEF_WINBITS, GZIP1_DEF_MEMLEVEL,
  26689. + Z_DEFAULT_STRATEGY);
  26690. + if (ret != Z_OK) {
  26691. + warning("edward-771", "zlib_deflateInit2 returned %d\n", ret);
  26692. + goto rollback;
  26693. + }
  26694. + ret = zlib_deflateReset(&stream);
  26695. + if (ret != Z_OK) {
  26696. + warning("edward-772", "zlib_deflateReset returned %d\n", ret);
  26697. + goto rollback;
  26698. + }
  26699. + stream.next_in = src_first;
  26700. + stream.avail_in = src_len;
  26701. + stream.next_out = dst_first;
  26702. + stream.avail_out = *dst_len;
  26703. +
  26704. + ret = zlib_deflate(&stream, Z_FINISH);
  26705. + if (ret != Z_STREAM_END) {
  26706. + if (ret != Z_OK)
  26707. + warning("edward-773",
  26708. + "zlib_deflate returned %d\n", ret);
  26709. + goto rollback;
  26710. + }
  26711. + *dst_len = stream.total_out;
  26712. + return;
  26713. + rollback:
  26714. + *dst_len = src_len;
  26715. + return;
  26716. +}
  26717. +
  26718. +static void
  26719. +gzip1_decompress(coa_t coa, __u8 * src_first, size_t src_len,
  26720. + __u8 * dst_first, size_t *dst_len)
  26721. +{
  26722. + int ret = 0;
  26723. + struct z_stream_s stream;
  26724. +
  26725. + assert("edward-843", coa != NULL);
  26726. + assert("edward-876", src_len != 0);
  26727. +
  26728. + stream.workspace = coa;
  26729. + ret = zlib_inflateInit2(&stream, -GZIP1_DEF_WINBITS);
  26730. + if (ret != Z_OK) {
  26731. + warning("edward-774", "zlib_inflateInit2 returned %d\n", ret);
  26732. + return;
  26733. + }
  26734. + ret = zlib_inflateReset(&stream);
  26735. + if (ret != Z_OK) {
  26736. + warning("edward-775", "zlib_inflateReset returned %d\n", ret);
  26737. + return;
  26738. + }
  26739. +
  26740. + stream.next_in = src_first;
  26741. + stream.avail_in = src_len;
  26742. + stream.next_out = dst_first;
  26743. + stream.avail_out = *dst_len;
  26744. +
  26745. + ret = zlib_inflate(&stream, Z_SYNC_FLUSH);
  26746. + /*
  26747. + * Work around a bug in zlib, which sometimes wants to taste an extra
  26748. + * byte when being used in the (undocumented) raw deflate mode.
  26749. + * (From USAGI).
  26750. + */
  26751. + if (ret == Z_OK && !stream.avail_in && stream.avail_out) {
  26752. + u8 zerostuff = 0;
  26753. + stream.next_in = &zerostuff;
  26754. + stream.avail_in = 1;
  26755. + ret = zlib_inflate(&stream, Z_FINISH);
  26756. + }
  26757. + if (ret != Z_STREAM_END) {
  26758. + warning("edward-776", "zlib_inflate returned %d\n", ret);
  26759. + return;
  26760. + }
  26761. + *dst_len = stream.total_out;
  26762. + return;
  26763. +}
  26764. +
  26765. +/******************************************************************************/
  26766. +/* lzo1 compression */
  26767. +/******************************************************************************/
  26768. +
  26769. +static int lzo1_init(void)
  26770. +{
  26771. + return 0;
  26772. +}
  26773. +
  26774. +static int lzo1_overrun(unsigned in_len)
  26775. +{
  26776. + return in_len / 16 + 64 + 3;
  26777. +}
  26778. +
  26779. +static coa_t lzo1_alloc(tfm_action act) /* returns workspace, NULL (none needed), or ERR_PTR(-ENOMEM) */
  26780. +{
  26781. + int ret = 0;
  26782. + coa_t coa = NULL;
  26783. +
  26784. + switch (act) {
  26785. + case TFMA_WRITE: /* compress */
  26786. + coa = reiser4_vmalloc(LZO1X_1_MEM_COMPRESS);
  26787. + if (!coa) {
  26788. + ret = -ENOMEM;
  26789. + break;
  26790. + } /* fall through: lzo1x decompression needs no workspace */
  26791. + case TFMA_READ: /* decompress */
  26792. + break;
  26793. + default:
  26794. + impossible("edward-877", "unknown tfm action");
  26795. + }
  26796. + if (ret)
  26797. + return ERR_PTR(ret);
  26798. + return coa;
  26799. +}
  26800. +
  26801. +static void lzo1_free(coa_t coa, tfm_action act)
  26802. +{
  26803. + assert("edward-879", coa != NULL);
  26804. +
  26805. + switch (act) {
  26806. + case TFMA_WRITE: /* compress */
  26807. + vfree(coa);
  26808. + break;
  26809. + case TFMA_READ: /* decompress: no workspace is ever allocated */
  26810. + impossible("edward-1304", "trying to free non-allocated workspace");
  26811. + break;
  26812. + default:
  26813. + impossible("edward-880", "unknown tfm action");
  26814. + }
  26815. + return;
  26816. +}
  26817. +
  26818. +static int lzo1_min_size_deflate(void)
  26819. +{
  26820. + return 256;
  26821. +}
  26822. +
  26823. +static void
  26824. +lzo1_compress(coa_t coa, __u8 * src_first, size_t src_len,
  26825. + __u8 * dst_first, size_t *dst_len)
  26826. +{
  26827. + int result;
  26828. +
  26829. + assert("edward-846", coa != NULL);
  26830. + assert("edward-847", src_len != 0);
  26831. +
  26832. + result = lzo1x_1_compress(src_first, src_len, dst_first, dst_len, coa);
  26833. + if (unlikely(result != LZO_E_OK)) {
  26834. + warning("edward-849", "lzo1x_1_compress failed\n");
  26835. + goto out;
  26836. + }
  26837. + if (*dst_len >= src_len) {
  26838. + //warning("edward-850", "lzo1x_1_compress: incompressible data\n");
  26839. + goto out;
  26840. + }
  26841. + return;
  26842. + out:
  26843. + *dst_len = src_len;
  26844. + return;
  26845. +}
  26846. +
  26847. +static void
  26848. +lzo1_decompress(coa_t coa, __u8 * src_first, size_t src_len,
  26849. + __u8 * dst_first, size_t *dst_len)
  26850. +{
  26851. + int result;
  26852. +
  26853. + assert("edward-851", coa == NULL);
  26854. + assert("edward-852", src_len != 0);
  26855. +
  26856. + result = lzo1x_decompress_safe(src_first, src_len, dst_first, dst_len);
  26857. + if (result != LZO_E_OK)
  26858. + warning("edward-853", "lzo1x_1_decompress failed\n");
  26859. + return;
  26860. +}
  26861. +
  26862. +/******************************************************************************/
  26863. +/* zstd1 compression */
  26864. +/******************************************************************************/
  26865. +
  26866. +typedef struct {
  26867. + ZSTD_parameters params;
  26868. + void* workspace;
  26869. + ZSTD_CCtx* cctx;
  26870. +} zstd1_coa_c;
  26871. +typedef struct {
  26872. + void* workspace;
  26873. + ZSTD_DCtx* dctx;
  26874. +} zstd1_coa_d;
  26875. +
  26876. +static int zstd1_init(void)
  26877. +{
  26878. + return 0;
  26879. +}
  26880. +
  26881. +static int zstd1_overrun(unsigned src_len) /* worst-case output growth over @src_len input */
  26882. +{
  26883. + return ZSTD_compressBound(src_len) - src_len;
  26884. +}
  26885. +
  26886. +static coa_t zstd1_alloc(tfm_action act)
  26887. +{
  26888. + int ret = 0;
  26889. + size_t workspace_size;
  26890. + coa_t coa = NULL;
  26891. +
  26892. + switch (act) {
  26893. + case TFMA_WRITE: /* compress */
  26894. + coa = reiser4_vmalloc(sizeof(zstd1_coa_c));
  26895. + if (!coa) {
  26896. + ret = -ENOMEM;
  26897. + break;
  26898. + }
  26899. + /* ZSTD benchmark use level 1 as default. Max is 22. */
  26900. + ((zstd1_coa_c*)coa)->params = ZSTD_getParams(1, 0, 0);
  26901. + workspace_size = ZSTD_CCtxWorkspaceBound(((zstd1_coa_c*)coa)->params.cParams);
  26902. + ((zstd1_coa_c*)coa)->workspace = reiser4_vmalloc(workspace_size);
  26903. + if (!(((zstd1_coa_c*)coa)->workspace)) {
  26904. + ret = -ENOMEM;
  26905. + vfree(coa);
  26906. + break;
  26907. + }
  26908. + ((zstd1_coa_c*)coa)->cctx = ZSTD_initCCtx(((zstd1_coa_c*)coa)->workspace, workspace_size);
  26909. + if (!(((zstd1_coa_c*)coa)->cctx)) {
  26910. + ret = -ENOMEM;
  26911. + vfree(((zstd1_coa_c*)coa)->workspace);
  26912. + vfree(coa);
  26913. + break;
  26914. + }
  26915. + break;
  26916. + case TFMA_READ: /* decompress */
  26917. + coa = reiser4_vmalloc(sizeof(zstd1_coa_d));
  26918. + if (!coa) {
  26919. + ret = -ENOMEM;
  26920. + break;
  26921. + }
  26922. + workspace_size = ZSTD_DCtxWorkspaceBound();
  26923. + ((zstd1_coa_d*)coa)->workspace = reiser4_vmalloc(workspace_size);
  26924. + if (!(((zstd1_coa_d*)coa)->workspace)) {
  26925. + ret = -ENOMEM;
  26926. + vfree(coa);
  26927. + break;
  26928. + }
  26929. + ((zstd1_coa_d*)coa)->dctx = ZSTD_initDCtx(((zstd1_coa_d*)coa)->workspace, workspace_size);
  26930. + if (!(((zstd1_coa_d*)coa)->dctx)) {
  26931. + ret = -ENOMEM;
  26932. + vfree(((zstd1_coa_d*)coa)->workspace);
  26933. + vfree(coa);
  26934. + break;
  26935. + }
  26936. + break;
  26937. + default:
  26938. + impossible("bsinot-1",
  26939. + "trying to alloc workspace for unknown tfm action");
  26940. + }
  26941. + if (ret) {
  26942. + warning("bsinot-2",
  26943. + "alloc workspace for zstd (tfm action = %d) failed\n",
  26944. + act);
  26945. + return ERR_PTR(ret);
  26946. + }
  26947. + return coa;
  26948. +}
  26949. +
  26950. +static void zstd1_free(coa_t coa, tfm_action act)
  26951. +{
  26952. + assert("bsinot-3", coa != NULL);
  26953. +
  26954. + switch (act) {
  26955. + case TFMA_WRITE: /* compress */
  26956. + vfree(((zstd1_coa_c*)coa)->workspace);
  26957. + vfree(coa);
  26958. + //printk(KERN_WARNING "free comp memory -- %p\n", coa);
  26959. + break;
  26960. + case TFMA_READ: /* decompress */
  26961. + vfree(((zstd1_coa_d*)coa)->workspace);
  26962. + vfree(coa);
  26963. + //printk(KERN_WARNING "free decomp memory -- %p\n", coa);
  26964. + break;
  26965. + default:
  26966. + impossible("bsinot-4", "unknown tfm action");
  26967. + }
  26968. + return;
  26969. +}
  26970. +
  26971. +static int zstd1_min_size_deflate(void)
  26972. +{
  26973. + return 256; /* I'm not sure about the correct value, so took from LZO1 */
  26974. +}
  26975. +
  26976. +static void
  26977. +zstd1_compress(coa_t coa, __u8 * src_first, size_t src_len,
  26978. + __u8 * dst_first, size_t *dst_len)
  26979. +{
  26980. + unsigned int result;
  26981. +
  26982. + assert("bsinot-5", coa != NULL);
  26983. + assert("bsinot-6", src_len != 0);
  26984. + result = ZSTD_compressCCtx(((zstd1_coa_c*)coa)->cctx, dst_first, *dst_len, src_first, src_len, ((zstd1_coa_c*)coa)->params);
  26985. + if (ZSTD_isError(result)) {
  26986. + warning("bsinot-7", "zstd1_compressCCtx failed\n");
  26987. + goto out;
  26988. + }
  26989. + *dst_len = result;
  26990. + if (*dst_len >= src_len) {
  26991. + //warning("bsinot-8", "zstd1_compressCCtx: incompressible data\n");
  26992. + goto out;
  26993. + }
  26994. + return;
  26995. + out:
  26996. + *dst_len = src_len;
  26997. + return;
  26998. +}
  26999. +
  27000. +static void
  27001. +zstd1_decompress(coa_t coa, __u8 * src_first, size_t src_len,
  27002. + __u8 * dst_first, size_t *dst_len)
  27003. +{
  27004. + unsigned int result;
  27005. +
  27006. + assert("bsinot-9", coa != NULL);
  27007. + assert("bsinot-10", src_len != 0);
  27008. +
  27009. + result = ZSTD_decompressDCtx(((zstd1_coa_d*)coa)->dctx, dst_first, *dst_len, src_first, src_len);
  27010. + if (ZSTD_isError(result)) {
  27011. + warning("bsinot-11", "zstd1_decompressDCtx failed\n");
  27012. + return; /* do not store the zstd error code into *dst_len */
  27013. + }
  27014. + *dst_len = result;
  27015. +}
  27016. +
  27017. +
  27018. +compression_plugin compression_plugins[LAST_COMPRESSION_ID] = {
  27019. + [LZO1_COMPRESSION_ID] = {
  27020. + .h = {
  27021. + .type_id = REISER4_COMPRESSION_PLUGIN_TYPE,
  27022. + .id = LZO1_COMPRESSION_ID,
  27023. + .pops = &compression_plugin_ops,
  27024. + .label = "lzo1",
  27025. + .desc = "lzo1 compression transform",
  27026. + .linkage = {NULL, NULL}
  27027. + },
  27028. + .init = lzo1_init,
  27029. + .overrun = lzo1_overrun,
  27030. + .alloc = lzo1_alloc,
  27031. + .free = lzo1_free,
  27032. + .min_size_deflate = lzo1_min_size_deflate,
  27033. + .checksum = reiser4_adler32,
  27034. + .compress = lzo1_compress,
  27035. + .decompress = lzo1_decompress
  27036. + },
  27037. + [GZIP1_COMPRESSION_ID] = {
  27038. + .h = {
  27039. + .type_id = REISER4_COMPRESSION_PLUGIN_TYPE,
  27040. + .id = GZIP1_COMPRESSION_ID,
  27041. + .pops = &compression_plugin_ops,
  27042. + .label = "gzip1",
  27043. + .desc = "gzip1 compression transform",
  27044. + .linkage = {NULL, NULL}
  27045. + },
  27046. + .init = gzip1_init,
  27047. + .overrun = gzip1_overrun,
  27048. + .alloc = gzip1_alloc,
  27049. + .free = gzip1_free,
  27050. + .min_size_deflate = gzip1_min_size_deflate,
  27051. + .checksum = reiser4_adler32,
  27052. + .compress = gzip1_compress,
  27053. + .decompress = gzip1_decompress
  27054. + },
  27055. + [ZSTD1_COMPRESSION_ID] = {
  27056. + .h = {
  27057. + .type_id = REISER4_COMPRESSION_PLUGIN_TYPE,
  27058. + .id = ZSTD1_COMPRESSION_ID,
  27059. + .pops = &compression_plugin_ops,
  27060. + .label = "zstd1",
  27061. + .desc = "zstd1 compression transform",
  27062. + .linkage = {NULL, NULL}
  27063. + },
  27064. + .init = zstd1_init,
  27065. + .overrun = zstd1_overrun,
  27066. + .alloc = zstd1_alloc,
  27067. + .free = zstd1_free,
  27068. + .min_size_deflate = zstd1_min_size_deflate,
  27069. + .checksum = reiser4_adler32,
  27070. + .compress = zstd1_compress,
  27071. + .decompress = zstd1_decompress
  27072. + }
  27073. +};
  27074. +
  27075. +/*
  27076. + Local variables:
  27077. + c-indentation-style: "K&R"
  27078. + mode-name: "LC"
  27079. + c-basic-offset: 8
  27080. + tab-width: 8
  27081. + fill-column: 120
  27082. + scroll-step: 1
  27083. + End:
  27084. +*/
  27085. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/compress/compress.h linux-4.14.2/fs/reiser4/plugin/compress/compress.h
  27086. --- linux-4.14.2.orig/fs/reiser4/plugin/compress/compress.h 1970-01-01 01:00:00.000000000 +0100
  27087. +++ linux-4.14.2/fs/reiser4/plugin/compress/compress.h 2017-11-26 22:14:44.000000000 +0100
  27088. @@ -0,0 +1,44 @@
  27089. +#if !defined( __FS_REISER4_COMPRESS_H__ )
  27090. +#define __FS_REISER4_COMPRESS_H__
  27091. +
  27092. +#include <linux/types.h>
  27093. +#include <linux/string.h>
  27094. +
  27095. +/* transform direction */
  27096. +typedef enum {
  27097. + TFMA_READ, /* decrypt, decompress */
  27098. + TFMA_WRITE, /* encrypt, compress */
  27099. + TFMA_LAST
  27100. +} tfm_action;
  27101. +
  27102. +/* supported compression algorithms */
  27103. +typedef enum {
  27104. + LZO1_COMPRESSION_ID,
  27105. + GZIP1_COMPRESSION_ID,
  27106. + ZSTD1_COMPRESSION_ID,
  27107. + LAST_COMPRESSION_ID,
  27108. +} reiser4_compression_id;
  27109. +
  27110. +/* the same as pgoff, but units are page clusters */
  27111. +typedef unsigned long cloff_t;
  27112. +
  27113. +/* working data of a (de)compression algorithm */
  27114. +typedef void *coa_t;
  27115. +
  27116. +/* table for all supported (de)compression algorithms */
  27117. +typedef coa_t coa_set[LAST_COMPRESSION_ID][TFMA_LAST];
  27118. +
  27119. +__u32 reiser4_adler32(char *data, __u32 len);
  27120. +
  27121. +#endif /* __FS_REISER4_COMPRESS_H__ */
  27122. +
  27123. +/* Make Linus happy.
  27124. + Local variables:
  27125. + c-indentation-style: "K&R"
  27126. + mode-name: "LC"
  27127. + c-basic-offset: 8
  27128. + tab-width: 8
  27129. + fill-column: 120
  27130. + scroll-step: 1
  27131. + End:
  27132. +*/
  27133. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/compress/compress_mode.c linux-4.14.2/fs/reiser4/plugin/compress/compress_mode.c
  27134. --- linux-4.14.2.orig/fs/reiser4/plugin/compress/compress_mode.c 1970-01-01 01:00:00.000000000 +0100
  27135. +++ linux-4.14.2/fs/reiser4/plugin/compress/compress_mode.c 2017-11-26 22:13:09.000000000 +0100
  27136. @@ -0,0 +1,162 @@
  27137. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  27138. +/* This file contains Reiser4 compression mode plugins.
  27139. +
  27140. + Compression mode plugin is a set of handlers called by compressor
  27141. + at flush time and represent some heuristics including the ones
  27142. + which are to avoid compression of incompressible data, see
  27143. + http://www.namesys.com/cryptcompress_design.html for more details.
  27144. +*/
  27145. +#include "../../inode.h"
  27146. +#include "../plugin.h"
  27147. +
  27148. +static int should_deflate_none(struct inode * inode, cloff_t index)
  27149. +{
  27150. + return 0;
  27151. +}
  27152. +
  27153. +static int should_deflate_common(struct inode * inode, cloff_t index)
  27154. +{
  27155. + return compression_is_on(cryptcompress_inode_data(inode));
  27156. +}
  27157. +
  27158. +static int discard_hook_ultim(struct inode *inode, cloff_t index)
  27159. +{
  27160. + turn_off_compression(cryptcompress_inode_data(inode));
  27161. + return 0;
  27162. +}
  27163. +
  27164. +static int discard_hook_lattd(struct inode *inode, cloff_t index)
  27165. +{
  27166. + struct cryptcompress_info * info = cryptcompress_inode_data(inode);
  27167. +
  27168. + assert("edward-1462",
  27169. + get_lattice_factor(info) >= MIN_LATTICE_FACTOR &&
  27170. + get_lattice_factor(info) <= MAX_LATTICE_FACTOR);
  27171. +
  27172. + turn_off_compression(info);
  27173. + if (get_lattice_factor(info) < MAX_LATTICE_FACTOR)
  27174. + set_lattice_factor(info, get_lattice_factor(info) << 1);
  27175. + return 0;
  27176. +}
  27177. +
  27178. +static int accept_hook_lattd(struct inode *inode, cloff_t index)
  27179. +{
  27180. + turn_on_compression(cryptcompress_inode_data(inode));
  27181. + set_lattice_factor(cryptcompress_inode_data(inode), MIN_LATTICE_FACTOR);
  27182. + return 0;
  27183. +}
  27184. +
  27185. +/* Check on dynamic lattice, the adaptive compression modes which
  27186. + defines the following behavior:
  27187. +
  27188. + Compression is on: try to compress everything and turn
  27189. + it off, whenever cluster is incompressible.
  27190. +
  27191. + Compression is off: try to compress clusters of indexes
  27192. + k * FACTOR (k = 0, 1, 2, ...) and turn it on, if some of
  27193. + them is compressible. If incompressible, then increase FACTOR */
  27194. +
  27195. +/* check if @index belongs to one-dimensional lattice
  27196. + of sparce factor @factor */
  27197. +static int is_on_lattice(cloff_t index, int factor) /* nonzero if @index is a multiple of @factor (factor == 0: only index 0) */
  27198. +{
  27199. + return (factor ? index % factor == 0: index == 0);
  27200. +}
  27201. +
  27202. +static int should_deflate_lattd(struct inode * inode, cloff_t index)
  27203. +{
  27204. + return should_deflate_common(inode, index) ||
  27205. + is_on_lattice(index,
  27206. + get_lattice_factor
  27207. + (cryptcompress_inode_data(inode)));
  27208. +}
  27209. +
  27210. +/* compression mode_plugins */
  27211. +compression_mode_plugin compression_mode_plugins[LAST_COMPRESSION_MODE_ID] = {
  27212. + [NONE_COMPRESSION_MODE_ID] = {
  27213. + .h = {
  27214. + .type_id = REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
  27215. + .id = NONE_COMPRESSION_MODE_ID,
  27216. + .pops = NULL,
  27217. + .label = "none",
  27218. + .desc = "Compress nothing",
  27219. + .linkage = {NULL, NULL}
  27220. + },
  27221. + .should_deflate = should_deflate_none,
  27222. + .accept_hook = NULL,
  27223. + .discard_hook = NULL
  27224. + },
  27225. + /* Check-on-dynamic-lattice adaptive compression mode */
  27226. + [LATTD_COMPRESSION_MODE_ID] = {
  27227. + .h = {
  27228. + .type_id = REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
  27229. + .id = LATTD_COMPRESSION_MODE_ID,
  27230. + .pops = NULL,
  27231. + .label = "lattd",
  27232. + .desc = "Check on dynamic lattice",
  27233. + .linkage = {NULL, NULL}
  27234. + },
  27235. + .should_deflate = should_deflate_lattd,
  27236. + .accept_hook = accept_hook_lattd,
  27237. + .discard_hook = discard_hook_lattd
  27238. + },
  27239. + /* Check-ultimately compression mode:
  27240. + Turn off compression forever as soon as we meet
  27241. + incompressible data */
  27242. + [ULTIM_COMPRESSION_MODE_ID] = {
  27243. + .h = {
  27244. + .type_id = REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
  27245. + .id = ULTIM_COMPRESSION_MODE_ID,
  27246. + .pops = NULL,
  27247. + .label = "ultim",
  27248. + .desc = "Check ultimately",
  27249. + .linkage = {NULL, NULL}
  27250. + },
  27251. + .should_deflate = should_deflate_common,
  27252. + .accept_hook = NULL,
  27253. + .discard_hook = discard_hook_ultim
  27254. + },
  27255. + /* Force-to-compress-everything compression mode */
  27256. + [FORCE_COMPRESSION_MODE_ID] = {
  27257. + .h = {
  27258. + .type_id = REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
  27259. + .id = FORCE_COMPRESSION_MODE_ID,
  27260. + .pops = NULL,
  27261. + .label = "force",
  27262. + .desc = "Force to compress everything",
  27263. + .linkage = {NULL, NULL}
  27264. + },
  27265. + .should_deflate = NULL,
  27266. + .accept_hook = NULL,
  27267. + .discard_hook = NULL
  27268. + },
  27269. + /* Convert-to-extent compression mode.
  27270. + In this mode items will be converted to extents and management
  27271. + will be passed to (classic) unix file plugin as soon as ->write()
  27272. + detects that the first complete logical cluster (of index #0) is
  27273. + incompressible. */
  27274. + [CONVX_COMPRESSION_MODE_ID] = {
  27275. + .h = {
  27276. + .type_id = REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
  27277. + .id = CONVX_COMPRESSION_MODE_ID,
  27278. + .pops = NULL,
  27279. + .label = "conv",
  27280. + .desc = "Convert to extent",
  27281. + .linkage = {NULL, NULL}
  27282. + },
  27283. + .should_deflate = should_deflate_common,
  27284. + .accept_hook = NULL,
  27285. + .discard_hook = NULL
  27286. + }
  27287. +};
  27288. +
  27289. +/*
  27290. + Local variables:
  27291. + c-indentation-style: "K&R"
  27292. + mode-name: "LC"
  27293. + c-basic-offset: 8
  27294. + tab-width: 8
  27295. + fill-column: 120
  27296. + scroll-step: 1
  27297. + End:
  27298. +*/
  27299. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/compress/Makefile linux-4.14.2/fs/reiser4/plugin/compress/Makefile
  27300. --- linux-4.14.2.orig/fs/reiser4/plugin/compress/Makefile 1970-01-01 01:00:00.000000000 +0100
  27301. +++ linux-4.14.2/fs/reiser4/plugin/compress/Makefile 2017-11-26 22:13:09.000000000 +0100
  27302. @@ -0,0 +1,5 @@
  27303. +obj-$(CONFIG_REISER4_FS) += compress_plugins.o
  27304. +
  27305. +compress_plugins-objs := \
  27306. + compress.o \
  27307. + compress_mode.o
  27308. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/crypto/cipher.c linux-4.14.2/fs/reiser4/plugin/crypto/cipher.c
  27309. --- linux-4.14.2.orig/fs/reiser4/plugin/crypto/cipher.c 1970-01-01 01:00:00.000000000 +0100
  27310. +++ linux-4.14.2/fs/reiser4/plugin/crypto/cipher.c 2017-11-26 22:13:09.000000000 +0100
  27311. @@ -0,0 +1,37 @@
  27312. +/* Copyright 2001, 2002, 2003 by Hans Reiser,
  27313. + licensing governed by reiser4/README */
  27314. +/* Reiser4 cipher transform plugins */
  27315. +
  27316. +#include "../../debug.h"
  27317. +#include "../plugin.h"
  27318. +
  27319. +cipher_plugin cipher_plugins[LAST_CIPHER_ID] = {
  27320. + [NONE_CIPHER_ID] = {
  27321. + .h = {
  27322. + .type_id = REISER4_CIPHER_PLUGIN_TYPE,
  27323. + .id = NONE_CIPHER_ID,
  27324. + .pops = NULL,
  27325. + .label = "none",
  27326. + .desc = "no cipher transform",
  27327. + .linkage = {NULL, NULL}
  27328. + },
  27329. + .alloc = NULL,
  27330. + .free = NULL,
  27331. + .scale = NULL,
  27332. + .align_stream = NULL,
  27333. + .setkey = NULL,
  27334. + .encrypt = NULL,
  27335. + .decrypt = NULL
  27336. + }
  27337. +};
  27338. +
  27339. +/* Make Linus happy.
  27340. + Local variables:
  27341. + c-indentation-style: "K&R"
  27342. + mode-name: "LC"
  27343. + c-basic-offset: 8
  27344. + tab-width: 8
  27345. + fill-column: 120
  27346. + scroll-step: 1
  27347. + End:
  27348. +*/
  27349. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/crypto/cipher.h linux-4.14.2/fs/reiser4/plugin/crypto/cipher.h
  27350. --- linux-4.14.2.orig/fs/reiser4/plugin/crypto/cipher.h 1970-01-01 01:00:00.000000000 +0100
  27351. +++ linux-4.14.2/fs/reiser4/plugin/crypto/cipher.h 2017-11-26 22:13:09.000000000 +0100
  27352. @@ -0,0 +1,55 @@
  27353. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  27354. +/* This file contains definitions for the objects operated
  27355. + by reiser4 key manager, which is something like keyring
  27356. + wrapped by appropriate reiser4 plugin */
  27357. +
  27358. +#if !defined( __FS_REISER4_CRYPT_H__ )
  27359. +#define __FS_REISER4_CRYPT_H__
  27360. +
  27361. +#include <linux/crypto.h>
  27362. +
  27363. +/* key info imported from user space */
  27364. +struct reiser4_crypto_data {
  27365. + int keysize; /* uninstantiated key size */
  27366. + __u8 * key; /* uninstantiated key */
  27367. + int keyid_size; /* size of passphrase */
  27368. + __u8 * keyid; /* passphrase */
  27369. +};
  27370. +
  27371. +/* This object contains all needed infrastructure to implement
  27372. + cipher transform. This is operated (allocating, inheriting,
  27373. + validating, binding to host inode, etc..) by reiser4 key manager.
  27374. +
  27375. + This info can be allocated in two cases:
  27376. + 1. importing a key from user space.
  27377. + 2. reading inode from disk */
  27378. +struct reiser4_crypto_info {
  27379. + struct inode * host;
  27380. + struct crypto_hash * digest;
  27381. + struct crypto_blkcipher * cipher;
  27382. +#if 0
  27383. + cipher_key_plugin * kplug; /* key manager */
  27384. +#endif
  27385. + __u8 * keyid; /* key fingerprint, created by digest plugin,
  27386. + using uninstantiated key and passphrase.
  27387. + supposed to be stored in disk stat-data */
  27388. + int inst; /* this indicates if the cipher key is
  27389. + instantiated (case 1 above) */
  27390. + int keysize; /* uninstantiated key size (bytes), supposed
  27391. + to be stored in disk stat-data */
  27392. + int keyload_count; /* number of the objects which has this
  27393. + crypto-stat attached */
  27394. +};
  27395. +
  27396. +#endif /* __FS_REISER4_CRYPT_H__ */
  27397. +
  27398. +/*
  27399. + Local variables:
  27400. + c-indentation-style: "K&R"
  27401. + mode-name: "LC"
  27402. + c-basic-offset: 8
  27403. + tab-width: 8
  27404. + fill-column: 120
  27405. + scroll-step: 1
  27406. + End:
  27407. +*/
  27408. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/crypto/digest.c linux-4.14.2/fs/reiser4/plugin/crypto/digest.c
  27409. --- linux-4.14.2.orig/fs/reiser4/plugin/crypto/digest.c 1970-01-01 01:00:00.000000000 +0100
  27410. +++ linux-4.14.2/fs/reiser4/plugin/crypto/digest.c 2017-11-26 22:13:09.000000000 +0100
  27411. @@ -0,0 +1,58 @@
  27412. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  27413. +
  27414. +/* reiser4 digest transform plugin (is used by cryptcompress object plugin) */
  27415. +/* EDWARD-FIXME-HANS: and it does what? a digest is a what? */
  27416. +#include "../../debug.h"
  27417. +#include "../plugin_header.h"
  27418. +#include "../plugin.h"
  27419. +#include "../file/cryptcompress.h"
  27420. +
  27421. +#include <linux/types.h>
  27422. +
  27423. +extern digest_plugin digest_plugins[LAST_DIGEST_ID];
  27424. +
  27425. +static struct crypto_hash * alloc_sha256 (void)
  27426. +{
  27427. +#if REISER4_SHA256
  27428. + return crypto_alloc_hash ("sha256", 0, CRYPTO_ALG_ASYNC);
  27429. +#else
  27430. + warning("edward-1418", "sha256 unsupported");
  27431. + return ERR_PTR(-EINVAL);
  27432. +#endif
  27433. +}
  27434. +
  27435. +static void free_sha256 (struct crypto_hash * tfm)
  27436. +{
  27437. +#if REISER4_SHA256
  27438. + crypto_free_hash(tfm);
  27439. +#endif
  27440. + return;
  27441. +}
  27442. +
  27443. +/* digest plugins */
  27444. +digest_plugin digest_plugins[LAST_DIGEST_ID] = {
  27445. + [SHA256_32_DIGEST_ID] = {
  27446. + .h = {
  27447. + .type_id = REISER4_DIGEST_PLUGIN_TYPE,
  27448. + .id = SHA256_32_DIGEST_ID,
  27449. + .pops = NULL,
  27450. + .label = "sha256_32",
  27451. + .desc = "sha256_32 digest transform",
  27452. + .linkage = {NULL, NULL}
  27453. + },
  27454. + .fipsize = sizeof(__u32),
  27455. + .alloc = alloc_sha256,
  27456. + .free = free_sha256
  27457. + }
  27458. +};
  27459. +
  27460. +/*
  27461. + Local variables:
  27462. + c-indentation-style: "K&R"
  27463. + mode-name: "LC"
  27464. + c-basic-offset: 8
  27465. + tab-width: 8
  27466. + fill-column: 120
  27467. + scroll-step: 1
  27468. + End:
  27469. +*/
  27470. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/dir/dir.h linux-4.14.2/fs/reiser4/plugin/dir/dir.h
  27471. --- linux-4.14.2.orig/fs/reiser4/plugin/dir/dir.h 1970-01-01 01:00:00.000000000 +0100
  27472. +++ linux-4.14.2/fs/reiser4/plugin/dir/dir.h 2017-11-26 22:13:09.000000000 +0100
  27473. @@ -0,0 +1,36 @@
  27474. +/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
  27475. + * reiser4/README */
  27476. +
  27477. +/* this file contains declarations of methods implementing directory plugins */
  27478. +
  27479. +#if !defined( __REISER4_DIR_H__ )
  27480. +#define __REISER4_DIR_H__
  27481. +
  27482. +/*#include "../../key.h"
  27483. +
  27484. +#include <linux/fs.h>*/
  27485. +
  27486. +/* declarations of functions implementing HASHED_DIR_PLUGIN_ID dir plugin */
  27487. +
  27488. +/* "hashed" directory methods of dir plugin */
  27489. +void build_entry_key_hashed(const struct inode *, const struct qstr *,
  27490. + reiser4_key *);
  27491. +
  27492. +/* declarations of functions implementing SEEKABLE_HASHED_DIR_PLUGIN_ID dir plugin */
  27493. +
  27494. +/* "seekable" directory methods of dir plugin */
  27495. +void build_entry_key_seekable(const struct inode *, const struct qstr *,
  27496. + reiser4_key *);
  27497. +
  27498. +/* __REISER4_DIR_H__ */
  27499. +#endif
  27500. +
  27501. +/*
  27502. + Local variables:
  27503. + c-indentation-style: "K&R"
  27504. + mode-name: "LC"
  27505. + c-basic-offset: 8
  27506. + tab-width: 8
  27507. + fill-column: 120
  27508. + End:
  27509. +*/
  27510. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/dir/hashed_dir.c linux-4.14.2/fs/reiser4/plugin/dir/hashed_dir.c
  27511. --- linux-4.14.2.orig/fs/reiser4/plugin/dir/hashed_dir.c 1970-01-01 01:00:00.000000000 +0100
  27512. +++ linux-4.14.2/fs/reiser4/plugin/dir/hashed_dir.c 2017-11-26 22:13:09.000000000 +0100
  27513. @@ -0,0 +1,81 @@
  27514. +/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
  27515. + * reiser4/README */
  27516. +
  27517. +/* Directory plugin using hashes (see fs/reiser4/plugin/hash.c) to map file
  27518. + names to the files. */
  27519. +
  27520. +/*
  27521. + * Hashed directory logically consists of persistent directory
  27522. + * entries. Directory entry is a pair of a file name and a key of stat-data of
  27523. + * a file that has this name in the given directory.
  27524. + *
  27525. + * Directory entries are stored in the tree in the form of directory
  27526. + * items. Directory item should implement dir_entry_ops portion of item plugin
  27527. + * interface (see plugin/item/item.h). Hashed directory interacts with
  27528. + * directory item plugin exclusively through dir_entry_ops operations.
  27529. + *
  27530. + * Currently there are two implementations of directory items: "simple
  27531. + * directory item" (plugin/item/sde.[ch]), and "compound directory item"
  27532. + * (plugin/item/cde.[ch]) with the latter being the default.
  27533. + *
  27534. + * There is, however some delicate way through which directory code interferes
  27535. + * with item plugin: key assignment policy. A key for a directory item is
  27536. + * chosen by directory code, and as described in kassign.c, this key contains
  27537. + * a portion of file name. Directory item uses this knowledge to avoid storing
  27538. + * this portion of file name twice: in the key and in the directory item body.
  27539. + *
  27540. + */
  27541. +
  27542. +#include "../../inode.h"
  27543. +
  27544. +void complete_entry_key(const struct inode *, const char *name,
  27545. + int len, reiser4_key * result);
  27546. +
  27547. +/* this is implementation of build_entry_key method of dir
  27548. + plugin for HASHED_DIR_PLUGIN_ID
  27549. + */
  27550. +void build_entry_key_hashed(const struct inode *dir, /* directory where entry is
  27551. + * (or will be) in.*/
  27552. + const struct qstr *qname, /* name of file referenced
  27553. + * by this entry */
  27554. + reiser4_key * result /* resulting key of directory
  27555. + * entry */ )
  27556. +{
  27557. + const char *name;
  27558. + int len;
  27559. +
  27560. + assert("nikita-1139", dir != NULL);
  27561. + assert("nikita-1140", qname != NULL);
  27562. + assert("nikita-1141", qname->name != NULL);
  27563. + assert("nikita-1142", result != NULL);
  27564. +
  27565. + name = qname->name;
  27566. + len = qname->len;
  27567. +
  27568. + assert("nikita-2867", strlen(name) == len);
  27569. +
  27570. + reiser4_key_init(result);
  27571. + /* locality of directory entry's key is objectid of parent
  27572. + directory */
  27573. + set_key_locality(result, get_inode_oid(dir));
  27574. + /* minor packing locality is constant */
  27575. + set_key_type(result, KEY_FILE_NAME_MINOR);
  27576. + /* dot is special case---we always want it to be first entry in
  27577. + a directory. Actually, we just want to have smallest
  27578. + directory entry.
  27579. + */
  27580. + if (len == 1 && name[0] == '.')
  27581. + return;
  27582. +
  27583. + /* initialize part of entry key which depends on file name */
  27584. + complete_entry_key(dir, name, len, result);
  27585. +}
  27586. +
  27587. +/* Local variables:
  27588. + c-indentation-style: "K&R"
  27589. + mode-name: "LC"
  27590. + c-basic-offset: 8
  27591. + tab-width: 8
  27592. + fill-column: 120
  27593. + End:
  27594. +*/
  27595. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/dir/Makefile linux-4.14.2/fs/reiser4/plugin/dir/Makefile
  27596. --- linux-4.14.2.orig/fs/reiser4/plugin/dir/Makefile 1970-01-01 01:00:00.000000000 +0100
  27597. +++ linux-4.14.2/fs/reiser4/plugin/dir/Makefile 2017-11-26 22:13:09.000000000 +0100
  27598. @@ -0,0 +1,5 @@
  27599. +obj-$(CONFIG_REISER4_FS) += dir_plugins.o
  27600. +
  27601. +dir_plugins-objs := \
  27602. + hashed_dir.o \
  27603. + seekable_dir.o
  27604. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/dir/seekable_dir.c linux-4.14.2/fs/reiser4/plugin/dir/seekable_dir.c
  27605. --- linux-4.14.2.orig/fs/reiser4/plugin/dir/seekable_dir.c 1970-01-01 01:00:00.000000000 +0100
  27606. +++ linux-4.14.2/fs/reiser4/plugin/dir/seekable_dir.c 2017-11-26 22:13:09.000000000 +0100
  27607. @@ -0,0 +1,46 @@
  27608. +/* Copyright 2005 by Hans Reiser, licensing governed by
  27609. + * reiser4/README */
  27610. +
  27611. +#include "../../inode.h"
  27612. +
  27613. +/* this is implementation of build_entry_key method of dir
  27614. + plugin for SEEKABLE_HASHED_DIR_PLUGIN_ID
  27615. + This is for directories where we want repeatable and restartable readdir()
  27616. + even in case 32bit user level struct dirent (readdir(3)).
  27617. +*/
  27618. +void
  27619. +build_entry_key_seekable(const struct inode *dir, const struct qstr *name,
  27620. + reiser4_key * result)
  27621. +{
  27622. + oid_t objectid;
  27623. +
  27624. + assert("nikita-2283", dir != NULL);
  27625. + assert("nikita-2284", name != NULL);
  27626. + assert("nikita-2285", name->name != NULL);
  27627. + assert("nikita-2286", result != NULL);
  27628. +
  27629. + reiser4_key_init(result);
  27630. + /* locality of directory entry's key is objectid of parent
  27631. + directory */
  27632. + set_key_locality(result, get_inode_oid(dir));
  27633. + /* minor packing locality is constant */
  27634. + set_key_type(result, KEY_FILE_NAME_MINOR);
  27635. + /* dot is special case---we always want it to be first entry in
  27636. + a directory. Actually, we just want to have smallest
  27637. + directory entry.
  27638. + */
  27639. + if ((name->len == 1) && (name->name[0] == '.'))
  27640. + return;
  27641. +
  27642. + /* objectid of key is 31 lowest bits of hash. */
  27643. + objectid =
  27644. + inode_hash_plugin(dir)->hash(name->name,
  27645. + (int)name->len) & 0x7fffffff;
  27646. +
  27647. + assert("nikita-2303", !(objectid & ~KEY_OBJECTID_MASK));
  27648. + set_key_objectid(result, objectid);
  27649. +
  27650. + /* offset is always 0. */
  27651. + set_key_offset(result, (__u64) 0);
  27652. + return;
  27653. +}
  27654. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/dir_plugin_common.c linux-4.14.2/fs/reiser4/plugin/dir_plugin_common.c
  27655. --- linux-4.14.2.orig/fs/reiser4/plugin/dir_plugin_common.c 1970-01-01 01:00:00.000000000 +0100
  27656. +++ linux-4.14.2/fs/reiser4/plugin/dir_plugin_common.c 2017-11-26 22:13:09.000000000 +0100
  27657. @@ -0,0 +1,865 @@
  27658. +/* Copyright 2005 by Hans Reiser, licensing governed by
  27659. + reiser4/README */
  27660. +
  27661. +/* this file contains typical implementations for most of methods of
  27662. + directory plugin
  27663. +*/
  27664. +
  27665. +#include "../inode.h"
  27666. +
  27667. +int reiser4_find_entry(struct inode *dir, struct dentry *name,
  27668. + lock_handle * , znode_lock_mode, reiser4_dir_entry_desc *);
  27669. +int reiser4_lookup_name(struct inode *parent, struct dentry *dentry,
  27670. + reiser4_key * key);
  27671. +void check_light_weight(struct inode *inode, struct inode *parent);
  27672. +
  27673. +/* this is common implementation of get_parent method of dir plugin
  27674. + this is used by NFS kernel server to "climb" up directory tree to
  27675. + check permissions
  27676. + */
  27677. +struct dentry *get_parent_common(struct inode *child)
  27678. +{
  27679. + struct super_block *s;
  27680. + struct inode *parent;
  27681. + struct dentry dotdot;
  27682. + struct dentry *dentry;
  27683. + reiser4_key key;
  27684. + int result;
  27685. +
  27686. + /*
  27687. + * lookup dotdot entry.
  27688. + */
  27689. +
  27690. + s = child->i_sb;
  27691. + memset(&dotdot, 0, sizeof(dotdot));
  27692. + dotdot.d_name.name = "..";
  27693. + dotdot.d_name.len = 2;
  27694. + dotdot.d_op = &get_super_private(s)->ops.dentry;
  27695. +
  27696. + result = reiser4_lookup_name(child, &dotdot, &key);
  27697. + if (result != 0)
  27698. + return ERR_PTR(result);
  27699. +
  27700. + parent = reiser4_iget(s, &key, 1);
  27701. + if (!IS_ERR(parent)) {
  27702. + /*
  27703. + * FIXME-NIKITA dubious: attributes are inherited from @child
  27704. + * to @parent. But:
  27705. + *
  27706. + * (*) this is the only thing we can do
  27707. + *
  27708. + * (*) attributes of light-weight object are inherited
  27709. + * from a parent through which object was looked up first,
  27710. + * so it is ambiguous anyway.
  27711. + *
  27712. + */
  27713. + check_light_weight(parent, child);
  27714. + reiser4_iget_complete(parent);
  27715. + dentry = d_obtain_alias(parent);
  27716. + if (!IS_ERR(dentry))
  27717. + dentry->d_op = &get_super_private(s)->ops.dentry;
  27718. + } else if (PTR_ERR(parent) == -ENOENT)
  27719. + dentry = ERR_PTR(RETERR(-ESTALE));
  27720. + else
  27721. + dentry = (void *)parent;
  27722. + return dentry;
  27723. +}
  27724. +
  27725. +/* this is common implementation of is_name_acceptable method of dir
  27726. + plugin
  27727. + */
  27728. +int is_name_acceptable_common(const struct inode *inode, /* directory to check*/
  27729. + const char *name UNUSED_ARG, /* name to check */
  27730. + int len/* @name's length */)
  27731. +{
  27732. + assert("nikita-733", inode != NULL);
  27733. + assert("nikita-734", name != NULL);
  27734. + assert("nikita-735", len > 0);
  27735. +
  27736. + return len <= reiser4_max_filename_len(inode);
  27737. +}
  27738. +
  27739. +/* there is no common implementation of build_entry_key method of dir
  27740. + plugin. See plugin/dir/hashed_dir.c:build_entry_key_hashed() or
  27741. + plugin/dir/seekable_dir.c:build_entry_key_seekable() for example
  27742. +*/
  27743. +
  27744. +/* this is common implementation of build_readdir_key method of dir
  27745. + plugin
  27746. + see reiser4_readdir_common for more details
  27747. +*/
  27748. +int build_readdir_key_common(struct file *dir /* directory being read */ ,
  27749. + reiser4_key * result/* where to store key */)
  27750. +{
  27751. + reiser4_file_fsdata *fdata;
  27752. + struct inode *inode;
  27753. +
  27754. + assert("nikita-1361", dir != NULL);
  27755. + assert("nikita-1362", result != NULL);
  27756. + assert("nikita-1363", dir->f_path.dentry != NULL);
  27757. + inode = file_inode(dir);
  27758. + assert("nikita-1373", inode != NULL);
  27759. +
  27760. + fdata = reiser4_get_file_fsdata(dir);
  27761. + if (IS_ERR(fdata))
  27762. + return PTR_ERR(fdata);
  27763. + assert("nikita-1364", fdata != NULL);
  27764. + return extract_key_from_de_id(get_inode_oid(inode),
  27765. + &fdata->dir.readdir.position.
  27766. + dir_entry_key, result);
  27767. +
  27768. +}
  27769. +
  27770. +void reiser4_adjust_dir_file(struct inode *, const struct dentry *, int offset,
  27771. + int adj);
  27772. +
  27773. +/* this is common implementation of add_entry method of dir plugin
  27774. +*/
  27775. +int reiser4_add_entry_common(struct inode *object, /* directory to add new name
  27776. + * in */
  27777. + struct dentry *where, /* new name */
  27778. + reiser4_object_create_data * data, /* parameters of
  27779. + * new object */
  27780. + reiser4_dir_entry_desc * entry /* parameters of
  27781. + * new directory
  27782. + * entry */)
  27783. +{
  27784. + int result;
  27785. + coord_t *coord;
  27786. + lock_handle lh;
  27787. + struct reiser4_dentry_fsdata *fsdata;
  27788. + reiser4_block_nr reserve;
  27789. +
  27790. + assert("nikita-1114", object != NULL);
  27791. + assert("nikita-1250", where != NULL);
  27792. +
  27793. + fsdata = reiser4_get_dentry_fsdata(where);
  27794. + if (unlikely(IS_ERR(fsdata)))
  27795. + return PTR_ERR(fsdata);
  27796. +
  27797. + reserve = inode_dir_plugin(object)->estimate.add_entry(object);
  27798. + if (reiser4_grab_space(reserve, BA_CAN_COMMIT))
  27799. + return RETERR(-ENOSPC);
  27800. +
  27801. + init_lh(&lh);
  27802. + coord = &fsdata->dec.entry_coord;
  27803. + coord_clear_iplug(coord);
  27804. +
  27805. + /* check for this entry in a directory. This is plugin method. */
  27806. + result = reiser4_find_entry(object, where, &lh, ZNODE_WRITE_LOCK,
  27807. + entry);
  27808. + if (likely(result == -ENOENT)) {
  27809. + /* add new entry. Just pass control to the directory
  27810. + item plugin. */
  27811. + assert("nikita-1709", inode_dir_item_plugin(object));
  27812. + assert("nikita-2230", coord->node == lh.node);
  27813. + reiser4_seal_done(&fsdata->dec.entry_seal);
  27814. + result =
  27815. + inode_dir_item_plugin(object)->s.dir.add_entry(object,
  27816. + coord, &lh,
  27817. + where,
  27818. + entry);
  27819. + if (result == 0) {
  27820. + reiser4_adjust_dir_file(object, where,
  27821. + fsdata->dec.pos + 1, +1);
  27822. + INODE_INC_FIELD(object, i_size);
  27823. + }
  27824. + } else if (result == 0) {
  27825. + assert("nikita-2232", coord->node == lh.node);
  27826. + result = RETERR(-EEXIST);
  27827. + }
  27828. + done_lh(&lh);
  27829. +
  27830. + return result;
  27831. +}
  27832. +
  27833. +/**
  27834. + * rem_entry - remove entry from directory item
  27835. + * @dir:
  27836. + * @dentry:
  27837. + * @entry:
  27838. + * @coord:
  27839. + * @lh:
  27840. + *
  27841. + * Checks that coordinate @coord is set properly and calls item plugin
  27842. + * method to cut entry.
  27843. + */
  27844. +static int
  27845. +rem_entry(struct inode *dir, struct dentry *dentry,
  27846. + reiser4_dir_entry_desc * entry, coord_t *coord, lock_handle * lh)
  27847. +{
  27848. + item_plugin *iplug;
  27849. + struct inode *child;
  27850. +
  27851. + iplug = inode_dir_item_plugin(dir);
  27852. + child = dentry->d_inode;
  27853. + assert("nikita-3399", child != NULL);
  27854. +
  27855. + /* check that we are really destroying an entry for @child */
  27856. + if (REISER4_DEBUG) {
  27857. + int result;
  27858. + reiser4_key key;
  27859. +
  27860. + result = iplug->s.dir.extract_key(coord, &key);
  27861. + if (result != 0)
  27862. + return result;
  27863. + if (get_key_objectid(&key) != get_inode_oid(child)) {
  27864. + warning("nikita-3397",
  27865. + "rem_entry: %#llx != %#llx\n",
  27866. + get_key_objectid(&key),
  27867. + (unsigned long long)get_inode_oid(child));
  27868. + return RETERR(-EIO);
  27869. + }
  27870. + }
  27871. + return iplug->s.dir.rem_entry(dir, &dentry->d_name, coord, lh, entry);
  27872. +}
  27873. +
  27874. +/**
  27875. + * reiser4_rem_entry_common - remove entry from a directory
  27876. + * @dir: directory to remove entry from
  27877. + * @where: name that is being removed
  27878. + * @entry: description of entry being removed
  27879. + *
  27880. + * This is common implementation of rem_entry method of dir plugin.
  27881. + */
  27882. +int reiser4_rem_entry_common(struct inode *dir,
  27883. + struct dentry *dentry,
  27884. + reiser4_dir_entry_desc * entry)
  27885. +{
  27886. + int result;
  27887. + coord_t *coord;
  27888. + lock_handle lh;
  27889. + struct reiser4_dentry_fsdata *fsdata;
  27890. + __u64 tograb;
  27891. +
  27892. + assert("nikita-1124", dir != NULL);
  27893. + assert("nikita-1125", dentry != NULL);
  27894. +
  27895. + tograb = inode_dir_plugin(dir)->estimate.rem_entry(dir);
  27896. + result = reiser4_grab_space(tograb, BA_CAN_COMMIT | BA_RESERVED);
  27897. + if (result != 0)
  27898. + return RETERR(-ENOSPC);
  27899. +
  27900. + init_lh(&lh);
  27901. +
  27902. + /* check for this entry in a directory. This is plugin method. */
  27903. + result = reiser4_find_entry(dir, dentry, &lh, ZNODE_WRITE_LOCK, entry);
  27904. + fsdata = reiser4_get_dentry_fsdata(dentry);
  27905. + if (IS_ERR(fsdata)) {
  27906. + done_lh(&lh);
  27907. + return PTR_ERR(fsdata);
  27908. + }
  27909. +
  27910. + coord = &fsdata->dec.entry_coord;
  27911. +
  27912. + assert("nikita-3404",
  27913. + get_inode_oid(dentry->d_inode) != get_inode_oid(dir) ||
  27914. + dir->i_size <= 1);
  27915. +
  27916. + coord_clear_iplug(coord);
  27917. + if (result == 0) {
  27918. + /* remove entry. Just pass control to the directory item
  27919. + plugin. */
  27920. + assert("vs-542", inode_dir_item_plugin(dir));
  27921. + reiser4_seal_done(&fsdata->dec.entry_seal);
  27922. + reiser4_adjust_dir_file(dir, dentry, fsdata->dec.pos, -1);
  27923. + result =
  27924. + WITH_COORD(coord,
  27925. + rem_entry(dir, dentry, entry, coord, &lh));
  27926. + if (result == 0) {
  27927. + if (dir->i_size >= 1)
  27928. + INODE_DEC_FIELD(dir, i_size);
  27929. + else {
  27930. + warning("nikita-2509", "Dir %llu is runt",
  27931. + (unsigned long long)
  27932. + get_inode_oid(dir));
  27933. + result = RETERR(-EIO);
  27934. + }
  27935. +
  27936. + assert("nikita-3405", dentry->d_inode->i_nlink != 1 ||
  27937. + dentry->d_inode->i_size != 2 ||
  27938. + inode_dir_plugin(dentry->d_inode) == NULL);
  27939. + }
  27940. + }
  27941. + done_lh(&lh);
  27942. +
  27943. + return result;
  27944. +}
  27945. +
  27946. +static reiser4_block_nr estimate_init(struct inode *parent,
  27947. + struct inode *object);
  27948. +static int create_dot_dotdot(struct inode *object, struct inode *parent);
  27949. +
  27950. +/* this is common implementation of init method of dir plugin
  27951. + create "." and ".." entries
  27952. +*/
  27953. +int reiser4_dir_init_common(struct inode *object, /* new directory */
  27954. + struct inode *parent, /* parent directory */
  27955. + reiser4_object_create_data * data /* info passed
  27956. + * to us, this
  27957. + * is filled by
  27958. + * reiser4()
  27959. + * syscall in
  27960. + * particular */)
  27961. +{
  27962. + reiser4_block_nr reserve;
  27963. +
  27964. + assert("nikita-680", object != NULL);
  27965. + assert("nikita-681", S_ISDIR(object->i_mode));
  27966. + assert("nikita-682", parent != NULL);
  27967. + assert("nikita-684", data != NULL);
  27968. + assert("nikita-686", data->id == DIRECTORY_FILE_PLUGIN_ID);
  27969. + assert("nikita-687", object->i_mode & S_IFDIR);
  27970. +
  27971. + reserve = estimate_init(parent, object);
  27972. + if (reiser4_grab_space(reserve, BA_CAN_COMMIT))
  27973. + return RETERR(-ENOSPC);
  27974. +
  27975. + return create_dot_dotdot(object, parent);
  27976. +}
  27977. +
  27978. +/* this is common implementation of done method of dir plugin
  27979. + remove "." entry
  27980. +*/
  27981. +int reiser4_dir_done_common(struct inode *object/* object being deleted */)
  27982. +{
  27983. + int result;
  27984. + reiser4_block_nr reserve;
  27985. + struct dentry goodby_dots;
  27986. + reiser4_dir_entry_desc entry;
  27987. +
  27988. + assert("nikita-1449", object != NULL);
  27989. +
  27990. + if (reiser4_inode_get_flag(object, REISER4_NO_SD))
  27991. + return 0;
  27992. +
  27993. + /* of course, this can be rewritten to sweep everything in one
  27994. + reiser4_cut_tree(). */
  27995. + memset(&entry, 0, sizeof entry);
  27996. +
  27997. + /* FIXME: this done method is called from reiser4_delete_dir_common
  27998. + * which reserved space already */
  27999. + reserve = inode_dir_plugin(object)->estimate.rem_entry(object);
  28000. + if (reiser4_grab_space(reserve, BA_CAN_COMMIT | BA_RESERVED))
  28001. + return RETERR(-ENOSPC);
  28002. +
  28003. + memset(&goodby_dots, 0, sizeof goodby_dots);
  28004. + entry.obj = goodby_dots.d_inode = object;
  28005. + goodby_dots.d_name.name = ".";
  28006. + goodby_dots.d_name.len = 1;
  28007. + result = reiser4_rem_entry_common(object, &goodby_dots, &entry);
  28008. + reiser4_free_dentry_fsdata(&goodby_dots);
  28009. + if (unlikely(result != 0 && result != -ENOMEM && result != -ENOENT))
  28010. + warning("nikita-2252", "Cannot remove dot of %llu: %i",
  28011. + (unsigned long long)get_inode_oid(object), result);
  28012. + return 0;
  28013. +}
  28014. +
  28015. +/* this is common implementation of attach method of dir plugin
  28016. +*/
  28017. +int reiser4_attach_common(struct inode *child UNUSED_ARG,
  28018. + struct inode *parent UNUSED_ARG)
  28019. +{
  28020. + assert("nikita-2647", child != NULL);
  28021. + assert("nikita-2648", parent != NULL);
  28022. +
  28023. + return 0;
  28024. +}
  28025. +
  28026. +/* this is common implementation of detach method of dir plugin
  28027. + remove "..", decrease nlink on parent
  28028. +*/
  28029. +int reiser4_detach_common(struct inode *object, struct inode *parent)
  28030. +{
  28031. + int result;
  28032. + struct dentry goodby_dots;
  28033. + reiser4_dir_entry_desc entry;
  28034. +
  28035. + assert("nikita-2885", object != NULL);
  28036. + assert("nikita-2886", !reiser4_inode_get_flag(object, REISER4_NO_SD));
  28037. +
  28038. + memset(&entry, 0, sizeof entry);
  28039. +
  28040. + /* NOTE-NIKITA this only works if @parent is -the- parent of
  28041. + @object, viz. object whose key is stored in dotdot
  28042. + entry. Wouldn't work with hard-links on directories. */
  28043. + memset(&goodby_dots, 0, sizeof goodby_dots);
  28044. + entry.obj = goodby_dots.d_inode = parent;
  28045. + goodby_dots.d_name.name = "..";
  28046. + goodby_dots.d_name.len = 2;
  28047. + result = reiser4_rem_entry_common(object, &goodby_dots, &entry);
  28048. + reiser4_free_dentry_fsdata(&goodby_dots);
  28049. + if (result == 0) {
  28050. + /* the dot should be the only entry remaining at this time... */
  28051. + assert("nikita-3400",
  28052. + object->i_size == 1 && object->i_nlink <= 2);
  28053. +#if 0
  28054. + /* and, together with the only name directory can have, they
  28055. + * provide for the last 2 remaining references. If we get
  28056. + * here as part of error handling during mkdir, @object
  28057. + * possibly has no name yet, so its nlink == 1. If we get here
  28058. + * from rename (targeting empty directory), it has no name
  28059. + * already, so its nlink == 1. */
  28060. + assert("nikita-3401",
  28061. + object->i_nlink == 2 || object->i_nlink == 1);
  28062. +#endif
  28063. +
  28064. + /* decrement nlink of directory removed ".." pointed
  28065. + to */
  28066. + reiser4_del_nlink(parent, NULL, 0);
  28067. + }
  28068. + return result;
  28069. +}
  28070. +
  28071. +/* this is common implementation of estimate.add_entry method of
  28072. + dir plugin
  28073. + estimation of adding entry which supposes that entry is inserting a
  28074. + unit into item
  28075. +*/
  28076. +reiser4_block_nr estimate_add_entry_common(const struct inode *inode)
  28077. +{
  28078. + return estimate_one_insert_into_item(reiser4_tree_by_inode(inode));
  28079. +}
  28080. +
  28081. +/* this is common implementation of estimate.rem_entry method of dir
  28082. + plugin
  28083. +*/
  28084. +reiser4_block_nr estimate_rem_entry_common(const struct inode *inode)
  28085. +{
  28086. + return estimate_one_item_removal(reiser4_tree_by_inode(inode));
  28087. +}
  28088. +
  28089. +/* this is common implementation of estimate.unlink method of dir
  28090. + plugin
  28091. +*/
  28092. +reiser4_block_nr
  28093. +dir_estimate_unlink_common(const struct inode *parent,
  28094. + const struct inode *object)
  28095. +{
  28096. + reiser4_block_nr res;
  28097. +
  28098. + /* hashed_rem_entry(object) */
  28099. + res = inode_dir_plugin(object)->estimate.rem_entry(object);
  28100. + /* del_nlink(parent) */
  28101. + res += 2 * inode_file_plugin(parent)->estimate.update(parent);
  28102. +
  28103. + return res;
  28104. +}
  28105. +
  28106. +/*
  28107. + * helper for inode_ops ->lookup() and dir plugin's ->get_parent()
  28108. + * methods: if @inode is a light-weight file, setup its credentials
  28109. + * that are not stored in the stat-data in this case
  28110. + */
  28111. +void check_light_weight(struct inode *inode, struct inode *parent)
  28112. +{
  28113. + if (reiser4_inode_get_flag(inode, REISER4_LIGHT_WEIGHT)) {
  28114. + inode->i_uid = parent->i_uid;
  28115. + inode->i_gid = parent->i_gid;
  28116. + /* clear light-weight flag. If inode would be read by any
  28117. + other name, [ug]id wouldn't change. */
  28118. + reiser4_inode_clr_flag(inode, REISER4_LIGHT_WEIGHT);
  28119. + }
  28120. +}
  28121. +
  28122. +/* looks for name specified in @dentry in directory @parent and if name is
  28123. + found - key of object found entry points to is stored in @entry->key */
  28124. +int reiser4_lookup_name(struct inode *parent, /* inode of directory to lookup
  28125. + * for name in */
  28126. + struct dentry *dentry, /* name to look for */
  28127. + reiser4_key * key/* place to store key */)
  28128. +{
  28129. + int result;
  28130. + coord_t *coord;
  28131. + lock_handle lh;
  28132. + const char *name;
  28133. + int len;
  28134. + reiser4_dir_entry_desc entry;
  28135. + struct reiser4_dentry_fsdata *fsdata;
  28136. +
  28137. + assert("nikita-1247", parent != NULL);
  28138. + assert("nikita-1248", dentry != NULL);
  28139. + assert("nikita-1123", dentry->d_name.name != NULL);
  28140. + assert("vs-1486",
  28141. + dentry->d_op == &get_super_private(parent->i_sb)->ops.dentry);
  28142. +
  28143. + name = dentry->d_name.name;
  28144. + len = dentry->d_name.len;
  28145. +
  28146. + if (!inode_dir_plugin(parent)->is_name_acceptable(parent, name, len))
  28147. + /* some arbitrary error code to return */
  28148. + return RETERR(-ENAMETOOLONG);
  28149. +
  28150. + fsdata = reiser4_get_dentry_fsdata(dentry);
  28151. + if (IS_ERR(fsdata))
  28152. + return PTR_ERR(fsdata);
  28153. +
  28154. + coord = &fsdata->dec.entry_coord;
  28155. + coord_clear_iplug(coord);
  28156. + init_lh(&lh);
  28157. +
  28158. + /* find entry in a directory. This is plugin method. */
  28159. + result = reiser4_find_entry(parent, dentry, &lh, ZNODE_READ_LOCK,
  28160. + &entry);
  28161. + if (result == 0) {
  28162. + /* entry was found, extract object key from it. */
  28163. + result =
  28164. + WITH_COORD(coord,
  28165. + item_plugin_by_coord(coord)->s.dir.
  28166. + extract_key(coord, key));
  28167. + }
  28168. + done_lh(&lh);
  28169. + return result;
  28170. +
  28171. +}
  28172. +
  28173. +/* helper for reiser4_dir_init_common(): estimate number of blocks to reserve */
  28174. +static reiser4_block_nr
  28175. +estimate_init(struct inode *parent, struct inode *object)
  28176. +{
  28177. + reiser4_block_nr res = 0;
  28178. +
  28179. + assert("vpf-321", parent != NULL);
  28180. + assert("vpf-322", object != NULL);
  28181. +
  28182. + /* hashed_add_entry(object) for "." */
  28183. + res += inode_dir_plugin(object)->estimate.add_entry(object);
  28184. + /* reiser4_add_nlink(object) */
  28185. + res += inode_file_plugin(object)->estimate.update(object);
  28186. + /* hashed_add_entry(object) for ".." */
  28187. + res += inode_dir_plugin(object)->estimate.add_entry(object);
  28188. + /* reiser4_add_nlink(parent) */
  28189. + res += inode_file_plugin(parent)->estimate.update(parent);
  28190. +
  28191. + return res;
  28192. +}
  28193. +
  28194. +/* helper function for reiser4_dir_init_common(). Create "." and ".." */
  28195. +static int create_dot_dotdot(struct inode *object/* object to create dot and
  28196. + * dotdot for */ ,
  28197. + struct inode *parent/* parent of @object */)
  28198. +{
  28199. + int result;
  28200. + struct dentry dots_entry;
  28201. + reiser4_dir_entry_desc entry;
  28202. +
  28203. + assert("nikita-688", object != NULL);
  28204. + assert("nikita-689", S_ISDIR(object->i_mode));
  28205. + assert("nikita-691", parent != NULL);
  28206. +
  28207. + /* We store dot and dotdot as normal directory entries. This is
  28208. + not necessary, because almost all information stored in them
  28209. + is already in the stat-data of directory, the only thing
  28210. + being missed is objectid of grand-parent directory that can
  28211. + easily be added there as extension.
  28212. +
  28213. + But it is done the way it is done, because not storing dot
  28214. + and dotdot will lead to the following complications:
  28215. +
  28216. + . special case handling in ->lookup().
  28217. + . addition of another extension to the sd.
  28218. + . dependency on key allocation policy for stat data.
  28219. +
  28220. + */
  28221. +
  28222. + memset(&entry, 0, sizeof entry);
  28223. + memset(&dots_entry, 0, sizeof dots_entry);
  28224. + entry.obj = dots_entry.d_inode = object;
  28225. + dots_entry.d_name.name = ".";
  28226. + dots_entry.d_name.len = 1;
  28227. + result = reiser4_add_entry_common(object, &dots_entry, NULL, &entry);
  28228. + reiser4_free_dentry_fsdata(&dots_entry);
  28229. +
  28230. + if (result == 0) {
  28231. + result = reiser4_add_nlink(object, object, 0);
  28232. + if (result == 0) {
  28233. + entry.obj = dots_entry.d_inode = parent;
  28234. + dots_entry.d_name.name = "..";
  28235. + dots_entry.d_name.len = 2;
  28236. + result = reiser4_add_entry_common(object,
  28237. + &dots_entry, NULL, &entry);
  28238. + reiser4_free_dentry_fsdata(&dots_entry);
  28239. + /* if creation of ".." failed, iput() will delete
  28240. + object with ".". */
  28241. + if (result == 0) {
  28242. + result = reiser4_add_nlink(parent, object, 0);
  28243. + if (result != 0)
  28244. + /*
  28245. + * if we failed to bump i_nlink, try
  28246. + * to remove ".."
  28247. + */
  28248. + reiser4_detach_common(object, parent);
  28249. + }
  28250. + }
  28251. + }
  28252. +
  28253. + if (result != 0) {
  28254. + /*
  28255. + * in the case of error, at least update stat-data so that,
  28256. + * ->i_nlink updates are not lingering.
  28257. + */
  28258. + reiser4_update_sd(object);
  28259. + reiser4_update_sd(parent);
  28260. + }
  28261. +
  28262. + return result;
  28263. +}
  28264. +
  28265. +/*
  28266. + * return 0 iff @coord contains a directory entry for the file with the name
  28267. + * @name.
  28268. + */
  28269. +static int
  28270. +check_item(const struct inode *dir, const coord_t *coord, const char *name)
  28271. +{
  28272. + item_plugin *iplug;
  28273. + char buf[DE_NAME_BUF_LEN];
  28274. +
  28275. + iplug = item_plugin_by_coord(coord);
  28276. + if (iplug == NULL) {
  28277. + warning("nikita-1135", "Cannot get item plugin");
  28278. + print_coord("coord", coord, 1);
  28279. + return RETERR(-EIO);
  28280. + } else if (item_id_by_coord(coord) !=
  28281. + item_id_by_plugin(inode_dir_item_plugin(dir))) {
  28282. + /* item id of current item does not match to id of items a
  28283. + directory is built of */
  28284. + warning("nikita-1136", "Wrong item plugin");
  28285. + print_coord("coord", coord, 1);
  28286. + return RETERR(-EIO);
  28287. + }
  28288. + assert("nikita-1137", iplug->s.dir.extract_name);
  28289. +
  28290. + /* Compare name stored in this entry with name we are looking for.
  28291. +
  28292. + NOTE-NIKITA Here should go code for support of something like
  28293. + unicode, code tables, etc.
  28294. + */
  28295. + return !!strcmp(name, iplug->s.dir.extract_name(coord, buf));
  28296. +}
  28297. +
  28298. +static int
  28299. +check_entry(const struct inode *dir, coord_t *coord, const struct qstr *name)
  28300. +{
  28301. + return WITH_COORD(coord, check_item(dir, coord, name->name));
  28302. +}
  28303. +
  28304. +/*
  28305. + * argument package used by entry_actor to scan entries with identical keys.
  28306. + */
  28307. +struct entry_actor_args {
  28308. + /* name we are looking for */
  28309. + const char *name;
  28310. + /* key of directory entry. entry_actor() scans through sequence of
  28311. + * items/units having the same key */
  28312. + reiser4_key *key;
  28313. + /* how many entries with duplicate key was scanned so far. */
  28314. + int non_uniq;
  28315. +#if REISER4_USE_COLLISION_LIMIT
  28316. + /* scan limit */
  28317. + int max_non_uniq;
  28318. +#endif
  28319. + /* return parameter: set to true, if ->name wasn't found */
  28320. + int not_found;
  28321. + /* what type of lock to take when moving to the next node during
  28322. + * scan */
  28323. + znode_lock_mode mode;
  28324. +
  28325. + /* last coord that was visited during scan */
  28326. + coord_t last_coord;
  28327. + /* last node locked during scan */
  28328. + lock_handle last_lh;
  28329. + /* inode of directory */
  28330. + const struct inode *inode;
  28331. +};
  28332. +
  28333. +/* Function called by reiser4_find_entry() to look for given name
  28334. + in the directory. */
  28335. +static int entry_actor(reiser4_tree * tree UNUSED_ARG /* tree being scanned */ ,
  28336. + coord_t *coord /* current coord */ ,
  28337. + lock_handle * lh /* current lock handle */ ,
  28338. + void *entry_actor_arg/* argument to scan */)
  28339. +{
  28340. + reiser4_key unit_key;
  28341. + struct entry_actor_args *args;
  28342. +
  28343. + assert("nikita-1131", tree != NULL);
  28344. + assert("nikita-1132", coord != NULL);
  28345. + assert("nikita-1133", entry_actor_arg != NULL);
  28346. +
  28347. + args = entry_actor_arg;
  28348. + ++args->non_uniq;
  28349. +#if REISER4_USE_COLLISION_LIMIT
  28350. + if (args->non_uniq > args->max_non_uniq) {
  28351. + args->not_found = 1;
  28352. + /* hash collision overflow. */
  28353. + return RETERR(-EBUSY);
  28354. + }
  28355. +#endif
  28356. +
  28357. + /*
  28358. + * did we just reach the end of the sequence of items/units with
  28359. + * identical keys?
  28360. + */
  28361. + if (!keyeq(args->key, unit_key_by_coord(coord, &unit_key))) {
  28362. + assert("nikita-1791",
  28363. + keylt(args->key, unit_key_by_coord(coord, &unit_key)));
  28364. + args->not_found = 1;
  28365. + args->last_coord.between = AFTER_UNIT;
  28366. + return 0;
  28367. + }
  28368. +
  28369. + coord_dup(&args->last_coord, coord);
  28370. + /*
  28371. + * did scan just moved to the next node?
  28372. + */
  28373. + if (args->last_lh.node != lh->node) {
  28374. + int lock_result;
  28375. +
  28376. + /*
  28377. + * if so, lock new node with the mode requested by the caller
  28378. + */
  28379. + done_lh(&args->last_lh);
  28380. + assert("nikita-1896", znode_is_any_locked(lh->node));
  28381. + lock_result = longterm_lock_znode(&args->last_lh, lh->node,
  28382. + args->mode, ZNODE_LOCK_HIPRI);
  28383. + if (lock_result != 0)
  28384. + return lock_result;
  28385. + }
  28386. + return check_item(args->inode, coord, args->name);
  28387. +}
  28388. +
  28389. +/* Look for given @name within directory @dir.
  28390. +
  28391. + This is called during lookup, creation and removal of directory
  28392. + entries and on reiser4_rename_common
  28393. +
  28394. + First calculate key that directory entry for @name would have. Search
  28395. + for this key in the tree. If such key is found, scan all items with
  28396. + the same key, checking name in each directory entry along the way.
  28397. +*/
  28398. +int reiser4_find_entry(struct inode *dir, /* directory to scan */
  28399. + struct dentry *de, /* name to search for */
  28400. + lock_handle * lh, /* resulting lock handle */
  28401. + znode_lock_mode mode, /* required lock mode */
  28402. + reiser4_dir_entry_desc * entry /* parameters of found
  28403. + directory entry */)
  28404. +{
  28405. + const struct qstr *name;
  28406. + seal_t *seal;
  28407. + coord_t *coord;
  28408. + int result;
  28409. + __u32 flags;
  28410. + struct de_location *dec;
  28411. + struct reiser4_dentry_fsdata *fsdata;
  28412. +
  28413. + assert("nikita-1130", lh != NULL);
  28414. + assert("nikita-1128", dir != NULL);
  28415. +
  28416. + name = &de->d_name;
  28417. + assert("nikita-1129", name != NULL);
  28418. +
  28419. + /* dentry private data don't require lock, because dentry
  28420. + manipulations are protected by i_mutex on parent.
  28421. +
  28422. + This is not so for inodes, because there is no -the- parent in
  28423. + inode case.
  28424. + */
  28425. + fsdata = reiser4_get_dentry_fsdata(de);
  28426. + if (IS_ERR(fsdata))
  28427. + return PTR_ERR(fsdata);
  28428. + dec = &fsdata->dec;
  28429. +
  28430. + coord = &dec->entry_coord;
  28431. + coord_clear_iplug(coord);
  28432. + seal = &dec->entry_seal;
  28433. + /* compose key of directory entry for @name */
  28434. + inode_dir_plugin(dir)->build_entry_key(dir, name, &entry->key);
  28435. +
  28436. + if (reiser4_seal_is_set(seal)) {
  28437. + /* check seal */
  28438. + result = reiser4_seal_validate(seal, coord, &entry->key,
  28439. + lh, mode, ZNODE_LOCK_LOPRI);
  28440. + if (result == 0) {
  28441. + /* key was found. Check that it is really item we are
  28442. + looking for. */
  28443. + result = check_entry(dir, coord, name);
  28444. + if (result == 0)
  28445. + return 0;
  28446. + }
  28447. + }
  28448. + flags = (mode == ZNODE_WRITE_LOCK) ? CBK_FOR_INSERT : 0;
  28449. + /*
  28450. + * find place in the tree where directory item should be located.
  28451. + */
  28452. + result = reiser4_object_lookup(dir, &entry->key, coord, lh, mode,
  28453. + FIND_EXACT, LEAF_LEVEL, LEAF_LEVEL,
  28454. + flags, NULL/*ra_info */);
  28455. + if (result == CBK_COORD_FOUND) {
  28456. + struct entry_actor_args arg;
  28457. +
  28458. + /* fast path: no hash collisions */
  28459. + result = check_entry(dir, coord, name);
  28460. + if (result == 0) {
  28461. + reiser4_seal_init(seal, coord, &entry->key);
  28462. + dec->pos = 0;
  28463. + } else if (result > 0) {
  28464. + /* Iterate through all units with the same keys. */
  28465. + arg.name = name->name;
  28466. + arg.key = &entry->key;
  28467. + arg.not_found = 0;
  28468. + arg.non_uniq = 0;
  28469. +#if REISER4_USE_COLLISION_LIMIT
  28470. + arg.max_non_uniq = max_hash_collisions(dir);
  28471. + assert("nikita-2851", arg.max_non_uniq > 1);
  28472. +#endif
  28473. + arg.mode = mode;
  28474. + arg.inode = dir;
  28475. + coord_init_zero(&arg.last_coord);
  28476. + init_lh(&arg.last_lh);
  28477. +
  28478. + result = reiser4_iterate_tree
  28479. + (reiser4_tree_by_inode(dir),
  28480. + coord, lh,
  28481. + entry_actor, &arg, mode, 1);
  28482. + /* if end of the tree or extent was reached during
  28483. + scanning. */
  28484. + if (arg.not_found || (result == -E_NO_NEIGHBOR)) {
  28485. + /* step back */
  28486. + done_lh(lh);
  28487. +
  28488. + result = zload(arg.last_coord.node);
  28489. + if (result == 0) {
  28490. + coord_clear_iplug(&arg.last_coord);
  28491. + coord_dup(coord, &arg.last_coord);
  28492. + move_lh(lh, &arg.last_lh);
  28493. + result = RETERR(-ENOENT);
  28494. + zrelse(arg.last_coord.node);
  28495. + --arg.non_uniq;
  28496. + }
  28497. + }
  28498. +
  28499. + done_lh(&arg.last_lh);
  28500. + if (result == 0)
  28501. + reiser4_seal_init(seal, coord, &entry->key);
  28502. +
  28503. + if (result == 0 || result == -ENOENT) {
  28504. + assert("nikita-2580", arg.non_uniq > 0);
  28505. + dec->pos = arg.non_uniq - 1;
  28506. + }
  28507. + }
  28508. + } else
  28509. + dec->pos = -1;
  28510. + return result;
  28511. +}
  28512. +
  28513. +/*
  28514. + Local variables:
  28515. + c-indentation-style: "K&R"
  28516. + mode-name: "LC"
  28517. + c-basic-offset: 8
  28518. + tab-width: 8
  28519. + fill-column: 120
  28520. + scroll-step: 1
  28521. + End:
  28522. +*/
  28523. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/disk_format/disk_format40.c linux-4.14.2/fs/reiser4/plugin/disk_format/disk_format40.c
  28524. --- linux-4.14.2.orig/fs/reiser4/plugin/disk_format/disk_format40.c 1970-01-01 01:00:00.000000000 +0100
  28525. +++ linux-4.14.2/fs/reiser4/plugin/disk_format/disk_format40.c 2017-11-26 22:13:09.000000000 +0100
  28526. @@ -0,0 +1,664 @@
  28527. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  28528. +
  28529. +#include "../../debug.h"
  28530. +#include "../../dformat.h"
  28531. +#include "../../key.h"
  28532. +#include "../node/node.h"
  28533. +#include "../space/space_allocator.h"
  28534. +#include "disk_format40.h"
  28535. +#include "../plugin.h"
  28536. +#include "../../txnmgr.h"
  28537. +#include "../../jnode.h"
  28538. +#include "../../tree.h"
  28539. +#include "../../super.h"
  28540. +#include "../../wander.h"
  28541. +#include "../../inode.h"
  28542. +#include "../../ktxnmgrd.h"
  28543. +#include "../../status_flags.h"
  28544. +
  28545. +#include <linux/types.h> /* for __u?? */
  28546. +#include <linux/fs.h> /* for struct super_block */
  28547. +#include <linux/buffer_head.h>
  28548. +
  28549. +/* reiser 4.0 default disk layout */
  28550. +
  28551. +/* Amount of free blocks needed to perform release_format40 when fs gets
  28552. + mounted RW: 1 for SB, 1 for non-leaves in overwrite set, 2 for tx header
  28553. + & tx record. */
  28554. +#define RELEASE_RESERVED 4
  28555. +
  28556. +/* This flag indicates that backup should be updated
  28557. + (the update is performed by fsck) */
  28558. +#define FORMAT40_UPDATE_BACKUP (1 << 31)
  28559. +
  28560. +/* functions to access fields of format40_disk_super_block */
  28561. +static __u64 get_format40_block_count(const format40_disk_super_block * sb)
  28562. +{
  28563. + return le64_to_cpu(get_unaligned(&sb->block_count));
  28564. +}
  28565. +
  28566. +static __u64 get_format40_free_blocks(const format40_disk_super_block * sb)
  28567. +{
  28568. + return le64_to_cpu(get_unaligned(&sb->free_blocks));
  28569. +}
  28570. +
  28571. +static __u64 get_format40_root_block(const format40_disk_super_block * sb)
  28572. +{
  28573. + return le64_to_cpu(get_unaligned(&sb->root_block));
  28574. +}
  28575. +
  28576. +static __u16 get_format40_tree_height(const format40_disk_super_block * sb)
  28577. +{
  28578. + return le16_to_cpu(get_unaligned(&sb->tree_height));
  28579. +}
  28580. +
  28581. +static __u64 get_format40_file_count(const format40_disk_super_block * sb)
  28582. +{
  28583. + return le64_to_cpu(get_unaligned(&sb->file_count));
  28584. +}
  28585. +
  28586. +static __u64 get_format40_oid(const format40_disk_super_block * sb)
  28587. +{
  28588. + return le64_to_cpu(get_unaligned(&sb->oid));
  28589. +}
  28590. +
  28591. +static __u32 get_format40_mkfs_id(const format40_disk_super_block * sb)
  28592. +{
  28593. + return le32_to_cpu(get_unaligned(&sb->mkfs_id));
  28594. +}
  28595. +
  28596. +static __u32 get_format40_node_plugin_id(const format40_disk_super_block * sb)
  28597. +{
  28598. + return le32_to_cpu(get_unaligned(&sb->node_pid));
  28599. +}
  28600. +
  28601. +static __u64 get_format40_flags(const format40_disk_super_block * sb)
  28602. +{
  28603. + return le64_to_cpu(get_unaligned(&sb->flags));
  28604. +}
  28605. +
  28606. +static __u32 get_format40_version(const format40_disk_super_block * sb)
  28607. +{
  28608. + return le32_to_cpu(get_unaligned(&sb->version)) &
  28609. + ~FORMAT40_UPDATE_BACKUP;
  28610. +}
  28611. +
  28612. +static int update_backup_version(const format40_disk_super_block * sb)
  28613. +{
  28614. + return (le32_to_cpu(get_unaligned(&sb->version)) &
  28615. + FORMAT40_UPDATE_BACKUP);
  28616. +}
  28617. +
  28618. +static int update_disk_version_minor(const format40_disk_super_block * sb)
  28619. +{
  28620. + return (get_format40_version(sb) < get_release_number_minor());
  28621. +}
  28622. +
  28623. +static int incomplete_compatibility(const format40_disk_super_block * sb)
  28624. +{
  28625. + return (get_format40_version(sb) > get_release_number_minor());
  28626. +}
  28627. +
  28628. +static format40_super_info *get_sb_info(struct super_block *super)
  28629. +{
  28630. + return &get_super_private(super)->u.format40;
  28631. +}
  28632. +
  28633. +static int consult_diskmap(struct super_block *s)
  28634. +{
  28635. + format40_super_info *info;
  28636. + journal_location *jloc;
  28637. +
  28638. + info = get_sb_info(s);
  28639. + jloc = &get_super_private(s)->jloc;
  28640. + /* Default format-specific locations, if there is nothing in
  28641. + * diskmap */
  28642. + jloc->footer = FORMAT40_JOURNAL_FOOTER_BLOCKNR;
  28643. + jloc->header = FORMAT40_JOURNAL_HEADER_BLOCKNR;
  28644. + info->loc.super = FORMAT40_OFFSET / s->s_blocksize;
  28645. +#ifdef CONFIG_REISER4_BADBLOCKS
  28646. + reiser4_get_diskmap_value(FORMAT40_PLUGIN_DISKMAP_ID, FORMAT40_JF,
  28647. + &jloc->footer);
  28648. + reiser4_get_diskmap_value(FORMAT40_PLUGIN_DISKMAP_ID, FORMAT40_JH,
  28649. + &jloc->header);
  28650. + reiser4_get_diskmap_value(FORMAT40_PLUGIN_DISKMAP_ID, FORMAT40_SUPER,
  28651. + &info->loc.super);
  28652. +#endif
  28653. + return 0;
  28654. +}
  28655. +
  28656. +/* find any valid super block of disk_format40 (even if the first
  28657. + super block is destroyed), will change block numbers of actual journal header/footer (jf/jh)
  28658. + if needed */
  28659. +static struct buffer_head *find_a_disk_format40_super_block(struct super_block
  28660. + *s)
  28661. +{
  28662. + struct buffer_head *super_bh;
  28663. + format40_disk_super_block *disk_sb;
  28664. + format40_super_info *info;
  28665. +
  28666. + assert("umka-487", s != NULL);
  28667. +
  28668. + info = get_sb_info(s);
  28669. +
  28670. + super_bh = sb_bread(s, info->loc.super);
  28671. + if (super_bh == NULL)
  28672. + return ERR_PTR(RETERR(-EIO));
  28673. +
  28674. + disk_sb = (format40_disk_super_block *) super_bh->b_data;
  28675. + if (strncmp(disk_sb->magic, FORMAT40_MAGIC, sizeof(FORMAT40_MAGIC))) {
  28676. + brelse(super_bh);
  28677. + return ERR_PTR(RETERR(-EINVAL));
  28678. + }
  28679. +
  28680. + reiser4_set_block_count(s, le64_to_cpu(get_unaligned(&disk_sb->block_count)));
  28681. + reiser4_set_data_blocks(s, le64_to_cpu(get_unaligned(&disk_sb->block_count)) -
  28682. + le64_to_cpu(get_unaligned(&disk_sb->free_blocks)));
  28683. + reiser4_set_free_blocks(s, le64_to_cpu(get_unaligned(&disk_sb->free_blocks)));
  28684. +
  28685. + return super_bh;
  28686. +}
  28687. +
  28688. +/* find the most recent version of super block. This is called after journal is
  28689. + replayed */
  28690. +static struct buffer_head *read_super_block(struct super_block *s)
  28691. +{
  28692. + /* Here the most recent superblock copy has to be read. However, as
  28693. + journal replay isn't complete, we are using
  28694. + find_a_disk_format40_super_block() function. */
  28695. + return find_a_disk_format40_super_block(s);
  28696. +}
  28697. +
  28698. +static int get_super_jnode(struct super_block *s)
  28699. +{
  28700. + reiser4_super_info_data *sbinfo = get_super_private(s);
  28701. + jnode *sb_jnode;
  28702. + int ret;
  28703. +
  28704. + sb_jnode = reiser4_alloc_io_head(&get_sb_info(s)->loc.super);
  28705. +
  28706. + ret = jload(sb_jnode);
  28707. +
  28708. + if (ret) {
  28709. + reiser4_drop_io_head(sb_jnode);
  28710. + return ret;
  28711. + }
  28712. +
  28713. + pin_jnode_data(sb_jnode);
  28714. + jrelse(sb_jnode);
  28715. +
  28716. + sbinfo->u.format40.sb_jnode = sb_jnode;
  28717. +
  28718. + return 0;
  28719. +}
  28720. +
  28721. +static void done_super_jnode(struct super_block *s)
  28722. +{
  28723. + jnode *sb_jnode = get_super_private(s)->u.format40.sb_jnode;
  28724. +
  28725. + if (sb_jnode) {
  28726. + unpin_jnode_data(sb_jnode);
  28727. + reiser4_drop_io_head(sb_jnode);
  28728. + }
  28729. +}
  28730. +
  28731. +typedef enum format40_init_stage {
  28732. + NONE_DONE = 0,
  28733. + CONSULT_DISKMAP,
  28734. + FIND_A_SUPER,
  28735. + INIT_JOURNAL_INFO,
  28736. + INIT_STATUS,
  28737. + JOURNAL_REPLAY,
  28738. + READ_SUPER,
  28739. + KEY_CHECK,
  28740. + INIT_OID,
  28741. + INIT_TREE,
  28742. + JOURNAL_RECOVER,
  28743. + INIT_SA,
  28744. + INIT_JNODE,
  28745. + ALL_DONE
  28746. +} format40_init_stage;
  28747. +
  28748. +static format40_disk_super_block *copy_sb(const struct buffer_head *super_bh)
  28749. +{
  28750. + format40_disk_super_block *sb_copy;
  28751. +
  28752. + sb_copy = kmalloc(sizeof(format40_disk_super_block),
  28753. + reiser4_ctx_gfp_mask_get());
  28754. + if (sb_copy == NULL)
  28755. + return ERR_PTR(RETERR(-ENOMEM));
  28756. + memcpy(sb_copy, ((format40_disk_super_block *) super_bh->b_data),
  28757. + sizeof(format40_disk_super_block));
  28758. + return sb_copy;
  28759. +}
  28760. +
  28761. +static int check_key_format(const format40_disk_super_block *sb_copy)
  28762. +{
  28763. + if (!equi(REISER4_LARGE_KEY,
  28764. + get_format40_flags(sb_copy) & (1 << FORMAT40_LARGE_KEYS))) {
  28765. + warning("nikita-3228", "Key format mismatch. "
  28766. + "Only %s keys are supported.",
  28767. + REISER4_LARGE_KEY ? "large" : "small");
  28768. + return RETERR(-EINVAL);
  28769. + }
  28770. + return 0;
  28771. +}
  28772. +
  28773. +/**
  28774. + * try_init_format40
  28775. + * @super:
  28776. + * @stage:
  28777. + *
  28778. + */
  28779. +static int try_init_format40(struct super_block *super,
  28780. + format40_init_stage *stage)
  28781. +{
  28782. + int result;
  28783. + struct buffer_head *super_bh;
  28784. + reiser4_super_info_data *sbinfo;
  28785. + format40_disk_super_block *sb_copy;
  28786. + tree_level height;
  28787. + reiser4_block_nr root_block;
  28788. + node_plugin *nplug;
  28789. +
  28790. + assert("vs-475", super != NULL);
  28791. + assert("vs-474", get_super_private(super));
  28792. +
  28793. + *stage = NONE_DONE;
  28794. +
  28795. + result = consult_diskmap(super);
  28796. + if (result)
  28797. + return result;
  28798. + *stage = CONSULT_DISKMAP;
  28799. +
  28800. + super_bh = find_a_disk_format40_super_block(super);
  28801. + if (IS_ERR(super_bh))
  28802. + return PTR_ERR(super_bh);
  28803. + brelse(super_bh);
  28804. + *stage = FIND_A_SUPER;
  28805. +
  28806. + /* ok, we are sure that filesystem format is a format40 format */
  28807. +
  28808. + /* map jnodes for journal control blocks (header, footer) to disk */
  28809. + result = reiser4_init_journal_info(super);
  28810. + if (result)
  28811. + return result;
  28812. + *stage = INIT_JOURNAL_INFO;
  28813. +
  28814. + /* ok, we are sure that filesystem format is a format40 format */
  28815. + /* Now check it's state */
  28816. + result = reiser4_status_init(FORMAT40_STATUS_BLOCKNR);
  28817. + if (result != 0 && result != -EINVAL)
  28818. + /* -EINVAL means there is no magic, so probably just old
  28819. + * fs. */
  28820. + return result;
  28821. + *stage = INIT_STATUS;
  28822. +
  28823. + result = reiser4_status_query(NULL, NULL);
  28824. + if (result == REISER4_STATUS_MOUNT_WARN)
  28825. + notice("vpf-1363", "Warning: mounting %s with errors.",
  28826. + super->s_id);
  28827. + if (result == REISER4_STATUS_MOUNT_RO) {
  28828. + notice("vpf-1364", "Warning: mounting %s with fatal errors,"
  28829. + " forcing read-only mount.", super->s_id);
  28830. + super->s_flags |= MS_RDONLY;
  28831. + }
  28832. + result = reiser4_journal_replay(super);
  28833. + if (result)
  28834. + return result;
  28835. + *stage = JOURNAL_REPLAY;
  28836. +
  28837. + super_bh = read_super_block(super);
  28838. + if (IS_ERR(super_bh))
  28839. + return PTR_ERR(super_bh);
  28840. + *stage = READ_SUPER;
  28841. +
  28842. + /* allocate and make a copy of format40_disk_super_block */
  28843. + sb_copy = copy_sb(super_bh);
  28844. + brelse(super_bh);
  28845. +
  28846. + if (IS_ERR(sb_copy))
  28847. + return PTR_ERR(sb_copy);
  28848. + printk("reiser4: %s: found disk format 4.0.%u.\n",
  28849. + super->s_id,
  28850. + get_format40_version(sb_copy));
  28851. + if (incomplete_compatibility(sb_copy))
  28852. + printk("reiser4: %s: format version number (4.0.%u) is "
  28853. + "greater than release number (4.%u.%u) of reiser4 "
  28854. + "kernel module. Some objects of the volume can be "
  28855. + "inaccessible.\n",
  28856. + super->s_id,
  28857. + get_format40_version(sb_copy),
  28858. + get_release_number_major(),
  28859. + get_release_number_minor());
  28860. + /* make sure that key format of kernel and filesystem match */
  28861. + result = check_key_format(sb_copy);
  28862. + if (result) {
  28863. + kfree(sb_copy);
  28864. + return result;
  28865. + }
  28866. + *stage = KEY_CHECK;
  28867. +
  28868. + result = oid_init_allocator(super, get_format40_file_count(sb_copy),
  28869. + get_format40_oid(sb_copy));
  28870. + if (result) {
  28871. + kfree(sb_copy);
  28872. + return result;
  28873. + }
  28874. + *stage = INIT_OID;
  28875. +
  28876. + /* get things necessary to init reiser4_tree */
  28877. + root_block = get_format40_root_block(sb_copy);
  28878. + height = get_format40_tree_height(sb_copy);
  28879. + nplug = node_plugin_by_id(get_format40_node_plugin_id(sb_copy));
  28880. +
  28881. + /* initialize reiser4_super_info_data */
  28882. + sbinfo = get_super_private(super);
  28883. + assert("", sbinfo->tree.super == super);
  28884. + /* init reiser4_tree for the filesystem */
  28885. + result = reiser4_init_tree(&sbinfo->tree, &root_block, height, nplug);
  28886. + if (result) {
  28887. + kfree(sb_copy);
  28888. + return result;
  28889. + }
  28890. + *stage = INIT_TREE;
  28891. +
  28892. + /*
  28893. + * initialize reiser4_super_info_data with data from format40 super
  28894. + * block
  28895. + */
  28896. + sbinfo->default_uid = 0;
  28897. + sbinfo->default_gid = 0;
  28898. + sbinfo->mkfs_id = get_format40_mkfs_id(sb_copy);
  28899. + /* number of blocks in filesystem and reserved space */
  28900. + reiser4_set_block_count(super, get_format40_block_count(sb_copy));
  28901. + sbinfo->blocks_free = get_format40_free_blocks(sb_copy);
  28902. + sbinfo->version = get_format40_version(sb_copy);
  28903. +
  28904. + if (update_backup_version(sb_copy))
  28905. + printk("reiser4: %s: use 'fsck.reiser4 --fix' "
  28906. + "to complete disk format upgrade.\n", super->s_id);
  28907. + kfree(sb_copy);
  28908. +
  28909. + sbinfo->fsuid = 0;
  28910. + sbinfo->fs_flags |= (1 << REISER4_ADG); /* hard links for directories
  28911. + * are not supported */
  28912. + sbinfo->fs_flags |= (1 << REISER4_ONE_NODE_PLUGIN); /* all nodes in
  28913. + * layout 40 are
  28914. + * of one
  28915. + * plugin */
  28916. + /* sbinfo->tmgr is initialized already */
  28917. +
  28918. + /* recover sb data which were logged separately from sb block */
  28919. +
  28920. + /* NOTE-NIKITA: reiser4_journal_recover_sb_data() calls
  28921. + * oid_init_allocator() and reiser4_set_free_blocks() with new
  28922. + * data. What's the reason to call them above? */
  28923. + result = reiser4_journal_recover_sb_data(super);
  28924. + if (result != 0)
  28925. + return result;
  28926. + *stage = JOURNAL_RECOVER;
  28927. +
  28928. + /*
  28929. + * Set number of used blocks. The number of used blocks is not stored
  28930. + * neither in on-disk super block nor in the journal footer blocks. At
  28931. + * this moment actual values of total blocks and free block counters
  28932. + * are set in the reiser4 super block (in-memory structure) and we can
  28933. + * calculate number of used blocks from them.
  28934. + */
  28935. + reiser4_set_data_blocks(super,
  28936. + reiser4_block_count(super) -
  28937. + reiser4_free_blocks(super));
  28938. +
  28939. +#if REISER4_DEBUG
  28940. + sbinfo->min_blocks_used = 16 /* reserved area */ +
  28941. + 2 /* super blocks */ +
  28942. + 2 /* journal footer and header */ ;
  28943. +#endif
  28944. +
  28945. + /* init disk space allocator */
  28946. + result = sa_init_allocator(reiser4_get_space_allocator(super),
  28947. + super, NULL);
  28948. + if (result)
  28949. + return result;
  28950. + *stage = INIT_SA;
  28951. +
  28952. + result = get_super_jnode(super);
  28953. + if (result == 0)
  28954. + *stage = ALL_DONE;
  28955. + return result;
  28956. +}
  28957. +
  28958. +/* plugin->u.format.get_ready */
  28959. +int init_format_format40(struct super_block *s, void *data UNUSED_ARG)
  28960. +{
  28961. + int result;
  28962. + format40_init_stage stage;
  28963. +
  28964. + result = try_init_format40(s, &stage);
  28965. + switch (stage) {
  28966. + case ALL_DONE:
  28967. + assert("nikita-3458", result == 0);
  28968. + break;
  28969. + case INIT_JNODE:
  28970. + done_super_jnode(s);
  28971. + case INIT_SA:
  28972. + sa_destroy_allocator(reiser4_get_space_allocator(s), s);
  28973. + case JOURNAL_RECOVER:
  28974. + case INIT_TREE:
  28975. + reiser4_done_tree(&get_super_private(s)->tree);
  28976. + case INIT_OID:
  28977. + case KEY_CHECK:
  28978. + case READ_SUPER:
  28979. + case JOURNAL_REPLAY:
  28980. + case INIT_STATUS:
  28981. + reiser4_status_finish();
  28982. + case INIT_JOURNAL_INFO:
  28983. + reiser4_done_journal_info(s);
  28984. + case FIND_A_SUPER:
  28985. + case CONSULT_DISKMAP:
  28986. + case NONE_DONE:
  28987. + break;
  28988. + default:
  28989. + impossible("nikita-3457", "init stage: %i", stage);
  28990. + }
  28991. +
  28992. + if (!rofs_super(s) && reiser4_free_blocks(s) < RELEASE_RESERVED)
  28993. + return RETERR(-ENOSPC);
  28994. +
  28995. + return result;
  28996. +}
  28997. +
  28998. +static void pack_format40_super(const struct super_block *s, char *data)
  28999. +{
  29000. + format40_disk_super_block *super_data =
  29001. + (format40_disk_super_block *) data;
  29002. +
  29003. + reiser4_super_info_data *sbinfo = get_super_private(s);
  29004. +
  29005. + assert("zam-591", data != NULL);
  29006. +
  29007. + put_unaligned(cpu_to_le64(reiser4_free_committed_blocks(s)),
  29008. + &super_data->free_blocks);
  29009. +
  29010. + put_unaligned(cpu_to_le64(sbinfo->tree.root_block),
  29011. + &super_data->root_block);
  29012. +
  29013. + put_unaligned(cpu_to_le64(oid_next(s)),
  29014. + &super_data->oid);
  29015. +
  29016. + put_unaligned(cpu_to_le64(oids_used(s)),
  29017. + &super_data->file_count);
  29018. +
  29019. + put_unaligned(cpu_to_le16(sbinfo->tree.height),
  29020. + &super_data->tree_height);
  29021. +
  29022. + if (update_disk_version_minor(super_data)) {
  29023. + __u32 version = PLUGIN_LIBRARY_VERSION | FORMAT40_UPDATE_BACKUP;
  29024. +
  29025. + put_unaligned(cpu_to_le32(version), &super_data->version);
  29026. + }
  29027. +}
  29028. +
  29029. +/* plugin->u.format.log_super
  29030. + return a jnode which should be added to transaction when the super block
  29031. + gets logged */
  29032. +jnode *log_super_format40(struct super_block *s)
  29033. +{
  29034. + jnode *sb_jnode;
  29035. +
  29036. + sb_jnode = get_super_private(s)->u.format40.sb_jnode;
  29037. +
  29038. + jload(sb_jnode);
  29039. +
  29040. + pack_format40_super(s, jdata(sb_jnode));
  29041. +
  29042. + jrelse(sb_jnode);
  29043. +
  29044. + return sb_jnode;
  29045. +}
  29046. +
  29047. +/* plugin->u.format.release */
  29048. +int release_format40(struct super_block *s)
  29049. +{
  29050. + int ret;
  29051. + reiser4_super_info_data *sbinfo;
  29052. +
  29053. + sbinfo = get_super_private(s);
  29054. + assert("zam-579", sbinfo != NULL);
  29055. +
  29056. + if (!rofs_super(s)) {
  29057. + ret = reiser4_capture_super_block(s);
  29058. + if (ret != 0)
  29059. + warning("vs-898",
  29060. + "reiser4_capture_super_block failed: %d",
  29061. + ret);
  29062. +
  29063. + ret = txnmgr_force_commit_all(s, 1);
  29064. + if (ret != 0)
  29065. + warning("jmacd-74438", "txn_force failed: %d", ret);
  29066. +
  29067. + all_grabbed2free();
  29068. + }
  29069. +
  29070. + sa_destroy_allocator(&sbinfo->space_allocator, s);
  29071. + reiser4_done_journal_info(s);
  29072. + done_super_jnode(s);
  29073. +
  29074. + rcu_barrier();
  29075. + reiser4_done_tree(&sbinfo->tree);
  29076. + /* call rcu_barrier(), because some znodes were "released" in
  29077. + * reiser4_done_tree(). */
  29078. + rcu_barrier();
  29079. +
  29080. + return 0;
  29081. +}
  29082. +
  29083. +#define FORMAT40_ROOT_LOCALITY 41
  29084. +#define FORMAT40_ROOT_OBJECTID 42
  29085. +
  29086. +/* plugin->u.format.root_dir_key */
  29087. +const reiser4_key *root_dir_key_format40(const struct super_block *super
  29088. + UNUSED_ARG)
  29089. +{
  29090. + static const reiser4_key FORMAT40_ROOT_DIR_KEY = {
  29091. + .el = {
  29092. + __constant_cpu_to_le64((FORMAT40_ROOT_LOCALITY << 4) | KEY_SD_MINOR),
  29093. +#if REISER4_LARGE_KEY
  29094. + ON_LARGE_KEY(0ull,)
  29095. +#endif
  29096. + __constant_cpu_to_le64(FORMAT40_ROOT_OBJECTID),
  29097. + 0ull
  29098. + }
  29099. + };
  29100. +
  29101. + return &FORMAT40_ROOT_DIR_KEY;
  29102. +}
  29103. +
  29104. +/* plugin->u.format.check_open.
  29105. + Check the opened object for validity. For now it checks for the valid oid &
  29106. + locality only, can be improved later and its work may depend on the mount
  29107. + options. */
  29108. +int check_open_format40(const struct inode *object)
  29109. +{
  29110. + oid_t max, oid;
  29111. +
  29112. + max = oid_next(object->i_sb) - 1;
  29113. +
  29114. + /* Check the oid. */
  29115. + oid = get_inode_oid(object);
  29116. + if (oid > max) {
  29117. + warning("vpf-1360", "The object with the oid %llu "
  29118. + "greater than the max used oid %llu found.",
  29119. + (unsigned long long)oid, (unsigned long long)max);
  29120. +
  29121. + return RETERR(-EIO);
  29122. + }
  29123. +
  29124. + /* Check the locality. */
  29125. + oid = reiser4_inode_data(object)->locality_id;
  29126. + if (oid > max) {
  29127. + warning("vpf-1361", "The object with the locality %llu "
  29128. + "greater than the max used oid %llu found.",
  29129. + (unsigned long long)oid, (unsigned long long)max);
  29130. +
  29131. + return RETERR(-EIO);
  29132. + }
  29133. +
  29134. + return 0;
  29135. +}
  29136. +
  29137. +/*
  29138. + * plugin->u.format.version_update
  29139. + * Upgrade minor disk format version number
  29140. + */
  29141. +int version_update_format40(struct super_block *super) {
  29142. + txn_handle * trans;
  29143. + lock_handle lh;
  29144. + txn_atom *atom;
  29145. + int ret;
  29146. +
  29147. + /* Nothing to do if RO mount or the on-disk version is not less. */
  29148. + if (super->s_flags & MS_RDONLY)
  29149. + return 0;
  29150. +
  29151. + if (get_super_private(super)->version >= get_release_number_minor())
  29152. + return 0;
  29153. +
  29154. + printk("reiser4: %s: upgrading disk format to 4.0.%u.\n",
  29155. + super->s_id,
  29156. + get_release_number_minor());
  29157. + printk("reiser4: %s: use 'fsck.reiser4 --fix' "
  29158. + "to complete disk format upgrade.\n", super->s_id);
  29159. +
  29160. + /* Mark the uber znode dirty to call log_super on write_logs. */
  29161. + init_lh(&lh);
  29162. + ret = get_uber_znode(reiser4_get_tree(super), ZNODE_WRITE_LOCK,
  29163. + ZNODE_LOCK_HIPRI, &lh);
  29164. + if (ret != 0)
  29165. + return ret;
  29166. +
  29167. + znode_make_dirty(lh.node);
  29168. + done_lh(&lh);
  29169. +
  29170. + /* Update the backup blocks. */
  29171. +
  29172. + /* Force write_logs immediately. */
  29173. + trans = get_current_context()->trans;
  29174. + atom = get_current_atom_locked();
  29175. + assert("vpf-1906", atom != NULL);
  29176. +
  29177. + spin_lock_txnh(trans);
  29178. + return force_commit_atom(trans);
  29179. +}
  29180. +
  29181. +/* Make Linus happy.
  29182. + Local variables:
  29183. + c-indentation-style: "K&R"
  29184. + mode-name: "LC"
  29185. + c-basic-offset: 8
  29186. + tab-width: 8
  29187. + fill-column: 120
  29188. + scroll-step: 1
  29189. + End:
  29190. +*/
  29191. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/disk_format/disk_format40.h linux-4.14.2/fs/reiser4/plugin/disk_format/disk_format40.h
  29192. --- linux-4.14.2.orig/fs/reiser4/plugin/disk_format/disk_format40.h 1970-01-01 01:00:00.000000000 +0100
  29193. +++ linux-4.14.2/fs/reiser4/plugin/disk_format/disk_format40.h 2017-11-26 22:13:09.000000000 +0100
  29194. @@ -0,0 +1,111 @@
  29195. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  29196. +
  29197. +/* this file contains:
  29198. + - definition of ondisk super block of standard disk layout for
  29199. + reiser 4.0 (layout 40)
  29200. + - definition of layout 40 specific portion of in-core super block
  29201. + - declarations of functions implementing methods of layout plugin
  29202. + for layout 40
  29203. + - declarations of functions used to get/set fields in layout 40 super block
  29204. +*/
  29205. +
  29206. +#ifndef __DISK_FORMAT40_H__
  29207. +#define __DISK_FORMAT40_H__
  29208. +
  29209. +/* magic for default reiser4 layout */
  29210. +#define FORMAT40_MAGIC "ReIsEr40FoRmAt"
  29211. +#define FORMAT40_OFFSET (REISER4_MASTER_OFFSET + PAGE_SIZE)
  29212. +
  29213. +#include "../../dformat.h"
  29214. +
  29215. +#include <linux/fs.h> /* for struct super_block */
  29216. +
  29217. +typedef enum {
  29218. + FORMAT40_LARGE_KEYS
  29219. +} format40_flags;
  29220. +
  29221. +/* ondisk super block for format 40. It is 512 bytes long */
  29222. +typedef struct format40_disk_super_block {
  29223. + /* 0 */ d64 block_count;
  29224. + /* number of block in a filesystem */
  29225. + /* 8 */ d64 free_blocks;
  29226. + /* number of free blocks */
  29227. + /* 16 */ d64 root_block;
  29228. + /* filesystem tree root block */
  29229. + /* 24 */ d64 oid;
  29230. + /* smallest free objectid */
  29231. + /* 32 */ d64 file_count;
  29232. + /* number of files in a filesystem */
  29233. + /* 40 */ d64 flushes;
  29234. + /* number of times super block was
  29235. + flushed. Needed if format 40
  29236. + will have few super blocks */
  29237. + /* 48 */ d32 mkfs_id;
  29238. + /* unique identifier of fs */
  29239. + /* 52 */ char magic[16];
  29240. + /* magic string ReIsEr40FoRmAt */
  29241. + /* 68 */ d16 tree_height;
  29242. + /* height of filesystem tree */
  29243. + /* 70 */ d16 formatting_policy;
  29244. + /* not used anymore */
  29245. + /* 72 */ d64 flags;
  29246. + /* 80 */ d32 version;
  29247. + /* on-disk format version number
  29248. + initially assigned by mkfs as the greatest format40
  29249. + version number supported by reiser4progs and updated
  29250. + in mount time in accordance with the greatest format40
  29251. + version number supported by kernel.
  29252. + Is used by fsck to catch possible corruption and
  29253. + for various compatibility issues */
  29254. + /* 84 */ d32 node_pid;
  29255. + /* node plugin id */
  29256. + /* 88 */ char not_used[424];
  29257. +} format40_disk_super_block;
  29258. +
  29259. +/* format 40 specific part of reiser4_super_info_data */
  29260. +typedef struct format40_super_info {
  29261. +/* format40_disk_super_block actual_sb; */
  29262. + jnode *sb_jnode;
  29263. + struct {
  29264. + reiser4_block_nr super;
  29265. + } loc;
  29266. +} format40_super_info;
  29267. +
  29268. +/* Defines for journal header and footer respectively. */
  29269. +#define FORMAT40_JOURNAL_HEADER_BLOCKNR \
  29270. + ((REISER4_MASTER_OFFSET / PAGE_SIZE) + 3)
  29271. +
  29272. +#define FORMAT40_JOURNAL_FOOTER_BLOCKNR \
  29273. + ((REISER4_MASTER_OFFSET / PAGE_SIZE) + 4)
  29274. +
  29275. +#define FORMAT40_STATUS_BLOCKNR \
  29276. + ((REISER4_MASTER_OFFSET / PAGE_SIZE) + 5)
  29277. +
  29278. +/* Diskmap declarations */
  29279. +#define FORMAT40_PLUGIN_DISKMAP_ID ((REISER4_FORMAT_PLUGIN_TYPE<<16) | (FORMAT40_ID))
  29280. +#define FORMAT40_SUPER 1
  29281. +#define FORMAT40_JH 2
  29282. +#define FORMAT40_JF 3
  29283. +
  29284. +/* declarations of functions implementing methods of layout plugin for
  29285. + format 40. The functions themselves are in disk_format40.c */
  29286. +extern int init_format_format40(struct super_block *, void *data);
  29287. +extern const reiser4_key *root_dir_key_format40(const struct super_block *);
  29288. +extern int release_format40(struct super_block *s);
  29289. +extern jnode *log_super_format40(struct super_block *s);
  29290. +extern int check_open_format40(const struct inode *object);
  29291. +extern int version_update_format40(struct super_block *super);
  29292. +
  29293. +/* __DISK_FORMAT40_H__ */
  29294. +#endif
  29295. +
  29296. +/* Make Linus happy.
  29297. + Local variables:
  29298. + c-indentation-style: "K&R"
  29299. + mode-name: "LC"
  29300. + c-basic-offset: 8
  29301. + tab-width: 8
  29302. + fill-column: 120
  29303. + scroll-step: 1
  29304. + End:
  29305. +*/
  29306. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/disk_format/disk_format.c linux-4.14.2/fs/reiser4/plugin/disk_format/disk_format.c
  29307. --- linux-4.14.2.orig/fs/reiser4/plugin/disk_format/disk_format.c 1970-01-01 01:00:00.000000000 +0100
  29308. +++ linux-4.14.2/fs/reiser4/plugin/disk_format/disk_format.c 2017-11-26 22:13:09.000000000 +0100
  29309. @@ -0,0 +1,38 @@
  29310. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  29311. +
  29312. +#include "../../debug.h"
  29313. +#include "../plugin_header.h"
  29314. +#include "disk_format40.h"
  29315. +#include "disk_format.h"
  29316. +#include "../plugin.h"
  29317. +
  29318. +/* initialization of disk layout plugins */
  29319. +disk_format_plugin format_plugins[LAST_FORMAT_ID] = {
  29320. + [FORMAT40_ID] = {
  29321. + .h = {
  29322. + .type_id = REISER4_FORMAT_PLUGIN_TYPE,
  29323. + .id = FORMAT40_ID,
  29324. + .pops = NULL,
  29325. + .label = "reiser40",
  29326. + .desc = "standard disk layout for reiser40",
  29327. + .linkage = {NULL, NULL}
  29328. + },
  29329. + .init_format = init_format_format40,
  29330. + .root_dir_key = root_dir_key_format40,
  29331. + .release = release_format40,
  29332. + .log_super = log_super_format40,
  29333. + .check_open = check_open_format40,
  29334. + .version_update = version_update_format40
  29335. + }
  29336. +};
  29337. +
  29338. +/* Make Linus happy.
  29339. + Local variables:
  29340. + c-indentation-style: "K&R"
  29341. + mode-name: "LC"
  29342. + c-basic-offset: 8
  29343. + tab-width: 8
  29344. + fill-column: 120
  29345. + scroll-step: 1
  29346. + End:
  29347. +*/
  29348. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/disk_format/disk_format.h linux-4.14.2/fs/reiser4/plugin/disk_format/disk_format.h
  29349. --- linux-4.14.2.orig/fs/reiser4/plugin/disk_format/disk_format.h 1970-01-01 01:00:00.000000000 +0100
  29350. +++ linux-4.14.2/fs/reiser4/plugin/disk_format/disk_format.h 2017-11-26 22:13:09.000000000 +0100
  29351. @@ -0,0 +1,27 @@
  29352. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  29353. +
  29354. +/* identifiers for disk layouts, they are also used as indexes in array of disk
  29355. + plugins */
  29356. +
  29357. +#if !defined( __REISER4_DISK_FORMAT_H__ )
  29358. +#define __REISER4_DISK_FORMAT_H__
  29359. +
  29360. +typedef enum {
  29361. + /* standard reiser4 disk layout plugin id */
  29362. + FORMAT40_ID,
  29363. + LAST_FORMAT_ID
  29364. +} disk_format_id;
  29365. +
  29366. +/* __REISER4_DISK_FORMAT_H__ */
  29367. +#endif
  29368. +
  29369. +/* Make Linus happy.
  29370. + Local variables:
  29371. + c-indentation-style: "K&R"
  29372. + mode-name: "LC"
  29373. + c-basic-offset: 8
  29374. + tab-width: 8
  29375. + fill-column: 120
  29376. + scroll-step: 1
  29377. + End:
  29378. +*/
  29379. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/disk_format/Makefile linux-4.14.2/fs/reiser4/plugin/disk_format/Makefile
  29380. --- linux-4.14.2.orig/fs/reiser4/plugin/disk_format/Makefile 1970-01-01 01:00:00.000000000 +0100
  29381. +++ linux-4.14.2/fs/reiser4/plugin/disk_format/Makefile 2017-11-26 22:13:09.000000000 +0100
  29382. @@ -0,0 +1,5 @@
  29383. +obj-$(CONFIG_REISER4_FS) += df_plugins.o
  29384. +
  29385. +df_plugins-objs := \
  29386. + disk_format40.o \
  29387. + disk_format.o
  29388. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/fibration.c linux-4.14.2/fs/reiser4/plugin/fibration.c
  29389. --- linux-4.14.2.orig/fs/reiser4/plugin/fibration.c 1970-01-01 01:00:00.000000000 +0100
  29390. +++ linux-4.14.2/fs/reiser4/plugin/fibration.c 2017-11-26 22:13:09.000000000 +0100
  29391. @@ -0,0 +1,175 @@
  29392. +/* Copyright 2004 by Hans Reiser, licensing governed by
  29393. + * reiser4/README */
  29394. +
  29395. +/* Directory fibrations */
  29396. +
  29397. +/*
  29398. + * Suppose we have a directory tree with sources of some project. During
  29399. + * compilation .o files are created within this tree. This makes access
  29400. + * to the original source files less efficient, because source files are
  29401. + * now "diluted" by object files: default directory plugin uses prefix
  29402. + * of a file name as a part of the key for directory entry (and this
  29403. + * part is also inherited by the key of file body). This means that
  29404. + * foo.o will be located close to foo.c and foo.h in the tree.
  29405. + *
  29406. + * To avoid this effect the directory plugin fills the highest 7 (unused
  29407. + * originally) bits of the second component of the directory entry key
  29408. + * by bit-pattern depending on the file name (see
  29409. + * fs/reiser4/kassign.c:build_entry_key_common()). These bits are called
  29410. + * "fibre". Fibre of the file name key is inherited by key of stat data
  29411. + * and keys of file body (in the case of REISER4_LARGE_KEY).
  29412. + *
  29413. + * Fibre for a given file is chosen by per-directory fibration
  29414. + * plugin. Names within given fibre are ordered lexicographically.
  29415. + */
  29416. +
  29417. +#include "../debug.h"
  29418. +#include "plugin_header.h"
  29419. +#include "plugin.h"
  29420. +#include "../super.h"
  29421. +#include "../inode.h"
  29422. +
  29423. +#include <linux/types.h>
  29424. +
  29425. +static const int fibre_shift = 57;
  29426. +
  29427. +#define FIBRE_NO(n) (((__u64)(n)) << fibre_shift)
  29428. +
  29429. +/*
  29430. + * Trivial fibration: all files of directory are just ordered
  29431. + * lexicographically.
  29432. + */
  29433. +static __u64 fibre_trivial(const struct inode *dir, const char *name, int len)
  29434. +{
  29435. + return FIBRE_NO(0);
  29436. +}
  29437. +
  29438. +/*
  29439. + * dot-o fibration: place .o files after all others.
  29440. + */
  29441. +static __u64 fibre_dot_o(const struct inode *dir, const char *name, int len)
  29442. +{
  29443. + /* special treatment for .*\.o */
  29444. + if (len > 2 && name[len - 1] == 'o' && name[len - 2] == '.')
  29445. + return FIBRE_NO(1);
  29446. + else
  29447. + return FIBRE_NO(0);
  29448. +}
  29449. +
  29450. +/*
  29451. + * ext.1 fibration: subdivide directory into 128 fibrations one for each
  29452. + * 7bit extension character (file "foo.h" goes into fibre "h"), plus
  29453. + * default fibre for the rest.
  29454. + */
  29455. +static __u64 fibre_ext_1(const struct inode *dir, const char *name, int len)
  29456. +{
  29457. + if (len > 2 && name[len - 2] == '.')
  29458. + return FIBRE_NO(name[len - 1]);
  29459. + else
  29460. + return FIBRE_NO(0);
  29461. +}
  29462. +
  29463. +/*
  29464. + * ext.3 fibration: try to separate files with different 3-character
  29465. + * extensions from each other.
  29466. + */
  29467. +static __u64 fibre_ext_3(const struct inode *dir, const char *name, int len)
  29468. +{
  29469. + if (len > 4 && name[len - 4] == '.')
  29470. + return FIBRE_NO(name[len - 3] + name[len - 2] + name[len - 1]);
  29471. + else
  29472. + return FIBRE_NO(0);
  29473. +}
  29474. +
  29475. +static int change_fibration(struct inode *inode,
  29476. + reiser4_plugin * plugin,
  29477. + pset_member memb)
  29478. +{
  29479. + int result;
  29480. +
  29481. + assert("nikita-3503", inode != NULL);
  29482. + assert("nikita-3504", plugin != NULL);
  29483. +
  29484. + assert("nikita-3505", is_reiser4_inode(inode));
  29485. + assert("nikita-3506", inode_dir_plugin(inode) != NULL);
  29486. + assert("nikita-3507",
  29487. + plugin->h.type_id == REISER4_FIBRATION_PLUGIN_TYPE);
  29488. +
  29489. + result = 0;
  29490. + if (inode_fibration_plugin(inode) == NULL ||
  29491. + inode_fibration_plugin(inode)->h.id != plugin->h.id) {
  29492. + if (is_dir_empty(inode) == 0)
  29493. + result = aset_set_unsafe(&reiser4_inode_data(inode)->pset,
  29494. + PSET_FIBRATION, plugin);
  29495. + else
  29496. + result = RETERR(-ENOTEMPTY);
  29497. +
  29498. + }
  29499. + return result;
  29500. +}
  29501. +
  29502. +static reiser4_plugin_ops fibration_plugin_ops = {
  29503. + .init = NULL,
  29504. + .load = NULL,
  29505. + .save_len = NULL,
  29506. + .save = NULL,
  29507. + .change = change_fibration
  29508. +};
  29509. +
  29510. +/* fibration plugins */
  29511. +fibration_plugin fibration_plugins[LAST_FIBRATION_ID] = {
  29512. + [FIBRATION_LEXICOGRAPHIC] = {
  29513. + .h = {
  29514. + .type_id = REISER4_FIBRATION_PLUGIN_TYPE,
  29515. + .id = FIBRATION_LEXICOGRAPHIC,
  29516. + .pops = &fibration_plugin_ops,
  29517. + .label = "lexicographic",
  29518. + .desc = "no fibration",
  29519. + .linkage = {NULL, NULL}
  29520. + },
  29521. + .fibre = fibre_trivial
  29522. + },
  29523. + [FIBRATION_DOT_O] = {
  29524. + .h = {
  29525. + .type_id = REISER4_FIBRATION_PLUGIN_TYPE,
  29526. + .id = FIBRATION_DOT_O,
  29527. + .pops = &fibration_plugin_ops,
  29528. + .label = "dot-o",
  29529. + .desc = "fibrate .o files separately",
  29530. + .linkage = {NULL, NULL}
  29531. + },
  29532. + .fibre = fibre_dot_o
  29533. + },
  29534. + [FIBRATION_EXT_1] = {
  29535. + .h = {
  29536. + .type_id = REISER4_FIBRATION_PLUGIN_TYPE,
  29537. + .id = FIBRATION_EXT_1,
  29538. + .pops = &fibration_plugin_ops,
  29539. + .label = "ext-1",
  29540. + .desc = "fibrate file by single character extension",
  29541. + .linkage = {NULL, NULL}
  29542. + },
  29543. + .fibre = fibre_ext_1
  29544. + },
  29545. + [FIBRATION_EXT_3] = {
  29546. + .h = {
  29547. + .type_id = REISER4_FIBRATION_PLUGIN_TYPE,
  29548. + .id = FIBRATION_EXT_3,
  29549. + .pops = &fibration_plugin_ops,
  29550. + .label = "ext-3",
  29551. + .desc = "fibrate file by three character extension",
  29552. + .linkage = {NULL, NULL}
  29553. + },
  29554. + .fibre = fibre_ext_3
  29555. + }
  29556. +};
  29557. +
  29558. +/*
  29559. + * Local variables:
  29560. + * c-indentation-style: "K&R"
  29561. + * mode-name: "LC"
  29562. + * c-basic-offset: 8
  29563. + * tab-width: 8
  29564. + * fill-column: 79
  29565. + * End:
  29566. + */
  29567. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/fibration.h linux-4.14.2/fs/reiser4/plugin/fibration.h
  29568. --- linux-4.14.2.orig/fs/reiser4/plugin/fibration.h 1970-01-01 01:00:00.000000000 +0100
  29569. +++ linux-4.14.2/fs/reiser4/plugin/fibration.h 2017-11-26 22:13:09.000000000 +0100
  29570. @@ -0,0 +1,37 @@
  29571. +/* Copyright 2004 by Hans Reiser, licensing governed by reiser4/README */
  29572. +
  29573. +/* Fibration plugin used by hashed directory plugin to segment content
  29574. + * of directory. See fs/reiser4/plugin/fibration.c for more on this. */
  29575. +
  29576. +#if !defined(__FS_REISER4_PLUGIN_FIBRATION_H__)
  29577. +#define __FS_REISER4_PLUGIN_FIBRATION_H__
  29578. +
  29579. +#include "plugin_header.h"
  29580. +
  29581. +typedef struct fibration_plugin {
  29582. + /* generic fields */
  29583. + plugin_header h;
  29584. +
  29585. + __u64(*fibre) (const struct inode *dir, const char *name, int len);
  29586. +} fibration_plugin;
  29587. +
  29588. +typedef enum {
  29589. + FIBRATION_LEXICOGRAPHIC,
  29590. + FIBRATION_DOT_O,
  29591. + FIBRATION_EXT_1,
  29592. + FIBRATION_EXT_3,
  29593. + LAST_FIBRATION_ID
  29594. +} reiser4_fibration_id;
  29595. +
  29596. +/* __FS_REISER4_PLUGIN_FIBRATION_H__ */
  29597. +#endif
  29598. +
  29599. +/* Make Linus happy.
  29600. + Local variables:
  29601. + c-indentation-style: "K&R"
  29602. + mode-name: "LC"
  29603. + c-basic-offset: 8
  29604. + tab-width: 8
  29605. + fill-column: 120
  29606. + End:
  29607. +*/
  29608. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/file/cryptcompress.c linux-4.14.2/fs/reiser4/plugin/file/cryptcompress.c
  29609. --- linux-4.14.2.orig/fs/reiser4/plugin/file/cryptcompress.c 1970-01-01 01:00:00.000000000 +0100
  29610. +++ linux-4.14.2/fs/reiser4/plugin/file/cryptcompress.c 2017-11-26 22:14:18.000000000 +0100
  29611. @@ -0,0 +1,3798 @@
  29612. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  29613. + reiser4/README */
  29614. +/*
  29615. + * Written by Edward Shishkin.
  29616. + *
  29617. + * Implementations of inode/file/address_space operations
  29618. + * specific for cryptcompress file plugin which manages
  29619. + * regular files built of compressed and(or) encrypted bodies.
  29620. + * See http://dev.namesys.com/CryptcompressPlugin for details.
  29621. + */
  29622. +
  29623. +#include "../../inode.h"
  29624. +#include "../cluster.h"
  29625. +#include "../object.h"
  29626. +#include "../../tree_walk.h"
  29627. +#include "cryptcompress.h"
  29628. +
  29629. +#include <linux/pagevec.h>
  29630. +#include <asm/uaccess.h>
  29631. +#include <linux/swap.h>
  29632. +#include <linux/writeback.h>
  29633. +#include <linux/random.h>
  29634. +#include <linux/scatterlist.h>
  29635. +
  29636. +/*
  29637. + Managing primary and secondary caches by Reiser4
  29638. + cryptcompress file plugin. Synchronization scheme.
  29639. +
  29640. +
  29641. + +------------------+
  29642. + +------------------->| tfm stream |
  29643. + | | (compressed data)|
  29644. + flush | +------------------+
  29645. + +-----------------+ |
  29646. + |(->)longterm lock| V
  29647. +--+ writepages() | | +-***-+ reiser4 +---+
  29648. + | | +--+ | *** | storage tree | |
  29649. + | | | +-***-+ (primary cache)| |
  29650. +u | write() (secondary| cache) V / | \ | |
  29651. +s | ----> +----+ +----+ +----+ +----+ +-***** ******* **----+ ----> | d |
  29652. +e | | | |page cluster | | | **disk cluster** | | i |
  29653. +r | <---- +----+ +----+ +----+ +----+ +-***** **********----+ <---- | s |
  29654. + | read() ^ ^ | | k |
  29655. + | | (->)longterm lock| | page_io()| |
  29656. + | | +------+ | |
  29657. +--+ readpages() | | +---+
  29658. + | V
  29659. + | +------------------+
  29660. + +--------------------| tfm stream |
  29661. + | (plain text) |
  29662. + +------------------+
  29663. +*/
  29664. +
  29665. +/* get cryptcompress specific portion of inode */
  29666. +struct cryptcompress_info *cryptcompress_inode_data(const struct inode *inode)
  29667. +{
  29668. + return &reiser4_inode_data(inode)->file_plugin_data.cryptcompress_info;
  29669. +}
  29670. +
  29671. +/* plugin->u.file.init_inode_data */
  29672. +void init_inode_data_cryptcompress(struct inode *inode,
  29673. + reiser4_object_create_data * crd,
  29674. + int create)
  29675. +{
  29676. + struct cryptcompress_info *data;
  29677. +
  29678. + data = cryptcompress_inode_data(inode);
  29679. + assert("edward-685", data != NULL);
  29680. +
  29681. + memset(data, 0, sizeof(*data));
  29682. +
  29683. + mutex_init(&data->checkin_mutex);
  29684. + data->trunc_index = ULONG_MAX;
  29685. + turn_on_compression(data);
  29686. + set_lattice_factor(data, MIN_LATTICE_FACTOR);
  29687. + init_inode_ordering(inode, crd, create);
  29688. +}
  29689. +
  29690. +/* The following is a part of reiser4 cipher key manager
  29691. + which is called when opening/creating a cryptcompress file */
  29692. +
  29693. +/* get/set cipher key info */
  29694. +struct reiser4_crypto_info * inode_crypto_info (struct inode * inode)
  29695. +{
  29696. + assert("edward-90", inode != NULL);
  29697. + assert("edward-91", reiser4_inode_data(inode) != NULL);
  29698. + return cryptcompress_inode_data(inode)->crypt;
  29699. +}
  29700. +
  29701. +static void set_inode_crypto_info (struct inode * inode,
  29702. + struct reiser4_crypto_info * info)
  29703. +{
  29704. + cryptcompress_inode_data(inode)->crypt = info;
  29705. +}
  29706. +
  29707. +/* allocate a cipher key info */
  29708. +struct reiser4_crypto_info * reiser4_alloc_crypto_info (struct inode * inode)
  29709. +{
  29710. + struct reiser4_crypto_info *info;
  29711. + int fipsize;
  29712. +
  29713. + info = kzalloc(sizeof(*info), reiser4_ctx_gfp_mask_get());
  29714. + if (!info)
  29715. + return ERR_PTR(-ENOMEM);
  29716. +
  29717. + fipsize = inode_digest_plugin(inode)->fipsize;
  29718. + info->keyid = kmalloc(fipsize, reiser4_ctx_gfp_mask_get());
  29719. + if (!info->keyid) {
  29720. + kfree(info);
  29721. + return ERR_PTR(-ENOMEM);
  29722. + }
  29723. + info->host = inode;
  29724. + return info;
  29725. +}
  29726. +
  29727. +#if 0
  29728. +/* allocate/free low-level info for cipher and digest
  29729. + transforms */
  29730. +static int alloc_crypto_tfms(struct reiser4_crypto_info * info)
  29731. +{
  29732. + struct crypto_blkcipher * ctfm = NULL;
  29733. + struct crypto_hash * dtfm = NULL;
  29734. + cipher_plugin * cplug = inode_cipher_plugin(info->host);
  29735. + digest_plugin * dplug = inode_digest_plugin(info->host);
  29736. +
  29737. + if (cplug->alloc) {
  29738. + ctfm = cplug->alloc();
  29739. + if (IS_ERR(ctfm)) {
  29740. + warning("edward-1364",
  29741. + "Can not allocate info for %s\n",
  29742. + cplug->h.desc);
  29743. + return RETERR(PTR_ERR(ctfm));
  29744. + }
  29745. + }
  29746. + info_set_cipher(info, ctfm);
  29747. + if (dplug->alloc) {
  29748. + dtfm = dplug->alloc();
  29749. + if (IS_ERR(dtfm)) {
  29750. + warning("edward-1365",
  29751. + "Can not allocate info for %s\n",
  29752. + dplug->h.desc);
  29753. + goto unhappy_with_digest;
  29754. + }
  29755. + }
  29756. + info_set_digest(info, dtfm);
  29757. + return 0;
  29758. + unhappy_with_digest:
  29759. + if (cplug->free) {
  29760. + cplug->free(ctfm);
  29761. + info_set_cipher(info, NULL);
  29762. + }
  29763. + return RETERR(PTR_ERR(dtfm));
  29764. +}
  29765. +#endif
  29766. +
  29767. +static void
  29768. +free_crypto_tfms(struct reiser4_crypto_info * info)
  29769. +{
  29770. + assert("edward-1366", info != NULL);
  29771. + if (!info_get_cipher(info)) {
  29772. + assert("edward-1601", !info_get_digest(info));
  29773. + return;
  29774. + }
  29775. + inode_cipher_plugin(info->host)->free(info_get_cipher(info));
  29776. + info_set_cipher(info, NULL);
  29777. + inode_digest_plugin(info->host)->free(info_get_digest(info));
  29778. + info_set_digest(info, NULL);
  29779. + return;
  29780. +}
  29781. +
  29782. +#if 0
  29783. +/* create a key fingerprint for disk stat-data */
  29784. +static int create_keyid (struct reiser4_crypto_info * info,
  29785. + struct reiser4_crypto_data * data)
  29786. +{
  29787. + int ret = -ENOMEM;
  29788. + size_t blk, pad;
  29789. + __u8 * dmem;
  29790. + __u8 * cmem;
  29791. + struct hash_desc ddesc;
  29792. + struct blkcipher_desc cdesc;
  29793. + struct scatterlist sg;
  29794. +
  29795. + assert("edward-1367", info != NULL);
  29796. + assert("edward-1368", info->keyid != NULL);
  29797. +
  29798. + ddesc.tfm = info_get_digest(info);
  29799. + ddesc.flags = 0;
  29800. + cdesc.tfm = info_get_cipher(info);
  29801. + cdesc.flags = 0;
  29802. +
  29803. + dmem = kmalloc((size_t)crypto_hash_digestsize(ddesc.tfm),
  29804. + reiser4_ctx_gfp_mask_get());
  29805. + if (!dmem)
  29806. + goto exit1;
  29807. +
  29808. + blk = crypto_blkcipher_blocksize(cdesc.tfm);
  29809. +
  29810. + pad = data->keyid_size % blk;
  29811. + pad = (pad ? blk - pad : 0);
  29812. +
  29813. + cmem = kmalloc((size_t)data->keyid_size + pad,
  29814. + reiser4_ctx_gfp_mask_get());
  29815. + if (!cmem)
  29816. + goto exit2;
  29817. + memcpy(cmem, data->keyid, data->keyid_size);
  29818. + memset(cmem + data->keyid_size, 0, pad);
  29819. +
  29820. + sg_init_one(&sg, cmem, data->keyid_size + pad);
  29821. +
  29822. + ret = crypto_blkcipher_encrypt(&cdesc, &sg, &sg,
  29823. + data->keyid_size + pad);
  29824. + if (ret) {
  29825. + warning("edward-1369",
  29826. + "encryption failed flags=%x\n", cdesc.flags);
  29827. + goto exit3;
  29828. + }
  29829. + ret = crypto_hash_digest(&ddesc, &sg, sg.length, dmem);
  29830. + if (ret) {
  29831. + warning("edward-1602",
  29832. + "digest failed flags=%x\n", ddesc.flags);
  29833. + goto exit3;
  29834. + }
  29835. + memcpy(info->keyid, dmem, inode_digest_plugin(info->host)->fipsize);
  29836. + exit3:
  29837. + kfree(cmem);
  29838. + exit2:
  29839. + kfree(dmem);
  29840. + exit1:
  29841. + return ret;
  29842. +}
  29843. +#endif
  29844. +
  29845. +static void destroy_keyid(struct reiser4_crypto_info * info)
  29846. +{
  29847. + assert("edward-1370", info != NULL);
  29848. + assert("edward-1371", info->keyid != NULL);
  29849. + kfree(info->keyid);
  29850. + return;
  29851. +}
  29852. +
  29853. +static void __free_crypto_info (struct inode * inode)
  29854. +{
  29855. + struct reiser4_crypto_info * info = inode_crypto_info(inode);
  29856. + assert("edward-1372", info != NULL);
  29857. +
  29858. + free_crypto_tfms(info);
  29859. + destroy_keyid(info);
  29860. + kfree(info);
  29861. +}
  29862. +
  29863. +#if 0
  29864. +static void instantiate_crypto_info(struct reiser4_crypto_info * info)
  29865. +{
  29866. + assert("edward-1373", info != NULL);
  29867. + assert("edward-1374", info->inst == 0);
  29868. + info->inst = 1;
  29869. +}
  29870. +#endif
  29871. +
  29872. +static void uninstantiate_crypto_info(struct reiser4_crypto_info * info)
  29873. +{
  29874. + assert("edward-1375", info != NULL);
  29875. + info->inst = 0;
  29876. +}
  29877. +
  29878. +#if 0
  29879. +static int is_crypto_info_instantiated(struct reiser4_crypto_info * info)
  29880. +{
  29881. + return info->inst;
  29882. +}
  29883. +
  29884. +static int inode_has_cipher_key(struct inode * inode)
  29885. +{
  29886. + assert("edward-1376", inode != NULL);
  29887. + return inode_crypto_info(inode) &&
  29888. + is_crypto_info_instantiated(inode_crypto_info(inode));
  29889. +}
  29890. +#endif
  29891. +
  29892. +static void free_crypto_info (struct inode * inode)
  29893. +{
  29894. + uninstantiate_crypto_info(inode_crypto_info(inode));
  29895. + __free_crypto_info(inode);
  29896. +}
  29897. +
  29898. +static int need_cipher(struct inode * inode)
  29899. +{
  29900. + return inode_cipher_plugin(inode) !=
  29901. + cipher_plugin_by_id(NONE_CIPHER_ID);
  29902. +}
  29903. +
  29904. +/* Parse @data which contains a (uninstantiated) cipher key imported
  29905. + from user space, create a low-level cipher info and attach it to
  29906. + the @object. If success, then info contains an instantiated key */
  29907. +#if 0
  29908. +struct reiser4_crypto_info * create_crypto_info(struct inode * object,
  29909. + struct reiser4_crypto_data * data)
  29910. +{
  29911. + int ret;
  29912. + struct reiser4_crypto_info * info;
  29913. +
  29914. + assert("edward-1377", data != NULL);
  29915. + assert("edward-1378", need_cipher(object));
  29916. +
  29917. + if (inode_file_plugin(object) !=
  29918. + file_plugin_by_id(DIRECTORY_FILE_PLUGIN_ID))
  29919. + return ERR_PTR(-EINVAL);
  29920. +
  29921. + info = reiser4_alloc_crypto_info(object);
  29922. + if (IS_ERR(info))
  29923. + return info;
  29924. + ret = alloc_crypto_tfms(info);
  29925. + if (ret)
  29926. + goto err;
  29927. + /* instantiating a key */
  29928. + ret = crypto_blkcipher_setkey(info_get_cipher(info),
  29929. + data->key,
  29930. + data->keysize);
  29931. + if (ret) {
  29932. + warning("edward-1379",
  29933. + "setkey failed flags=%x",
  29934. + crypto_blkcipher_get_flags(info_get_cipher(info)));
  29935. + goto err;
  29936. + }
  29937. + info->keysize = data->keysize;
  29938. + ret = create_keyid(info, data);
  29939. + if (ret)
  29940. + goto err;
  29941. + instantiate_crypto_info(info);
  29942. + return info;
  29943. + err:
  29944. + __free_crypto_info(object);
  29945. + return ERR_PTR(ret);
  29946. +}
  29947. +#endif
  29948. +
  29949. +/* increment/decrement a load counter when
  29950. + attaching/detaching the crypto-stat to any object */
  29951. +static void load_crypto_info(struct reiser4_crypto_info * info)
  29952. +{
  29953. + assert("edward-1380", info != NULL);
  29954. + inc_keyload_count(info);
  29955. +}
  29956. +
  29957. +static void unload_crypto_info(struct inode * inode)
  29958. +{
  29959. + struct reiser4_crypto_info * info = inode_crypto_info(inode);
  29960. + assert("edward-1381", info->keyload_count > 0);
  29961. +
  29962. + dec_keyload_count(inode_crypto_info(inode));
  29963. + if (info->keyload_count == 0)
  29964. + /* final release */
  29965. + free_crypto_info(inode);
  29966. +}
  29967. +
  29968. +/* attach/detach an existing crypto-stat */
  29969. +void reiser4_attach_crypto_info(struct inode * inode,
  29970. + struct reiser4_crypto_info * info)
  29971. +{
  29972. + assert("edward-1382", inode != NULL);
  29973. + assert("edward-1383", info != NULL);
  29974. + assert("edward-1384", inode_crypto_info(inode) == NULL);
  29975. +
  29976. + set_inode_crypto_info(inode, info);
  29977. + load_crypto_info(info);
  29978. +}
  29979. +
  29980. +/* returns true, if crypto stat can be attached to the @host */
  29981. +#if REISER4_DEBUG
  29982. +static int host_allows_crypto_info(struct inode * host)
  29983. +{
  29984. + int ret;
  29985. + file_plugin * fplug = inode_file_plugin(host);
  29986. +
  29987. + switch (fplug->h.id) {
  29988. + case CRYPTCOMPRESS_FILE_PLUGIN_ID:
  29989. + ret = 1;
  29990. + break;
  29991. + default:
  29992. + ret = 0;
  29993. + }
  29994. + return ret;
  29995. +}
  29996. +#endif /* REISER4_DEBUG */
  29997. +
  29998. +static void reiser4_detach_crypto_info(struct inode * inode)
  29999. +{
  30000. + assert("edward-1385", inode != NULL);
  30001. + assert("edward-1386", host_allows_crypto_info(inode));
  30002. +
  30003. + if (inode_crypto_info(inode))
  30004. + unload_crypto_info(inode);
  30005. + set_inode_crypto_info(inode, NULL);
  30006. +}
  30007. +
  30008. +#if 0
  30009. +
  30010. +/* compare fingerprints of @child and @parent */
  30011. +static int keyid_eq(struct reiser4_crypto_info * child,
  30012. + struct reiser4_crypto_info * parent)
  30013. +{
  30014. + return !memcmp(child->keyid,
  30015. + parent->keyid,
  30016. + info_digest_plugin(parent)->fipsize);
  30017. +}
  30018. +
  30019. +/* check if a crypto-stat (which is bound to @parent) can be inherited */
  30020. +int can_inherit_crypto_cryptcompress(struct inode *child, struct inode *parent)
  30021. +{
  30022. + if (!need_cipher(child))
  30023. + return 0;
  30024. + /* the child is created */
  30025. + if (!inode_crypto_info(child))
  30026. + return 1;
  30027. + /* the child is looked up */
  30028. + if (!inode_crypto_info(parent))
  30029. + return 0;
  30030. + return (inode_cipher_plugin(child) == inode_cipher_plugin(parent) &&
  30031. + inode_digest_plugin(child) == inode_digest_plugin(parent) &&
  30032. + inode_crypto_info(child)->keysize ==
  30033. + inode_crypto_info(parent)->keysize &&
  30034. + keyid_eq(inode_crypto_info(child), inode_crypto_info(parent)));
  30035. +}
  30036. +#endif
  30037. +
  30038. +/* helper functions for ->create() method of the cryptcompress plugin */
  30039. +static int inode_set_crypto(struct inode * object)
  30040. +{
  30041. + reiser4_inode * info;
  30042. + if (!inode_crypto_info(object)) {
  30043. + if (need_cipher(object))
  30044. + return RETERR(-EINVAL);
  30045. + /* the file is not to be encrypted */
  30046. + return 0;
  30047. + }
  30048. + info = reiser4_inode_data(object);
  30049. + info->extmask |= (1 << CRYPTO_STAT);
  30050. + return 0;
  30051. +}
  30052. +
  30053. +static int inode_init_compression(struct inode * object)
  30054. +{
  30055. + int result = 0;
  30056. + assert("edward-1461", object != NULL);
  30057. + if (inode_compression_plugin(object)->init)
  30058. + result = inode_compression_plugin(object)->init();
  30059. + return result;
  30060. +}
  30061. +
  30062. +static int inode_check_cluster(struct inode * object)
  30063. +{
  30064. + assert("edward-696", object != NULL);
  30065. +
  30066. + if (unlikely(inode_cluster_size(object) < PAGE_SIZE)) {
  30067. + warning("edward-1320", "Can not support '%s' "
  30068. + "logical clusters (less then page size)",
  30069. + inode_cluster_plugin(object)->h.label);
  30070. + return RETERR(-EINVAL);
  30071. + }
  30072. + if (unlikely(inode_cluster_shift(object)) >= BITS_PER_BYTE*sizeof(int)){
  30073. + warning("edward-1463", "Can not support '%s' "
  30074. + "logical clusters (too big for transform)",
  30075. + inode_cluster_plugin(object)->h.label);
  30076. + return RETERR(-EINVAL);
  30077. + }
  30078. + return 0;
  30079. +}
  30080. +
  30081. +/* plugin->destroy_inode() */
  30082. +void destroy_inode_cryptcompress(struct inode * inode)
  30083. +{
  30084. + assert("edward-1464", INODE_PGCOUNT(inode) == 0);
  30085. + reiser4_detach_crypto_info(inode);
  30086. + return;
  30087. +}
  30088. +
  30089. +/* plugin->create_object():
  30090. +. install plugins
  30091. +. attach crypto info if specified
  30092. +. attach compression info if specified
  30093. +. attach cluster info
  30094. +*/
  30095. +int create_object_cryptcompress(struct inode *object, struct inode *parent,
  30096. + reiser4_object_create_data * data)
  30097. +{
  30098. + int result;
  30099. + reiser4_inode *info;
  30100. +
  30101. + assert("edward-23", object != NULL);
  30102. + assert("edward-24", parent != NULL);
  30103. + assert("edward-30", data != NULL);
  30104. + assert("edward-26", reiser4_inode_get_flag(object, REISER4_NO_SD));
  30105. + assert("edward-27", data->id == CRYPTCOMPRESS_FILE_PLUGIN_ID);
  30106. +
  30107. + info = reiser4_inode_data(object);
  30108. +
  30109. + assert("edward-29", info != NULL);
  30110. +
  30111. + /* set file bit */
  30112. + info->plugin_mask |= (1 << PSET_FILE);
  30113. +
  30114. + /* set crypto */
  30115. + result = inode_set_crypto(object);
  30116. + if (result)
  30117. + goto error;
  30118. + /* set compression */
  30119. + result = inode_init_compression(object);
  30120. + if (result)
  30121. + goto error;
  30122. + /* set cluster */
  30123. + result = inode_check_cluster(object);
  30124. + if (result)
  30125. + goto error;
  30126. +
  30127. + /* save everything in disk stat-data */
  30128. + result = write_sd_by_inode_common(object);
  30129. + if (!result)
  30130. + return 0;
  30131. + error:
  30132. + reiser4_detach_crypto_info(object);
  30133. + return result;
  30134. +}
  30135. +
  30136. +/* plugin->open() */
  30137. +int open_cryptcompress(struct inode * inode, struct file * file)
  30138. +{
  30139. + return 0;
  30140. +}
  30141. +
  30142. +/* returns a blocksize, the attribute of a cipher algorithm */
  30143. +static unsigned int
  30144. +cipher_blocksize(struct inode * inode)
  30145. +{
  30146. + assert("edward-758", need_cipher(inode));
  30147. + assert("edward-1400", inode_crypto_info(inode) != NULL);
  30148. + return crypto_blkcipher_blocksize
  30149. + (info_get_cipher(inode_crypto_info(inode)));
  30150. +}
  30151. +
  30152. +/* returns offset translated by scale factor of the crypto-algorithm */
  30153. +static loff_t inode_scaled_offset (struct inode * inode,
  30154. + const loff_t src_off /* input offset */)
  30155. +{
  30156. + assert("edward-97", inode != NULL);
  30157. +
  30158. + if (!need_cipher(inode) ||
  30159. + src_off == get_key_offset(reiser4_min_key()) ||
  30160. + src_off == get_key_offset(reiser4_max_key()))
  30161. + return src_off;
  30162. +
  30163. + return inode_cipher_plugin(inode)->scale(inode,
  30164. + cipher_blocksize(inode),
  30165. + src_off);
  30166. +}
  30167. +
  30168. +/* returns disk cluster size */
  30169. +size_t inode_scaled_cluster_size(struct inode * inode)
  30170. +{
  30171. + assert("edward-110", inode != NULL);
  30172. +
  30173. + return inode_scaled_offset(inode, inode_cluster_size(inode));
  30174. +}
  30175. +
  30176. +/* set number of cluster pages */
  30177. +static void set_cluster_nrpages(struct cluster_handle * clust,
  30178. + struct inode *inode)
  30179. +{
  30180. + struct reiser4_slide * win;
  30181. +
  30182. + assert("edward-180", clust != NULL);
  30183. + assert("edward-1040", inode != NULL);
  30184. +
  30185. + clust->old_nrpages = size_in_pages(lbytes(clust->index, inode));
  30186. + win = clust->win;
  30187. + if (!win) {
  30188. + clust->nr_pages = size_in_pages(lbytes(clust->index, inode));
  30189. + return;
  30190. + }
  30191. + assert("edward-1176", clust->op != LC_INVAL);
  30192. + assert("edward-1064", win->off + win->count + win->delta != 0);
  30193. +
  30194. + if (win->stat == HOLE_WINDOW &&
  30195. + win->off == 0 && win->count == inode_cluster_size(inode)) {
  30196. + /* special case: writing a "fake" logical cluster */
  30197. + clust->nr_pages = 0;
  30198. + return;
  30199. + }
  30200. + clust->nr_pages = size_in_pages(max(win->off + win->count + win->delta,
  30201. + lbytes(clust->index, inode)));
  30202. + return;
  30203. +}
  30204. +
  30205. +/* plugin->key_by_inode()
  30206. + build key of a disk cluster */
  30207. +int key_by_inode_cryptcompress(struct inode *inode, loff_t off,
  30208. + reiser4_key * key)
  30209. +{
  30210. + assert("edward-64", inode != 0);
  30211. +
  30212. + if (likely(off != get_key_offset(reiser4_max_key())))
  30213. + off = off_to_clust_to_off(off, inode);
  30214. + if (inode_crypto_info(inode))
  30215. + off = inode_scaled_offset(inode, off);
  30216. +
  30217. + key_by_inode_and_offset_common(inode, 0, key);
  30218. + set_key_offset(key, (__u64)off);
  30219. + return 0;
  30220. +}
  30221. +
  30222. +/* plugin->flow_by_inode() */
  30223. +/* flow is used to read/write disk clusters */
  30224. +int flow_by_inode_cryptcompress(struct inode *inode, const char __user * buf,
  30225. + int user, /* 1: @buf is of user space,
  30226. + 0: kernel space */
  30227. + loff_t size, /* @buf size */
  30228. + loff_t off, /* offset to start io from */
  30229. + rw_op op, /* READ or WRITE */
  30230. + flow_t * f /* resulting flow */)
  30231. +{
  30232. + assert("edward-436", f != NULL);
  30233. + assert("edward-149", inode != NULL);
  30234. + assert("edward-150", inode_file_plugin(inode) != NULL);
  30235. + assert("edward-1465", user == 0); /* we use flow to read/write
  30236. + disk clusters located in
  30237. + kernel space */
  30238. + f->length = size;
  30239. + memcpy(&f->data, &buf, sizeof(buf));
  30240. + f->user = user;
  30241. + f->op = op;
  30242. +
  30243. + return key_by_inode_cryptcompress(inode, off, &f->key);
  30244. +}
  30245. +
  30246. +static int
  30247. +cryptcompress_hint_validate(hint_t * hint, const reiser4_key * key,
  30248. + znode_lock_mode lock_mode)
  30249. +{
  30250. + coord_t *coord;
  30251. +
  30252. + assert("edward-704", hint != NULL);
  30253. + assert("edward-1089", !hint_is_valid(hint));
  30254. + assert("edward-706", hint->lh.owner == NULL);
  30255. +
  30256. + coord = &hint->ext_coord.coord;
  30257. +
  30258. + if (!hint || !hint_is_set(hint) || hint->mode != lock_mode)
  30259. + /* hint either not set or set by different operation */
  30260. + return RETERR(-E_REPEAT);
  30261. +
  30262. + if (get_key_offset(key) != hint->offset)
  30263. + /* hint is set for different key */
  30264. + return RETERR(-E_REPEAT);
  30265. +
  30266. + assert("edward-707", reiser4_schedulable());
  30267. +
  30268. + return reiser4_seal_validate(&hint->seal, &hint->ext_coord.coord,
  30269. + key, &hint->lh, lock_mode,
  30270. + ZNODE_LOCK_LOPRI);
  30271. +}
  30272. +
  30273. +/* reserve disk space when writing a logical cluster */
  30274. +static int reserve4cluster(struct inode *inode, struct cluster_handle *clust)
  30275. +{
  30276. + int result = 0;
  30277. +
  30278. + assert("edward-965", reiser4_schedulable());
  30279. + assert("edward-439", inode != NULL);
  30280. + assert("edward-440", clust != NULL);
  30281. + assert("edward-441", clust->pages != NULL);
  30282. +
  30283. + if (clust->nr_pages == 0) {
  30284. + assert("edward-1152", clust->win != NULL);
  30285. + assert("edward-1153", clust->win->stat == HOLE_WINDOW);
  30286. + /* don't reserve disk space for fake logical cluster */
  30287. + return 0;
  30288. + }
  30289. + assert("edward-442", jprivate(clust->pages[0]) != NULL);
  30290. +
  30291. + result = reiser4_grab_space_force(estimate_insert_cluster(inode) +
  30292. + estimate_update_cluster(inode),
  30293. + BA_CAN_COMMIT);
  30294. + if (result)
  30295. + return result;
  30296. + clust->reserved = 1;
  30297. + grabbed2cluster_reserved(estimate_insert_cluster(inode) +
  30298. + estimate_update_cluster(inode));
  30299. +#if REISER4_DEBUG
  30300. + clust->reserved_prepped = estimate_update_cluster(inode);
  30301. + clust->reserved_unprepped = estimate_insert_cluster(inode);
  30302. +#endif
  30303. + /* there can be space grabbed by txnmgr_force_commit_all */
  30304. + return 0;
  30305. +}
  30306. +
  30307. +/* free reserved disk space if writing a logical cluster fails */
  30308. +static void free_reserved4cluster(struct inode *inode,
  30309. + struct cluster_handle *ch, int count)
  30310. +{
  30311. + assert("edward-967", ch->reserved == 1);
  30312. +
  30313. + cluster_reserved2free(count);
  30314. + ch->reserved = 0;
  30315. +}
  30316. +
  30317. +/*
  30318. + * The core search procedure of the cryptcompress plugin.
  30319. + * If returned value is not cbk_errored, then current position
  30320. + * is locked.
  30321. + */
  30322. +static int find_cluster_item(hint_t * hint,
  30323. + const reiser4_key * key, /* key of the item we are
  30324. + looking for */
  30325. + znode_lock_mode lock_mode /* which lock */ ,
  30326. + ra_info_t * ra_info, lookup_bias bias, __u32 flags)
  30327. +{
  30328. + int result;
  30329. + reiser4_key ikey;
  30330. + coord_t *coord = &hint->ext_coord.coord;
  30331. + coord_t orig = *coord;
  30332. +
  30333. + assert("edward-152", hint != NULL);
  30334. +
  30335. + if (!hint_is_valid(hint)) {
  30336. + result = cryptcompress_hint_validate(hint, key, lock_mode);
  30337. + if (result == -E_REPEAT)
  30338. + goto traverse_tree;
  30339. + else if (result) {
  30340. + assert("edward-1216", 0);
  30341. + return result;
  30342. + }
  30343. + hint_set_valid(hint);
  30344. + }
  30345. + assert("edward-709", znode_is_any_locked(coord->node));
  30346. + /*
  30347. + * Hint is valid, so we perform in-place lookup.
  30348. + * It means we just need to check if the next item in
  30349. + * the tree (relative to the current position @coord)
  30350. + * has key @key.
  30351. + *
  30352. + * Valid hint means in particular, that node is not
  30353. + * empty and at least one its item has been processed
  30354. + */
  30355. + if (equal_to_rdk(coord->node, key)) {
  30356. + /*
  30357. + * Look for the item in the right neighbor
  30358. + */
  30359. + lock_handle lh_right;
  30360. +
  30361. + init_lh(&lh_right);
  30362. + result = reiser4_get_right_neighbor(&lh_right, coord->node,
  30363. + znode_is_wlocked(coord->node) ?
  30364. + ZNODE_WRITE_LOCK : ZNODE_READ_LOCK,
  30365. + GN_CAN_USE_UPPER_LEVELS);
  30366. + if (result) {
  30367. + done_lh(&lh_right);
  30368. + reiser4_unset_hint(hint);
  30369. + if (result == -E_NO_NEIGHBOR)
  30370. + return RETERR(-EIO);
  30371. + return result;
  30372. + }
  30373. + assert("edward-1218",
  30374. + equal_to_ldk(lh_right.node, key));
  30375. + result = zload(lh_right.node);
  30376. + if (result) {
  30377. + done_lh(&lh_right);
  30378. + reiser4_unset_hint(hint);
  30379. + return result;
  30380. + }
  30381. + coord_init_first_unit_nocheck(coord, lh_right.node);
  30382. +
  30383. + if (!coord_is_existing_item(coord)) {
  30384. + zrelse(lh_right.node);
  30385. + done_lh(&lh_right);
  30386. + goto traverse_tree;
  30387. + }
  30388. + item_key_by_coord(coord, &ikey);
  30389. + zrelse(coord->node);
  30390. + if (unlikely(!keyeq(key, &ikey))) {
  30391. + warning("edward-1608",
  30392. + "Expected item not found. Fsck?");
  30393. + done_lh(&lh_right);
  30394. + goto not_found;
  30395. + }
  30396. + /*
  30397. + * item has been found in the right neighbor;
  30398. + * move lock to the right
  30399. + */
  30400. + done_lh(&hint->lh);
  30401. + move_lh(&hint->lh, &lh_right);
  30402. +
  30403. + dclust_inc_extension_ncount(hint);
  30404. +
  30405. + return CBK_COORD_FOUND;
  30406. + } else {
  30407. + /*
  30408. + * Look for the item in the current node
  30409. + */
  30410. + coord->item_pos++;
  30411. + coord->unit_pos = 0;
  30412. + coord->between = AT_UNIT;
  30413. +
  30414. + result = zload(coord->node);
  30415. + if (result) {
  30416. + done_lh(&hint->lh);
  30417. + return result;
  30418. + }
  30419. + if (!coord_is_existing_item(coord)) {
  30420. + zrelse(coord->node);
  30421. + goto not_found;
  30422. + }
  30423. + item_key_by_coord(coord, &ikey);
  30424. + zrelse(coord->node);
  30425. + if (!keyeq(key, &ikey))
  30426. + goto not_found;
  30427. + /*
  30428. + * item has been found in the current node
  30429. + */
  30430. + dclust_inc_extension_ncount(hint);
  30431. +
  30432. + return CBK_COORD_FOUND;
  30433. + }
  30434. + not_found:
  30435. + /*
  30436. + * The tree doesn't contain an item with @key;
  30437. + * roll back the coord
  30438. + */
  30439. + *coord = orig;
  30440. + ON_DEBUG(coord_update_v(coord));
  30441. + return CBK_COORD_NOTFOUND;
  30442. +
  30443. + traverse_tree:
  30444. +
  30445. + reiser4_unset_hint(hint);
  30446. + dclust_init_extension(hint);
  30447. + coord_init_zero(coord);
  30448. +
  30449. + assert("edward-713", hint->lh.owner == NULL);
  30450. + assert("edward-714", reiser4_schedulable());
  30451. +
  30452. + result = coord_by_key(current_tree, key, coord, &hint->lh,
  30453. + lock_mode, bias, LEAF_LEVEL, LEAF_LEVEL,
  30454. + CBK_UNIQUE | flags, ra_info);
  30455. + if (cbk_errored(result))
  30456. + return result;
  30457. + if(result == CBK_COORD_FOUND)
  30458. + dclust_inc_extension_ncount(hint);
  30459. + hint_set_valid(hint);
  30460. + return result;
  30461. +}
  30462. +
  30463. +/* This function is called by deflate[inflate] manager when
  30464. + creating a transformed/plain stream to check if we should
  30465. + create/cut some overhead. If this returns true, then @oh
  30466. + contains the size of this overhead.
  30467. + */
  30468. +static int need_cut_or_align(struct inode * inode,
  30469. + struct cluster_handle * ch, rw_op rw, int * oh)
  30470. +{
  30471. + struct tfm_cluster * tc = &ch->tc;
  30472. + switch (rw) {
  30473. + case WRITE_OP: /* estimate align */
  30474. + *oh = tc->len % cipher_blocksize(inode);
  30475. + if (*oh != 0)
  30476. + return 1;
  30477. + break;
  30478. + case READ_OP: /* estimate cut */
  30479. + *oh = *(tfm_output_data(ch) + tc->len - 1);
  30480. + break;
  30481. + default:
  30482. + impossible("edward-1401", "bad option");
  30483. + }
  30484. + return (tc->len != tc->lsize);
  30485. +}
  30486. +
  30487. +/* create/cut an overhead of transformed/plain stream */
  30488. +static void align_or_cut_overhead(struct inode * inode,
  30489. + struct cluster_handle * ch, rw_op rw)
  30490. +{
  30491. + unsigned int oh;
  30492. + cipher_plugin * cplug = inode_cipher_plugin(inode);
  30493. +
  30494. + assert("edward-1402", need_cipher(inode));
  30495. +
  30496. + if (!need_cut_or_align(inode, ch, rw, &oh))
  30497. + return;
  30498. + switch (rw) {
  30499. + case WRITE_OP: /* do align */
  30500. + ch->tc.len +=
  30501. + cplug->align_stream(tfm_input_data(ch) +
  30502. + ch->tc.len, ch->tc.len,
  30503. + cipher_blocksize(inode));
  30504. + *(tfm_input_data(ch) + ch->tc.len - 1) =
  30505. + cipher_blocksize(inode) - oh;
  30506. + break;
  30507. + case READ_OP: /* do cut */
  30508. + assert("edward-1403", oh <= cipher_blocksize(inode));
  30509. + ch->tc.len -= oh;
  30510. + break;
  30511. + default:
  30512. + impossible("edward-1404", "bad option");
  30513. + }
  30514. + return;
  30515. +}
  30516. +
  30517. +static unsigned max_cipher_overhead(struct inode * inode)
  30518. +{
  30519. + if (!need_cipher(inode) || !inode_cipher_plugin(inode)->align_stream)
  30520. + return 0;
  30521. + return cipher_blocksize(inode);
  30522. +}
  30523. +
  30524. +static int deflate_overhead(struct inode *inode)
  30525. +{
  30526. + return (inode_compression_plugin(inode)->
  30527. + checksum ? DC_CHECKSUM_SIZE : 0);
  30528. +}
  30529. +
  30530. +static unsigned deflate_overrun(struct inode * inode, int ilen)
  30531. +{
  30532. + return coa_overrun(inode_compression_plugin(inode), ilen);
  30533. +}
  30534. +
  30535. +static bool is_all_zero(char const* mem, size_t size)
  30536. +{
  30537. + while (size-- > 0)
  30538. + if (*mem++)
  30539. + return false;
  30540. + return true;
  30541. +}
  30542. +
  30543. +static inline bool should_punch_hole(struct tfm_cluster *tc)
  30544. +{
  30545. + if (0 &&
  30546. + !reiser4_is_set(reiser4_get_current_sb(), REISER4_DONT_PUNCH_HOLES)
  30547. + && is_all_zero(tfm_stream_data(tc, INPUT_STREAM), tc->lsize)) {
  30548. +
  30549. + tc->hole = 1;
  30550. + return true;
  30551. + }
  30552. + return false;
  30553. +}
  30554. +
  30555. +/* Estimating compressibility of a logical cluster by various
  30556. + policies represented by compression mode plugin.
  30557. + If this returns false, then compressor won't be called for
  30558. + the cluster of index @index.
  30559. +*/
  30560. +static int should_compress(struct tfm_cluster *tc, cloff_t index,
  30561. + struct inode *inode)
  30562. +{
  30563. + compression_plugin *cplug = inode_compression_plugin(inode);
  30564. + compression_mode_plugin *mplug = inode_compression_mode_plugin(inode);
  30565. +
  30566. + assert("edward-1321", tc->len != 0);
  30567. + assert("edward-1322", cplug != NULL);
  30568. + assert("edward-1323", mplug != NULL);
  30569. +
  30570. + if (should_punch_hole(tc))
  30571. + /*
  30572. + * we are about to punch a hole,
  30573. + * so don't compress data
  30574. + */
  30575. + return 0;
  30576. + return /* estimate by size */
  30577. + (cplug->min_size_deflate ?
  30578. + tc->len >= cplug->min_size_deflate() :
  30579. + 1) &&
  30580. + /* estimate by compression mode plugin */
  30581. + (mplug->should_deflate ?
  30582. + mplug->should_deflate(inode, index) :
  30583. + 1);
  30584. +}
  30585. +
  30586. +/* Evaluating results of compression transform.
  30587. + Returns true, if we need to accept this results */
  30588. +static int save_compressed(int size_before, int size_after, struct inode *inode)
  30589. +{
  30590. + return (size_after + deflate_overhead(inode) +
  30591. + max_cipher_overhead(inode) < size_before);
  30592. +}
  30593. +
  30594. +/* Guess result of the evaluation above */
  30595. +static int need_inflate(struct cluster_handle * ch, struct inode * inode,
  30596. + int encrypted /* is cluster encrypted */ )
  30597. +{
  30598. + struct tfm_cluster * tc = &ch->tc;
  30599. +
  30600. + assert("edward-142", tc != 0);
  30601. + assert("edward-143", inode != NULL);
  30602. +
  30603. + return tc->len <
  30604. + (encrypted ?
  30605. + inode_scaled_offset(inode, tc->lsize) :
  30606. + tc->lsize);
  30607. +}
  30608. +
  30609. +/* If results of compression were accepted, then we add
  30610. + a checksum to catch possible disk cluster corruption.
  30611. + The following is a format of the data stored in disk clusters:
  30612. +
  30613. + data This is (transformed) logical cluster.
  30614. + cipher_overhead This is created by ->align() method
  30615. + of cipher plugin. May be absent.
  30616. + checksum (4) This is created by ->checksum method
  30617. + of compression plugin to check
  30618. + integrity. May be absent.
  30619. +
  30620. + Crypto overhead format:
  30621. +
  30622. + data
  30623. + control_byte (1) contains aligned overhead size:
  30624. + 1 <= overhead <= cipher_blksize
  30625. +*/
  30626. +/* Append a checksum at the end of a transformed stream */
  30627. +static void dc_set_checksum(compression_plugin * cplug, struct tfm_cluster * tc)
  30628. +{
  30629. + __u32 checksum;
  30630. +
  30631. + assert("edward-1309", tc != NULL);
  30632. + assert("edward-1310", tc->len > 0);
  30633. + assert("edward-1311", cplug->checksum != NULL);
  30634. +
  30635. + checksum = cplug->checksum(tfm_stream_data(tc, OUTPUT_STREAM), tc->len);
  30636. + put_unaligned(cpu_to_le32(checksum),
  30637. + (d32 *)(tfm_stream_data(tc, OUTPUT_STREAM) + tc->len));
  30638. + tc->len += (int)DC_CHECKSUM_SIZE;
  30639. +}
  30640. +
  30641. +/* Check a disk cluster checksum.
  30642. + Returns 0 if checksum is correct, otherwise returns 1 */
  30643. +static int dc_check_checksum(compression_plugin * cplug, struct tfm_cluster * tc)
  30644. +{
  30645. + assert("edward-1312", tc != NULL);
  30646. + assert("edward-1313", tc->len > (int)DC_CHECKSUM_SIZE);
  30647. + assert("edward-1314", cplug->checksum != NULL);
  30648. +
  30649. + if (cplug->checksum(tfm_stream_data(tc, INPUT_STREAM),
  30650. + tc->len - (int)DC_CHECKSUM_SIZE) !=
  30651. + le32_to_cpu(get_unaligned((d32 *)
  30652. + (tfm_stream_data(tc, INPUT_STREAM)
  30653. + + tc->len - (int)DC_CHECKSUM_SIZE)))) {
  30654. + warning("edward-156",
  30655. + "Bad disk cluster checksum %d, (should be %d) Fsck?\n",
  30656. + (int)le32_to_cpu
  30657. + (get_unaligned((d32 *)
  30658. + (tfm_stream_data(tc, INPUT_STREAM) +
  30659. + tc->len - (int)DC_CHECKSUM_SIZE))),
  30660. + (int)cplug->checksum
  30661. + (tfm_stream_data(tc, INPUT_STREAM),
  30662. + tc->len - (int)DC_CHECKSUM_SIZE));
  30663. + return 1;
  30664. + }
  30665. + tc->len -= (int)DC_CHECKSUM_SIZE;
  30666. + return 0;
  30667. +}
  30668. +
  30669. +/* get input/output stream for some transform action */
  30670. +int grab_tfm_stream(struct inode * inode, struct tfm_cluster * tc,
  30671. + tfm_stream_id id)
  30672. +{
  30673. + size_t size = inode_scaled_cluster_size(inode);
  30674. +
  30675. + assert("edward-901", tc != NULL);
  30676. + assert("edward-1027", inode_compression_plugin(inode) != NULL);
  30677. +
  30678. + if (cluster_get_tfm_act(tc) == TFMA_WRITE)
  30679. + size += deflate_overrun(inode, inode_cluster_size(inode));
  30680. +
  30681. + if (!get_tfm_stream(tc, id) && id == INPUT_STREAM)
  30682. + alternate_streams(tc);
  30683. + if (!get_tfm_stream(tc, id))
  30684. + return alloc_tfm_stream(tc, size, id);
  30685. +
  30686. + assert("edward-902", tfm_stream_is_set(tc, id));
  30687. +
  30688. + if (tfm_stream_size(tc, id) < size)
  30689. + return realloc_tfm_stream(tc, size, id);
  30690. + return 0;
  30691. +}
  30692. +
  30693. +/* Common deflate manager */
  30694. +int reiser4_deflate_cluster(struct cluster_handle * clust, struct inode * inode)
  30695. +{
  30696. + int result = 0;
  30697. + int compressed = 0;
  30698. + int encrypted = 0;
  30699. + struct tfm_cluster * tc = &clust->tc;
  30700. + compression_plugin * coplug;
  30701. +
  30702. + assert("edward-401", inode != NULL);
  30703. + assert("edward-903", tfm_stream_is_set(tc, INPUT_STREAM));
  30704. + assert("edward-1348", cluster_get_tfm_act(tc) == TFMA_WRITE);
  30705. + assert("edward-498", !tfm_cluster_is_uptodate(tc));
  30706. +
  30707. + coplug = inode_compression_plugin(inode);
  30708. + if (should_compress(tc, clust->index, inode)) {
  30709. + /* try to compress, discard bad results */
  30710. + size_t dst_len;
  30711. + compression_mode_plugin * mplug =
  30712. + inode_compression_mode_plugin(inode);
  30713. + assert("edward-602", coplug != NULL);
  30714. + assert("edward-1423", coplug->compress != NULL);
  30715. +
  30716. + result = grab_coa(tc, coplug);
  30717. + if (result)
  30718. + /*
  30719. + * can not allocate memory to perform
  30720. + * compression, leave data uncompressed
  30721. + */
  30722. + goto cipher;
  30723. + result = grab_tfm_stream(inode, tc, OUTPUT_STREAM);
  30724. + if (result) {
  30725. + warning("edward-1425",
  30726. + "alloc stream failed with ret=%d, skipped compression",
  30727. + result);
  30728. + goto cipher;
  30729. + }
  30730. + dst_len = tfm_stream_size(tc, OUTPUT_STREAM);
  30731. + coplug->compress(get_coa(tc, coplug->h.id, tc->act),
  30732. + tfm_input_data(clust), tc->len,
  30733. + tfm_output_data(clust), &dst_len);
  30734. + /* make sure we didn't overwrite extra bytes */
  30735. + assert("edward-603",
  30736. + dst_len <= tfm_stream_size(tc, OUTPUT_STREAM));
  30737. +
  30738. + /* evaluate results of compression transform */
  30739. + if (save_compressed(tc->len, dst_len, inode)) {
  30740. + /* good result, accept */
  30741. + tc->len = dst_len;
  30742. + if (mplug->accept_hook != NULL) {
  30743. + result = mplug->accept_hook(inode, clust->index);
  30744. + if (result)
  30745. + warning("edward-1426",
  30746. + "accept_hook failed with ret=%d",
  30747. + result);
  30748. + }
  30749. + compressed = 1;
  30750. + }
  30751. + else {
  30752. + /* bad result, discard */
  30753. +#if 0
  30754. + if (cluster_is_complete(clust, inode))
  30755. + warning("edward-1496",
  30756. + "incompressible cluster %lu (inode %llu)",
  30757. + clust->index,
  30758. + (unsigned long long)get_inode_oid(inode));
  30759. +#endif
  30760. + if (mplug->discard_hook != NULL &&
  30761. + cluster_is_complete(clust, inode)) {
  30762. + result = mplug->discard_hook(inode,
  30763. + clust->index);
  30764. + if (result)
  30765. + warning("edward-1427",
  30766. + "discard_hook failed with ret=%d",
  30767. + result);
  30768. + }
  30769. + }
  30770. + }
  30771. + cipher:
  30772. + if (need_cipher(inode)) {
  30773. + cipher_plugin * ciplug;
  30774. + struct blkcipher_desc desc;
  30775. + struct scatterlist src;
  30776. + struct scatterlist dst;
  30777. +
  30778. + ciplug = inode_cipher_plugin(inode);
  30779. + desc.tfm = info_get_cipher(inode_crypto_info(inode));
  30780. + desc.flags = 0;
  30781. + if (compressed)
  30782. + alternate_streams(tc);
  30783. + result = grab_tfm_stream(inode, tc, OUTPUT_STREAM);
  30784. + if (result)
  30785. + return result;
  30786. +
  30787. + align_or_cut_overhead(inode, clust, WRITE_OP);
  30788. + sg_init_one(&src, tfm_input_data(clust), tc->len);
  30789. + sg_init_one(&dst, tfm_output_data(clust), tc->len);
  30790. +
  30791. + result = crypto_blkcipher_encrypt(&desc, &dst, &src, tc->len);
  30792. + if (result) {
  30793. + warning("edward-1405",
  30794. + "encryption failed flags=%x\n", desc.flags);
  30795. + return result;
  30796. + }
  30797. + encrypted = 1;
  30798. + }
  30799. + if (compressed && coplug->checksum != NULL)
  30800. + dc_set_checksum(coplug, tc);
  30801. + if (!compressed && !encrypted)
  30802. + alternate_streams(tc);
  30803. + return result;
  30804. +}
  30805. +
  30806. +/* Common inflate manager. */
  30807. +int reiser4_inflate_cluster(struct cluster_handle * clust, struct inode * inode)
  30808. +{
  30809. + int result = 0;
  30810. + int transformed = 0;
  30811. + struct tfm_cluster * tc = &clust->tc;
  30812. + compression_plugin * coplug;
  30813. +
  30814. + assert("edward-905", inode != NULL);
  30815. + assert("edward-1178", clust->dstat == PREP_DISK_CLUSTER);
  30816. + assert("edward-906", tfm_stream_is_set(&clust->tc, INPUT_STREAM));
  30817. + assert("edward-1349", tc->act == TFMA_READ);
  30818. + assert("edward-907", !tfm_cluster_is_uptodate(tc));
  30819. +
  30820. + /* Handle a checksum (if any) */
  30821. + coplug = inode_compression_plugin(inode);
  30822. + if (need_inflate(clust, inode, need_cipher(inode)) &&
  30823. + coplug->checksum != NULL) {
  30824. + result = dc_check_checksum(coplug, tc);
  30825. + if (unlikely(result)) {
  30826. + warning("edward-1460",
  30827. + "Inode %llu: disk cluster %lu looks corrupted",
  30828. + (unsigned long long)get_inode_oid(inode),
  30829. + clust->index);
  30830. + return RETERR(-EIO);
  30831. + }
  30832. + }
  30833. + if (need_cipher(inode)) {
  30834. + cipher_plugin * ciplug;
  30835. + struct blkcipher_desc desc;
  30836. + struct scatterlist src;
  30837. + struct scatterlist dst;
  30838. +
  30839. + ciplug = inode_cipher_plugin(inode);
  30840. + desc.tfm = info_get_cipher(inode_crypto_info(inode));
  30841. + desc.flags = 0;
  30842. + result = grab_tfm_stream(inode, tc, OUTPUT_STREAM);
  30843. + if (result)
  30844. + return result;
  30845. + assert("edward-909", tfm_cluster_is_set(tc));
  30846. +
  30847. + sg_init_one(&src, tfm_input_data(clust), tc->len);
  30848. + sg_init_one(&dst, tfm_output_data(clust), tc->len);
  30849. +
  30850. + result = crypto_blkcipher_decrypt(&desc, &dst, &src, tc->len);
  30851. + if (result) {
  30852. + warning("edward-1600", "decrypt failed flags=%x\n",
  30853. + desc.flags);
  30854. + return result;
  30855. + }
  30856. + align_or_cut_overhead(inode, clust, READ_OP);
  30857. + transformed = 1;
  30858. + }
  30859. + if (need_inflate(clust, inode, 0)) {
  30860. + size_t dst_len = inode_cluster_size(inode);
  30861. + if(transformed)
  30862. + alternate_streams(tc);
  30863. +
  30864. + result = grab_tfm_stream(inode, tc, OUTPUT_STREAM);
  30865. + if (result)
  30866. + return result;
  30867. + assert("edward-1305", coplug->decompress != NULL);
  30868. + assert("edward-910", tfm_cluster_is_set(tc));
  30869. +
  30870. + coplug->decompress(get_coa(tc, coplug->h.id, tc->act),
  30871. + tfm_input_data(clust), tc->len,
  30872. + tfm_output_data(clust), &dst_len);
  30873. + /* check length */
  30874. + tc->len = dst_len;
  30875. + assert("edward-157", dst_len == tc->lsize);
  30876. + transformed = 1;
  30877. + }
  30878. + if (!transformed)
  30879. + alternate_streams(tc);
  30880. + return result;
  30881. +}
  30882. +
  30883. +/* This is implementation of readpage method of struct
  30884. + address_space_operations for cryptcompress plugin. */
  30885. +int readpage_cryptcompress(struct file *file, struct page *page)
  30886. +{
  30887. + reiser4_context *ctx;
  30888. + struct cluster_handle clust;
  30889. + item_plugin *iplug;
  30890. + int result;
  30891. +
  30892. + assert("edward-88", PageLocked(page));
  30893. + assert("vs-976", !PageUptodate(page));
  30894. + assert("edward-89", page->mapping && page->mapping->host);
  30895. +
  30896. + ctx = reiser4_init_context(page->mapping->host->i_sb);
  30897. + if (IS_ERR(ctx)) {
  30898. + unlock_page(page);
  30899. + return PTR_ERR(ctx);
  30900. + }
  30901. + assert("edward-113",
  30902. + ergo(file != NULL,
  30903. + page->mapping == file_inode(file)->i_mapping));
  30904. +
  30905. + if (PageUptodate(page)) {
  30906. + warning("edward-1338", "page is already uptodate\n");
  30907. + unlock_page(page);
  30908. + reiser4_exit_context(ctx);
  30909. + return 0;
  30910. + }
  30911. + cluster_init_read(&clust, NULL);
  30912. + clust.file = file;
  30913. + iplug = item_plugin_by_id(CTAIL_ID);
  30914. + if (!iplug->s.file.readpage) {
  30915. + unlock_page(page);
  30916. + put_cluster_handle(&clust);
  30917. + reiser4_exit_context(ctx);
  30918. + return -EINVAL;
  30919. + }
  30920. + result = iplug->s.file.readpage(&clust, page);
  30921. +
  30922. + put_cluster_handle(&clust);
  30923. + reiser4_txn_restart(ctx);
  30924. + reiser4_exit_context(ctx);
  30925. + return result;
  30926. +}
  30927. +
  30928. +/* number of pages to check in */
  30929. +static int get_new_nrpages(struct cluster_handle * clust)
  30930. +{
  30931. + switch (clust->op) {
  30932. + case LC_APPOV:
  30933. + case LC_EXPAND:
  30934. + return clust->nr_pages;
  30935. + case LC_SHRINK:
  30936. + assert("edward-1179", clust->win != NULL);
  30937. + return size_in_pages(clust->win->off + clust->win->count);
  30938. + default:
  30939. + impossible("edward-1180", "bad page cluster option");
  30940. + return 0;
  30941. + }
  30942. +}
  30943. +
  30944. +static void set_cluster_pages_dirty(struct cluster_handle * clust,
  30945. + struct inode * inode)
  30946. +{
  30947. + int i;
  30948. + struct page *pg;
  30949. + int nrpages = get_new_nrpages(clust);
  30950. +
  30951. + for (i = 0; i < nrpages; i++) {
  30952. +
  30953. + pg = clust->pages[i];
  30954. + assert("edward-968", pg != NULL);
  30955. + lock_page(pg);
  30956. + assert("edward-1065", PageUptodate(pg));
  30957. + set_page_dirty_notag(pg);
  30958. + unlock_page(pg);
  30959. + mark_page_accessed(pg);
  30960. + }
  30961. +}
  30962. +
  30963. +/* Grab a page cluster for read/write operations.
  30964. + Attach a jnode for write operations (when preparing for modifications, which
  30965. + are supposed to be committed).
  30966. +
  30967. + We allocate only one jnode per page cluster; this jnode is bound to the
  30968. + first page of this cluster, so we have an extra reference that will be put
  30969. + as soon as the jnode is evicted from memory; other references will be cleaned
  30970. + up at flush time (assuming that check-in of the page cluster was successful).
  30971. +*/
  30972. +int grab_page_cluster(struct inode * inode,
  30973. + struct cluster_handle * clust, rw_op rw)
  30974. +{
  30975. + int i;
  30976. + int result = 0;
  30977. + jnode *node = NULL;
  30978. +
  30979. + assert("edward-182", clust != NULL);
  30980. + assert("edward-183", clust->pages != NULL);
  30981. + assert("edward-1466", clust->node == NULL);
  30982. + assert("edward-1428", inode != NULL);
  30983. + assert("edward-1429", inode->i_mapping != NULL);
  30984. + assert("edward-184", clust->nr_pages <= cluster_nrpages(inode));
  30985. +
  30986. + if (clust->nr_pages == 0)
  30987. + return 0;
  30988. +
  30989. + for (i = 0; i < clust->nr_pages; i++) {
  30990. +
  30991. + assert("edward-1044", clust->pages[i] == NULL);
  30992. +
  30993. + clust->pages[i] =
  30994. + find_or_create_page(inode->i_mapping,
  30995. + clust_to_pg(clust->index, inode) + i,
  30996. + reiser4_ctx_gfp_mask_get());
  30997. + if (!clust->pages[i]) {
  30998. + result = RETERR(-ENOMEM);
  30999. + break;
  31000. + }
  31001. + if (i == 0 && rw == WRITE_OP) {
  31002. + node = jnode_of_page(clust->pages[i]);
  31003. + if (IS_ERR(node)) {
  31004. + result = PTR_ERR(node);
  31005. + unlock_page(clust->pages[i]);
  31006. + break;
  31007. + }
  31008. + JF_SET(node, JNODE_CLUSTER_PAGE);
  31009. + assert("edward-920", jprivate(clust->pages[0]));
  31010. + }
  31011. + INODE_PGCOUNT_INC(inode);
  31012. + unlock_page(clust->pages[i]);
  31013. + }
  31014. + if (unlikely(result)) {
  31015. + while (i) {
  31016. + put_cluster_page(clust->pages[--i]);
  31017. + INODE_PGCOUNT_DEC(inode);
  31018. + }
  31019. + if (node && !IS_ERR(node))
  31020. + jput(node);
  31021. + return result;
  31022. + }
  31023. + clust->node = node;
  31024. + return 0;
  31025. +}
  31026. +
  31027. +static void truncate_page_cluster_range(struct inode * inode,
  31028. + struct page ** pages,
  31029. + cloff_t index,
  31030. + int from, int count,
  31031. + int even_cows)
  31032. +{
  31033. + assert("edward-1467", count > 0);
  31034. + reiser4_invalidate_pages(inode->i_mapping,
  31035. + clust_to_pg(index, inode) + from,
  31036. + count, even_cows);
  31037. +}
  31038. +
  31039. +/* Put @count pages starting from @from offset */
  31040. +void __put_page_cluster(int from, int count,
  31041. + struct page ** pages, struct inode * inode)
  31042. +{
  31043. + int i;
  31044. + assert("edward-1468", pages != NULL);
  31045. + assert("edward-1469", inode != NULL);
  31046. + assert("edward-1470", from >= 0 && count >= 0);
  31047. +
  31048. + for (i = 0; i < count; i++) {
  31049. + assert("edward-1471", pages[from + i] != NULL);
  31050. + assert("edward-1472",
  31051. + pages[from + i]->index == pages[from]->index + i);
  31052. +
  31053. + put_cluster_page(pages[from + i]);
  31054. + INODE_PGCOUNT_DEC(inode);
  31055. + }
  31056. +}
  31057. +
  31058. +/*
  31059. + * This is dual to grab_page_cluster,
  31060. + * however if @rw == WRITE_OP, then we call this function
  31061. + * only if something failed before checking in the page cluster.
  31062. + */
  31063. +void put_page_cluster(struct cluster_handle * clust,
  31064. + struct inode * inode, rw_op rw)
  31065. +{
  31066. + assert("edward-445", clust != NULL);
  31067. + assert("edward-922", clust->pages != NULL);
  31068. + assert("edward-446",
  31069. + ergo(clust->nr_pages != 0, clust->pages[0] != NULL));
  31070. +
  31071. + __put_page_cluster(0, clust->nr_pages, clust->pages, inode);
  31072. + if (rw == WRITE_OP) {
  31073. + if (unlikely(clust->node)) {
  31074. + assert("edward-447",
  31075. + clust->node == jprivate(clust->pages[0]));
  31076. + jput(clust->node);
  31077. + clust->node = NULL;
  31078. + }
  31079. + }
  31080. +}
  31081. +
  31082. +#if REISER4_DEBUG
  31083. +int cryptcompress_inode_ok(struct inode *inode)
  31084. +{
  31085. + if (!(reiser4_inode_data(inode)->plugin_mask & (1 << PSET_FILE)))
  31086. + return 0;
  31087. + if (!cluster_shift_ok(inode_cluster_shift(inode)))
  31088. + return 0;
  31089. + return 1;
  31090. +}
  31091. +
  31092. +static int window_ok(struct reiser4_slide * win, struct inode *inode)
  31093. +{
  31094. + assert("edward-1115", win != NULL);
  31095. + assert("edward-1116", ergo(win->delta, win->stat == HOLE_WINDOW));
  31096. +
  31097. + return (win->off != inode_cluster_size(inode)) &&
  31098. + (win->off + win->count + win->delta <= inode_cluster_size(inode));
  31099. +}
  31100. +
  31101. +static int cluster_ok(struct cluster_handle * clust, struct inode *inode)
  31102. +{
  31103. + assert("edward-279", clust != NULL);
  31104. +
  31105. + if (!clust->pages)
  31106. + return 0;
  31107. + return (clust->win ? window_ok(clust->win, inode) : 1);
  31108. +}
  31109. +#if 0
  31110. +static int pages_truncate_ok(struct inode *inode, pgoff_t start)
  31111. +{
  31112. + int found;
  31113. + struct page * page;
  31114. +
  31115. +
  31116. + found = find_get_pages(inode->i_mapping, &start, 1, &page);
  31117. + if (found)
  31118. + put_cluster_page(page);
  31119. + return !found;
  31120. +}
  31121. +#else
  31122. +#define pages_truncate_ok(inode, start) 1
  31123. +#endif
  31124. +
  31125. +static int jnode_truncate_ok(struct inode *inode, cloff_t index)
  31126. +{
  31127. + jnode *node;
  31128. + node = jlookup(current_tree, get_inode_oid(inode),
  31129. + clust_to_pg(index, inode));
  31130. + if (likely(!node))
  31131. + return 1;
  31132. + jput(node);
  31133. + return 0;
  31134. +}
  31135. +#endif
  31136. +
  31137. +/* guess next window stat */
  31138. +static inline window_stat next_window_stat(struct reiser4_slide * win)
  31139. +{
  31140. + assert("edward-1130", win != NULL);
  31141. + return ((win->stat == HOLE_WINDOW && win->delta == 0) ?
  31142. + HOLE_WINDOW : DATA_WINDOW);
  31143. +}
  31144. +
  31145. +/* guess and set next cluster index and window params */
  31146. +static void move_update_window(struct inode * inode,
  31147. + struct cluster_handle * clust,
  31148. + loff_t file_off, loff_t to_file)
  31149. +{
  31150. + struct reiser4_slide * win;
  31151. +
  31152. + assert("edward-185", clust != NULL);
  31153. + assert("edward-438", clust->pages != NULL);
  31154. + assert("edward-281", cluster_ok(clust, inode));
  31155. +
  31156. + win = clust->win;
  31157. + if (!win)
  31158. + return;
  31159. +
  31160. + switch (win->stat) {
  31161. + case DATA_WINDOW:
  31162. + /* increment */
  31163. + clust->index++;
  31164. + win->stat = DATA_WINDOW;
  31165. + win->off = 0;
  31166. + win->count = min((loff_t)inode_cluster_size(inode), to_file);
  31167. + break;
  31168. + case HOLE_WINDOW:
  31169. + switch (next_window_stat(win)) {
  31170. + case HOLE_WINDOW:
  31171. + /* skip */
  31172. + clust->index = off_to_clust(file_off, inode);
  31173. + win->stat = HOLE_WINDOW;
  31174. + win->off = 0;
  31175. + win->count = off_to_cloff(file_off, inode);
  31176. + win->delta = min((loff_t)(inode_cluster_size(inode) -
  31177. + win->count), to_file);
  31178. + break;
  31179. + case DATA_WINDOW:
  31180. + /* stay */
  31181. + win->stat = DATA_WINDOW;
  31182. + /* off+count+delta=inv */
  31183. + win->off = win->off + win->count;
  31184. + win->count = win->delta;
  31185. + win->delta = 0;
  31186. + break;
  31187. + default:
  31188. + impossible("edward-282", "wrong next window state");
  31189. + }
  31190. + break;
  31191. + default:
  31192. + impossible("edward-283", "wrong current window state");
  31193. + }
  31194. + assert("edward-1068", cluster_ok(clust, inode));
  31195. +}
  31196. +
  31197. +static int update_sd_cryptcompress(struct inode *inode)
  31198. +{
  31199. + int result = 0;
  31200. +
  31201. + assert("edward-978", reiser4_schedulable());
  31202. +
  31203. + result = reiser4_grab_space_force(/* one for stat data update */
  31204. + estimate_update_common(inode),
  31205. + BA_CAN_COMMIT);
  31206. + if (result)
  31207. + return result;
  31208. + if (!IS_NOCMTIME(inode))
  31209. + inode->i_ctime = inode->i_mtime = current_time(inode);
  31210. +
  31211. + result = reiser4_update_sd(inode);
  31212. +
  31213. + if (unlikely(result != 0))
  31214. + warning("edward-1573",
  31215. + "Can not update stat-data: %i. FSCK?",
  31216. + result);
  31217. + return result;
  31218. +}
  31219. +
  31220. +static void uncapture_cluster_jnode(jnode * node)
  31221. +{
  31222. + txn_atom *atom;
  31223. +
  31224. + assert_spin_locked(&(node->guard));
  31225. +
  31226. + atom = jnode_get_atom(node);
  31227. + if (atom == NULL) {
  31228. + assert("jmacd-7111", !JF_ISSET(node, JNODE_DIRTY));
  31229. + spin_unlock_jnode(node);
  31230. + return;
  31231. + }
  31232. + reiser4_uncapture_block(node);
  31233. + spin_unlock_atom(atom);
  31234. + jput(node);
  31235. +}
  31236. +
  31237. +static void put_found_pages(struct page **pages, int nr)
  31238. +{
  31239. + int i;
  31240. + for (i = 0; i < nr; i++) {
  31241. + assert("edward-1045", pages[i] != NULL);
  31242. + put_cluster_page(pages[i]);
  31243. + }
  31244. +}
  31245. +
  31246. +/* Lifecycle of a logical cluster in the system.
  31247. + *
  31248. + *
  31249. + * Logical cluster of a cryptcompress file is represented in the system by
  31250. + * . page cluster (in memory, primary cache, contains plain text);
  31251. + * . disk cluster (in memory, secondary cache, contains transformed text).
  31252. + * Primary cache is to reduce number of transform operations (compression,
  31253. + * encryption), i.e. to implement transform-caching strategy.
  31254. + * Secondary cache is to reduce number of I/O operations, i.e. for usual
  31255. + * write-caching strategy. Page cluster is a set of pages, i.e. mapping of
  31256. + * a logical cluster to the primary cache. Disk cluster is a set of items
  31257. + * of the same type defined by some reiser4 item plugin id.
  31258. + *
  31259. + * 1. Performing modifications
  31260. + *
  31261. + * Every modification of a cryptcompress file is considered as a set of
  31262. + * operations performed on file's logical clusters. Every such "atomic"
  31263. + * modification is truncate, append and(or) overwrite some bytes of a
  31264. + * logical cluster performed in the primary cache with the following
  31265. + * synchronization with the secondary cache (in flush time). Disk clusters,
  31266. + * which live in the secondary cache, are supposed to be synchronized with
  31267. + * disk. The mechanism of synchronization of primary and secondary caches
  31268. + * includes so-called checkin/checkout technique described below.
  31269. + *
  31270. + * 2. Submitting modifications
  31271. + *
  31272. + * Each page cluster has associated jnode (a special in-memory header to
  31273. + * keep a track of transactions in reiser4), which is attached to its first
  31274. + * page when grabbing page cluster for modifications (see grab_page_cluster).
  31275. + * Submitting modifications (see checkin_logical_cluster) is going per logical
  31276. + * cluster and includes:
  31277. + * . checkin_cluster_size;
  31278. + * . checkin_page_cluster.
  31279. + * checkin_cluster_size() is resolved to a file size update, which completely
  31280. + * defines the new size of the logical cluster (the number of file's bytes in
  31281. + * a logical cluster).
  31282. + * checkin_page_cluster() captures jnode of a page cluster and installs
  31283. + * jnode's dirty flag (if needed) to indicate that modifications are
  31284. + * successfully checked in.
  31285. + *
  31286. + * 3. Checking out modifications
  31287. + *
  31288. + * Is going per logical cluster in flush time (see checkout_logical_cluster).
  31289. + * This is the time of synchronizing primary and secondary caches.
  31290. + * checkout_logical_cluster() includes:
  31291. + * . checkout_page_cluster (retrieving checked in pages).
  31292. + * . uncapture jnode (including clear dirty flag and unlock)
  31293. + *
  31294. + * 4. Committing modifications
  31295. + *
  31296. + * This performs the synchronization of the primary and secondary caches. When
  31297. + * checking out a page cluster (the phase above), pages are locked/flushed/
  31298. + * unlocked one-by-one in ascending order of their indexes into a contiguous
  31299. + * stream, which is then transformed (compressed, encrypted), chopped up into items
  31300. + * and committed to disk as a disk cluster.
  31301. + *
  31302. + * 5. Managing page references
  31303. + *
  31304. + * Every checked-in page has a special additional "control" reference,
  31305. + * which is dropped at checkout. We need this to avoid unexpected eviction
  31306. + * of pages from memory before checkout. Control references are managed so
  31307. + * they are not accumulated with every checkin:
  31308. + *
  31309. + * 0
  31310. + * checkin -> 1
  31311. + * 0 -> checkout
  31312. + * checkin -> 1
  31313. + * checkin -> 1
  31314. + * checkin -> 1
  31315. + * 0 -> checkout
  31316. + * ...
  31317. + *
  31318. + * Every page cluster has its own unique "cluster lock". Update/drop
  31319. + * references are serialized via this lock. Number of checked in cluster
  31320. + * pages is calculated from i_size under the cluster lock. File size is updated
  31321. + * at every checkin action also under cluster lock (except cases of
  31322. + * appending/truncating fake logical clusters).
  31323. + *
  31324. + * Proof of correctness:
  31325. + *
  31326. + * Since we update file size under cluster lock, in the case of non-fake
  31327. + * logical cluster with its lock held we do have expected number of checked
  31328. + * in pages. On the other hand, append/truncate of fake logical clusters
  31329. + * doesn't change number of checked in pages of any cluster.
  31330. + *
  31331. + * NOTE-EDWARD: As cluster lock we use guard (spinlock_t) of its jnode.
  31332. + * Currently, I don't see any reason to create a special lock for those
  31333. + * needs.
  31334. + */
  31335. +
  31336. +static inline void lock_cluster(jnode * node)
  31337. +{
  31338. + spin_lock_jnode(node);
  31339. +}
  31340. +
  31341. +static inline void unlock_cluster(jnode * node)
  31342. +{
  31343. + spin_unlock_jnode(node);
  31344. +}
  31345. +
  31346. +static inline void unlock_cluster_uncapture(jnode * node)
  31347. +{
  31348. + uncapture_cluster_jnode(node);
  31349. +}
  31350. +
  31351. +/* Set new file size by window. Cluster lock is required. */
  31352. +static void checkin_file_size(struct cluster_handle * clust,
  31353. + struct inode * inode)
  31354. +{
  31355. + loff_t new_size;
  31356. + struct reiser4_slide * win;
  31357. +
  31358. + assert("edward-1181", clust != NULL);
  31359. + assert("edward-1182", inode != NULL);
  31360. + assert("edward-1473", clust->pages != NULL);
  31361. + assert("edward-1474", clust->pages[0] != NULL);
  31362. + assert("edward-1475", jprivate(clust->pages[0]) != NULL);
  31363. + assert_spin_locked(&(jprivate(clust->pages[0])->guard));
  31364. +
  31365. +
  31366. + win = clust->win;
  31367. + assert("edward-1183", win != NULL);
  31368. +
  31369. + new_size = clust_to_off(clust->index, inode) + win->off;
  31370. +
  31371. + switch (clust->op) {
  31372. + case LC_APPOV:
  31373. + case LC_EXPAND:
  31374. + if (new_size + win->count <= i_size_read(inode))
  31375. + /* overwrite only */
  31376. + return;
  31377. + new_size += win->count;
  31378. + break;
  31379. + case LC_SHRINK:
  31380. + break;
  31381. + default:
  31382. + impossible("edward-1184", "bad page cluster option");
  31383. + break;
  31384. + }
  31385. + inode_check_scale_nolock(inode, i_size_read(inode), new_size);
  31386. + i_size_write(inode, new_size);
  31387. + return;
  31388. +}
  31389. +
  31390. +static inline void checkin_cluster_size(struct cluster_handle * clust,
  31391. + struct inode * inode)
  31392. +{
  31393. + if (clust->win)
  31394. + checkin_file_size(clust, inode);
  31395. +}
  31396. +
  31397. +static int checkin_page_cluster(struct cluster_handle * clust,
  31398. + struct inode * inode)
  31399. +{
  31400. + int result;
  31401. + jnode * node;
  31402. + int old_nrpages = clust->old_nrpages;
  31403. + int new_nrpages = get_new_nrpages(clust);
  31404. +
  31405. + node = clust->node;
  31406. +
  31407. + assert("edward-221", node != NULL);
  31408. + assert("edward-971", clust->reserved == 1);
  31409. + assert("edward-1263",
  31410. + clust->reserved_prepped == estimate_update_cluster(inode));
  31411. + assert("edward-1264", clust->reserved_unprepped == 0);
  31412. +
  31413. + if (JF_ISSET(node, JNODE_DIRTY)) {
  31414. + /*
  31415. + * page cluster was checked in, but not yet
  31416. + * checked out, so release related resources
  31417. + */
  31418. + free_reserved4cluster(inode, clust,
  31419. + estimate_update_cluster(inode));
  31420. + __put_page_cluster(0, clust->old_nrpages,
  31421. + clust->pages, inode);
  31422. + } else {
  31423. + result = capture_cluster_jnode(node);
  31424. + if (unlikely(result)) {
  31425. + unlock_cluster(node);
  31426. + return result;
  31427. + }
  31428. + jnode_make_dirty_locked(node);
  31429. + clust->reserved = 0;
  31430. + }
  31431. + unlock_cluster(node);
  31432. +
  31433. + if (new_nrpages < old_nrpages) {
  31434. + /* truncate >= 1 complete pages */
  31435. + __put_page_cluster(new_nrpages,
  31436. + old_nrpages - new_nrpages,
  31437. + clust->pages, inode);
  31438. + truncate_page_cluster_range(inode,
  31439. + clust->pages, clust->index,
  31440. + new_nrpages,
  31441. + old_nrpages - new_nrpages,
  31442. + 0);
  31443. + }
  31444. +#if REISER4_DEBUG
  31445. + clust->reserved_prepped -= estimate_update_cluster(inode);
  31446. +#endif
  31447. + return 0;
  31448. +}
  31449. +
  31450. +/* Submit modifications of a logical cluster */
  31451. +static int checkin_logical_cluster(struct cluster_handle * clust,
  31452. + struct inode *inode)
  31453. +{
  31454. + int result = 0;
  31455. + jnode * node;
  31456. +
  31457. + node = clust->node;
  31458. +
  31459. + assert("edward-1035", node != NULL);
  31460. + assert("edward-1029", clust != NULL);
  31461. + assert("edward-1030", clust->reserved == 1);
  31462. + assert("edward-1031", clust->nr_pages != 0);
  31463. + assert("edward-1032", clust->pages != NULL);
  31464. + assert("edward-1033", clust->pages[0] != NULL);
  31465. + assert("edward-1446", jnode_is_cluster_page(node));
  31466. + assert("edward-1476", node == jprivate(clust->pages[0]));
  31467. +
  31468. + lock_cluster(node);
  31469. + checkin_cluster_size(clust, inode);
  31470. + /*
  31471. + * this will unlock the cluster
  31472. + */
  31473. + result = checkin_page_cluster(clust, inode);
  31474. + jput(node);
  31475. + clust->node = NULL;
  31476. + return result;
  31477. +}
  31478. +
  31479. +/*
  31480. + * Retrieve size of logical cluster that was checked in at
  31481. + * the latest modifying session (cluster lock is required)
  31482. + */
  31483. +static inline void checkout_cluster_size(struct cluster_handle * clust,
  31484. + struct inode * inode)
  31485. +{
  31486. + struct tfm_cluster *tc = &clust->tc;
  31487. +
  31488. + tc->len = lbytes(clust->index, inode);
  31489. + assert("edward-1478", tc->len != 0);
  31490. +}
  31491. +
  31492. +/*
  31493. + * Retrieve a page cluster with the latest submitted modifications
  31494. + * and flush its pages to previously allocated contiguous stream.
  31495. + */
  31496. +static void checkout_page_cluster(struct cluster_handle * clust,
  31497. + jnode * node, struct inode * inode)
  31498. +{
  31499. + int i;
  31500. + int found;
  31501. + int to_put;
  31502. + pgoff_t page_index = clust_to_pg(clust->index, inode);
  31503. + struct tfm_cluster *tc = &clust->tc;
  31504. +
  31505. + /* find and put checked in pages: cluster is locked,
  31506. + * so we must get expected number (to_put) of pages
  31507. + */
  31508. + to_put = size_in_pages(lbytes(clust->index, inode));
  31509. + found = find_get_pages(inode->i_mapping, &page_index,
  31510. + to_put, clust->pages);
  31511. + BUG_ON(found != to_put);
  31512. +
  31513. + __put_page_cluster(0, to_put, clust->pages, inode);
  31514. + unlock_cluster_uncapture(node);
  31515. +
  31516. + /* Flush found pages.
  31517. + *
  31518. + * Note, that we don't disable modifications while flushing,
  31519. + * moreover, some found pages can be truncated, as we have
  31520. + * released cluster lock.
  31521. + */
  31522. + for (i = 0; i < found; i++) {
  31523. + int in_page;
  31524. + char * data;
  31525. + assert("edward-1479",
  31526. + clust->pages[i]->index == clust->pages[0]->index + i);
  31527. +
  31528. + lock_page(clust->pages[i]);
  31529. + if (!PageUptodate(clust->pages[i])) {
  31530. + /* page was truncated */
  31531. + assert("edward-1480",
  31532. + i_size_read(inode) <= page_offset(clust->pages[i]));
  31533. + assert("edward-1481",
  31534. + clust->pages[i]->mapping != inode->i_mapping);
  31535. + unlock_page(clust->pages[i]);
  31536. + break;
  31537. + }
  31538. + /* Update the number of bytes in the logical cluster,
  31539. + * as it could be partially truncated. Note, that only
  31540. + * partial truncate is possible (complete truncate can
  31541. + * not go here, as it is performed via ->kill_hook()
  31542. + * called by cut_file_items(), and the last one must
  31543. + * wait for znode locked with parent coord).
  31544. + */
  31545. + checkout_cluster_size(clust, inode);
  31546. +
  31547. + /* this can be zero, as new file size is
  31548. + checked in before truncating pages */
  31549. + in_page = __mbp(tc->len, i);
  31550. +
  31551. + data = kmap_atomic(clust->pages[i]);
  31552. + memcpy(tfm_stream_data(tc, INPUT_STREAM) + pg_to_off(i),
  31553. + data, in_page);
  31554. + kunmap_atomic(data);
  31555. + /*
  31556. + * modifications have been checked out and will be
  31557. + * committed later. Anyway, the dirty status of the
  31558. + * page is no longer relevant. However, the uptodate
  31559. + * status of the page is still relevant!
  31560. + */
  31561. + if (PageDirty(clust->pages[i]))
  31562. + cancel_dirty_page(clust->pages[i]);
  31563. +
  31564. + unlock_page(clust->pages[i]);
  31565. +
  31566. + if (in_page < PAGE_SIZE)
  31567. + /* end of the file */
  31568. + break;
  31569. + }
  31570. + put_found_pages(clust->pages, found); /* find_get_pages */
  31571. + tc->lsize = tc->len;
  31572. + return;
  31573. +}
  31574. +
  31575. +/* Check out modifications of a logical cluster */
  31576. +int checkout_logical_cluster(struct cluster_handle * clust,
  31577. + jnode * node, struct inode *inode)
  31578. +{
  31579. + int result;
  31580. + struct tfm_cluster *tc = &clust->tc;
  31581. +
  31582. + assert("edward-980", node != NULL);
  31583. + assert("edward-236", inode != NULL);
  31584. + assert("edward-237", clust != NULL);
  31585. + assert("edward-240", !clust->win);
  31586. + assert("edward-241", reiser4_schedulable());
  31587. + assert("edward-718", cryptcompress_inode_ok(inode));
  31588. +
  31589. + result = grab_tfm_stream(inode, tc, INPUT_STREAM);
  31590. + if (result) {
  31591. + warning("edward-1430", "alloc stream failed with ret=%d",
  31592. + result);
  31593. + return RETERR(-E_REPEAT);
  31594. + }
  31595. + lock_cluster(node);
  31596. +
  31597. + if (unlikely(!JF_ISSET(node, JNODE_DIRTY))) {
  31598. + /* race with another flush */
  31599. + warning("edward-982",
  31600. + "checking out logical cluster %lu of inode %llu: "
  31601. + "jnode is not dirty", clust->index,
  31602. + (unsigned long long)get_inode_oid(inode));
  31603. + unlock_cluster(node);
  31604. + return RETERR(-E_REPEAT);
  31605. + }
  31606. + cluster_reserved2grabbed(estimate_update_cluster(inode));
  31607. +
  31608. + /* this will unlock cluster */
  31609. + checkout_page_cluster(clust, node, inode);
  31610. + return 0;
  31611. +}
  31612. +
  31613. +/* set hint for the cluster of the index @index */
  31614. +static void set_hint_cluster(struct inode *inode, hint_t * hint,
  31615. + cloff_t index, znode_lock_mode mode)
  31616. +{
  31617. + reiser4_key key;
  31618. + assert("edward-722", cryptcompress_inode_ok(inode));
  31619. + assert("edward-723",
  31620. + inode_file_plugin(inode) ==
  31621. + file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID));
  31622. +
  31623. + inode_file_plugin(inode)->key_by_inode(inode,
  31624. + clust_to_off(index, inode),
  31625. + &key);
  31626. +
  31627. + reiser4_seal_init(&hint->seal, &hint->ext_coord.coord, &key);
  31628. + hint->offset = get_key_offset(&key);
  31629. + hint->mode = mode;
  31630. +}
  31631. +
  31632. +void invalidate_hint_cluster(struct cluster_handle * clust)
  31633. +{
  31634. + assert("edward-1291", clust != NULL);
  31635. + assert("edward-1292", clust->hint != NULL);
  31636. +
  31637. + done_lh(&clust->hint->lh);
  31638. + hint_clr_valid(clust->hint);
  31639. +}
  31640. +
  31641. +static void put_hint_cluster(struct cluster_handle * clust,
  31642. + struct inode *inode, znode_lock_mode mode)
  31643. +{
  31644. + assert("edward-1286", clust != NULL);
  31645. + assert("edward-1287", clust->hint != NULL);
  31646. +
  31647. + set_hint_cluster(inode, clust->hint, clust->index + 1, mode);
  31648. + invalidate_hint_cluster(clust);
  31649. +}
  31650. +
  31651. +static int balance_dirty_page_cluster(struct cluster_handle * clust,
  31652. + struct inode *inode, loff_t off,
  31653. + loff_t to_file,
  31654. + int nr_dirtied)
  31655. +{
  31656. + int result;
  31657. + struct cryptcompress_info * info;
  31658. +
  31659. + assert("edward-724", inode != NULL);
  31660. + assert("edward-725", cryptcompress_inode_ok(inode));
  31661. + assert("edward-1547", nr_dirtied <= cluster_nrpages(inode));
  31662. +
  31663. + /* set next window params */
  31664. + move_update_window(inode, clust, off, to_file);
  31665. +
  31666. + result = update_sd_cryptcompress(inode);
  31667. + if (result)
  31668. + return result;
  31669. + assert("edward-726", clust->hint->lh.owner == NULL);
  31670. + info = cryptcompress_inode_data(inode);
  31671. +
  31672. + if (nr_dirtied == 0)
  31673. + return 0;
  31674. + mutex_unlock(&info->checkin_mutex);
  31675. + reiser4_throttle_write(inode);
  31676. + mutex_lock(&info->checkin_mutex);
  31677. + return 0;
  31678. +}
  31679. +
  31680. +/*
  31681. + * Check in part of a hole within a logical cluster
  31682. + */
  31683. +static int write_hole(struct inode *inode, struct cluster_handle * clust,
  31684. + loff_t file_off, loff_t to_file)
  31685. +{
  31686. + int result = 0;
  31687. + unsigned cl_off, cl_count = 0;
  31688. + unsigned to_pg, pg_off;
  31689. + struct reiser4_slide * win;
  31690. +
  31691. + assert("edward-190", clust != NULL);
  31692. + assert("edward-1069", clust->win != NULL);
  31693. + assert("edward-191", inode != NULL);
  31694. + assert("edward-727", cryptcompress_inode_ok(inode));
  31695. + assert("edward-1171", clust->dstat != INVAL_DISK_CLUSTER);
  31696. + assert("edward-1154",
  31697. + ergo(clust->dstat != FAKE_DISK_CLUSTER, clust->reserved == 1));
  31698. +
  31699. + win = clust->win;
  31700. +
  31701. + assert("edward-1070", win != NULL);
  31702. + assert("edward-201", win->stat == HOLE_WINDOW);
  31703. + assert("edward-192", cluster_ok(clust, inode));
  31704. +
  31705. + if (win->off == 0 && win->count == inode_cluster_size(inode)) {
  31706. + /*
  31707. + * This part of the hole occupies the whole logical
  31708. + * cluster, so it won't be represented by any items.
  31709. + * Nothing to submit.
  31710. + */
  31711. + move_update_window(inode, clust, file_off, to_file);
  31712. + return 0;
  31713. + }
  31714. + /*
  31715. + * This part of the hole starts not at logical cluster
  31716. + * boundary, so it has to be converted to zeros and written to disk
  31717. + */
  31718. + cl_count = win->count; /* number of zeroes to write */
  31719. + cl_off = win->off;
  31720. + pg_off = off_to_pgoff(win->off);
  31721. +
  31722. + while (cl_count) {
  31723. + struct page *page;
  31724. + page = clust->pages[off_to_pg(cl_off)];
  31725. +
  31726. + assert("edward-284", page != NULL);
  31727. +
  31728. + to_pg = min((typeof(pg_off))PAGE_SIZE - pg_off, cl_count);
  31729. + lock_page(page);
  31730. + zero_user(page, pg_off, to_pg);
  31731. + SetPageUptodate(page);
  31732. + set_page_dirty_notag(page);
  31733. + mark_page_accessed(page);
  31734. + unlock_page(page);
  31735. +
  31736. + cl_off += to_pg;
  31737. + cl_count -= to_pg;
  31738. + pg_off = 0;
  31739. + }
  31740. + if (win->delta == 0) {
  31741. + /* only zeroes in this window, try to capture
  31742. + */
  31743. + result = checkin_logical_cluster(clust, inode);
  31744. + if (result)
  31745. + return result;
  31746. + put_hint_cluster(clust, inode, ZNODE_WRITE_LOCK);
  31747. + result = balance_dirty_page_cluster(clust,
  31748. + inode, file_off, to_file,
  31749. + win_count_to_nrpages(win));
  31750. + } else
  31751. + move_update_window(inode, clust, file_off, to_file);
  31752. + return result;
  31753. +}
  31754. +
  31755. +/*
  31756. + The main disk search procedure for cryptcompress plugin, which
  31757. + . scans all items of disk cluster with the lock mode @mode
  31758. + . maybe reads each one (if @read)
  31759. + . maybe makes its znode dirty (if write lock mode was specified)
  31760. +
  31761. + NOTE-EDWARD: Callers should handle the case when disk cluster
  31762. + is incomplete (-EIO)
  31763. +*/
  31764. +int find_disk_cluster(struct cluster_handle * clust,
  31765. + struct inode *inode, int read, znode_lock_mode mode)
  31766. +{
  31767. + flow_t f;
  31768. + hint_t *hint;
  31769. + int result = 0;
  31770. + int was_grabbed;
  31771. + ra_info_t ra_info;
  31772. + file_plugin *fplug;
  31773. + item_plugin *iplug;
  31774. + struct tfm_cluster *tc;
  31775. + struct cryptcompress_info * info;
  31776. +
  31777. + assert("edward-138", clust != NULL);
  31778. + assert("edward-728", clust->hint != NULL);
  31779. + assert("edward-226", reiser4_schedulable());
  31780. + assert("edward-137", inode != NULL);
  31781. + assert("edward-729", cryptcompress_inode_ok(inode));
  31782. +
  31783. + hint = clust->hint;
  31784. + fplug = inode_file_plugin(inode);
  31785. + was_grabbed = get_current_context()->grabbed_blocks;
  31786. + info = cryptcompress_inode_data(inode);
  31787. + tc = &clust->tc;
  31788. +
  31789. + assert("edward-462", !tfm_cluster_is_uptodate(tc));
  31790. + assert("edward-461", ergo(read, tfm_stream_is_set(tc, INPUT_STREAM)));
  31791. +
  31792. + dclust_init_extension(hint);
  31793. +
  31794. + /* set key of the first disk cluster item */
  31795. + fplug->flow_by_inode(inode,
  31796. + (read ? (char __user *)tfm_stream_data(tc, INPUT_STREAM) : NULL),
  31797. + 0 /* kernel space */ ,
  31798. + inode_scaled_cluster_size(inode),
  31799. + clust_to_off(clust->index, inode), READ_OP, &f);
  31800. + if (mode == ZNODE_WRITE_LOCK) {
  31801. + /* reserve for flush to make dirty all the leaf nodes
  31802. + which contain disk cluster */
  31803. + result =
  31804. + reiser4_grab_space_force(estimate_dirty_cluster(inode),
  31805. + BA_CAN_COMMIT);
  31806. + if (result)
  31807. + goto out;
  31808. + }
  31809. +
  31810. + ra_info.key_to_stop = f.key;
  31811. + set_key_offset(&ra_info.key_to_stop, get_key_offset(reiser4_max_key()));
  31812. +
  31813. + while (f.length) {
  31814. + result = find_cluster_item(hint, &f.key, mode,
  31815. + NULL, FIND_EXACT,
  31816. + (mode == ZNODE_WRITE_LOCK ?
  31817. + CBK_FOR_INSERT : 0));
  31818. + switch (result) {
  31819. + case CBK_COORD_NOTFOUND:
  31820. + result = 0;
  31821. + if (inode_scaled_offset
  31822. + (inode, clust_to_off(clust->index, inode)) ==
  31823. + get_key_offset(&f.key)) {
  31824. + /* first item not found, this is treated
  31825. + as disk cluster is absent */
  31826. + clust->dstat = FAKE_DISK_CLUSTER;
  31827. + goto out;
  31828. + }
  31829. + /* we are outside the cluster, stop search here */
  31830. + assert("edward-146",
  31831. + f.length != inode_scaled_cluster_size(inode));
  31832. + goto ok;
  31833. + case CBK_COORD_FOUND:
  31834. + assert("edward-148",
  31835. + hint->ext_coord.coord.between == AT_UNIT);
  31836. + assert("edward-460",
  31837. + hint->ext_coord.coord.unit_pos == 0);
  31838. +
  31839. + coord_clear_iplug(&hint->ext_coord.coord);
  31840. + result = zload_ra(hint->ext_coord.coord.node, &ra_info);
  31841. + if (unlikely(result))
  31842. + goto out;
  31843. + iplug = item_plugin_by_coord(&hint->ext_coord.coord);
  31844. + assert("edward-147",
  31845. + item_id_by_coord(&hint->ext_coord.coord) ==
  31846. + CTAIL_ID);
  31847. +
  31848. + result = iplug->s.file.read(NULL, &f, hint);
  31849. + if (result) {
  31850. + zrelse(hint->ext_coord.coord.node);
  31851. + goto out;
  31852. + }
  31853. + if (mode == ZNODE_WRITE_LOCK) {
  31854. + /* Don't make dirty more nodes then it was
  31855. + estimated (see comments before
  31856. + estimate_dirty_cluster). Missed nodes will be
  31857. + read up in flush time if they are evicted from
  31858. + memory */
  31859. + if (dclust_get_extension_ncount(hint) <=
  31860. + estimate_dirty_cluster(inode))
  31861. + znode_make_dirty(hint->ext_coord.coord.node);
  31862. +
  31863. + znode_set_convertible(hint->ext_coord.coord.
  31864. + node);
  31865. + }
  31866. + zrelse(hint->ext_coord.coord.node);
  31867. + break;
  31868. + default:
  31869. + goto out;
  31870. + }
  31871. + }
  31872. + ok:
  31873. + /* at least one item was found */
  31874. + /* NOTE-EDWARD: Callers should handle the case
  31875. + when disk cluster is incomplete (-EIO) */
  31876. + tc->len = inode_scaled_cluster_size(inode) - f.length;
  31877. + tc->lsize = lbytes(clust->index, inode);
  31878. + assert("edward-1196", tc->len > 0);
  31879. + assert("edward-1406", tc->lsize > 0);
  31880. +
  31881. + if (hint_is_unprepped_dclust(clust->hint)) {
  31882. + clust->dstat = UNPR_DISK_CLUSTER;
  31883. + } else if (clust->index == info->trunc_index) {
  31884. + clust->dstat = TRNC_DISK_CLUSTER;
  31885. + } else {
  31886. + clust->dstat = PREP_DISK_CLUSTER;
  31887. + dclust_set_extension_dsize(clust->hint, tc->len);
  31888. + }
  31889. + out:
  31890. + assert("edward-1339",
  31891. + get_current_context()->grabbed_blocks >= was_grabbed);
  31892. + grabbed2free(get_current_context(),
  31893. + get_current_super_private(),
  31894. + get_current_context()->grabbed_blocks - was_grabbed);
  31895. + return result;
  31896. +}
  31897. +
  31898. +int get_disk_cluster_locked(struct cluster_handle * clust, struct inode *inode,
  31899. + znode_lock_mode lock_mode)
  31900. +{
  31901. + reiser4_key key;
  31902. + ra_info_t ra_info;
  31903. +
  31904. + assert("edward-730", reiser4_schedulable());
  31905. + assert("edward-731", clust != NULL);
  31906. + assert("edward-732", inode != NULL);
  31907. +
  31908. + if (hint_is_valid(clust->hint)) {
  31909. + assert("edward-1293", clust->dstat != INVAL_DISK_CLUSTER);
  31910. + assert("edward-1294",
  31911. + znode_is_write_locked(clust->hint->lh.node));
  31912. + /* already have a valid locked position */
  31913. + return (clust->dstat ==
  31914. + FAKE_DISK_CLUSTER ? CBK_COORD_NOTFOUND :
  31915. + CBK_COORD_FOUND);
  31916. + }
  31917. + key_by_inode_cryptcompress(inode, clust_to_off(clust->index, inode),
  31918. + &key);
  31919. + ra_info.key_to_stop = key;
  31920. + set_key_offset(&ra_info.key_to_stop, get_key_offset(reiser4_max_key()));
  31921. +
  31922. + return find_cluster_item(clust->hint, &key, lock_mode, NULL, FIND_EXACT,
  31923. + CBK_FOR_INSERT);
  31924. +}
  31925. +
  31926. +/* Read needed cluster pages before modifying.
  31927. + If success, @clust->hint contains locked position in the tree.
  31928. + Also:
  31929. + . find and set disk cluster state
  31930. + . make disk cluster dirty if its state is not FAKE_DISK_CLUSTER.
  31931. +*/
  31932. +static int read_some_cluster_pages(struct inode * inode,
  31933. + struct cluster_handle * clust)
  31934. +{
  31935. + int i;
  31936. + int result = 0;
  31937. + item_plugin *iplug;
  31938. + struct reiser4_slide * win = clust->win;
  31939. + znode_lock_mode mode = ZNODE_WRITE_LOCK;
  31940. +
  31941. + iplug = item_plugin_by_id(CTAIL_ID);
  31942. +
  31943. + assert("edward-924", !tfm_cluster_is_uptodate(&clust->tc));
  31944. +
  31945. +#if REISER4_DEBUG
  31946. + if (clust->nr_pages == 0) {
  31947. + /* start write hole from fake disk cluster */
  31948. + assert("edward-1117", win != NULL);
  31949. + assert("edward-1118", win->stat == HOLE_WINDOW);
  31950. + assert("edward-1119", new_logical_cluster(clust, inode));
  31951. + }
  31952. +#endif
  31953. + if (new_logical_cluster(clust, inode)) {
  31954. + /*
  31955. + new page cluster is about to be written, nothing to read,
  31956. + */
  31957. + assert("edward-734", reiser4_schedulable());
  31958. + assert("edward-735", clust->hint->lh.owner == NULL);
  31959. +
  31960. + if (clust->nr_pages) {
  31961. + int off;
  31962. + struct page * pg;
  31963. + assert("edward-1419", clust->pages != NULL);
  31964. + pg = clust->pages[clust->nr_pages - 1];
  31965. + assert("edward-1420", pg != NULL);
  31966. + off = off_to_pgoff(win->off+win->count+win->delta);
  31967. + if (off) {
  31968. + lock_page(pg);
  31969. + zero_user_segment(pg, off, PAGE_SIZE);
  31970. + unlock_page(pg);
  31971. + }
  31972. + }
  31973. + clust->dstat = FAKE_DISK_CLUSTER;
  31974. + return 0;
  31975. + }
  31976. + /*
  31977. + Here we should search for disk cluster to figure out its real state.
  31978. + Also there is one more important reason to do disk search: we need
  31979. + to make disk cluster _dirty_ if it exists
  31980. + */
  31981. +
  31982. + /* if windows is specified, read the only pages
  31983. + that will be modified partially */
  31984. +
  31985. + for (i = 0; i < clust->nr_pages; i++) {
  31986. + struct page *pg = clust->pages[i];
  31987. +
  31988. + lock_page(pg);
  31989. + if (PageUptodate(pg)) {
  31990. + unlock_page(pg);
  31991. + continue;
  31992. + }
  31993. + unlock_page(pg);
  31994. +
  31995. + if (win &&
  31996. + i >= size_in_pages(win->off) &&
  31997. + i < off_to_pg(win->off + win->count + win->delta))
  31998. + /* page will be completely overwritten */
  31999. + continue;
  32000. +
  32001. + if (win && (i == clust->nr_pages - 1) &&
  32002. + /* the last page is
  32003. + partially modified,
  32004. + not uptodate .. */
  32005. + (size_in_pages(i_size_read(inode)) <= pg->index)) {
  32006. + /* .. and appended,
  32007. + so set zeroes to the rest */
  32008. + int offset;
  32009. + lock_page(pg);
  32010. + assert("edward-1260",
  32011. + size_in_pages(win->off + win->count +
  32012. + win->delta) - 1 == i);
  32013. +
  32014. + offset =
  32015. + off_to_pgoff(win->off + win->count + win->delta);
  32016. + zero_user_segment(pg, offset, PAGE_SIZE);
  32017. + unlock_page(pg);
  32018. + /* still not uptodate */
  32019. + break;
  32020. + }
  32021. + lock_page(pg);
  32022. + result = do_readpage_ctail(inode, clust, pg, mode);
  32023. +
  32024. + assert("edward-1526", ergo(!result, PageUptodate(pg)));
  32025. + unlock_page(pg);
  32026. + if (result) {
  32027. + warning("edward-219", "do_readpage_ctail failed");
  32028. + goto out;
  32029. + }
  32030. + }
  32031. + if (!tfm_cluster_is_uptodate(&clust->tc)) {
  32032. + /* disk cluster unclaimed, but we need to make its znodes dirty
  32033. + * to make flush update convert its content
  32034. + */
  32035. + result = find_disk_cluster(clust, inode,
  32036. + 0 /* do not read items */,
  32037. + mode);
  32038. + }
  32039. + out:
  32040. + tfm_cluster_clr_uptodate(&clust->tc);
  32041. + return result;
  32042. +}
  32043. +
  32044. +static int should_create_unprepped_cluster(struct cluster_handle * clust,
  32045. + struct inode * inode)
  32046. +{
  32047. + assert("edward-737", clust != NULL);
  32048. +
  32049. + switch (clust->dstat) {
  32050. + case PREP_DISK_CLUSTER:
  32051. + case UNPR_DISK_CLUSTER:
  32052. + return 0;
  32053. + case FAKE_DISK_CLUSTER:
  32054. + if (clust->win &&
  32055. + clust->win->stat == HOLE_WINDOW && clust->nr_pages == 0) {
  32056. + assert("edward-1172",
  32057. + new_logical_cluster(clust, inode));
  32058. + return 0;
  32059. + }
  32060. + return 1;
  32061. + default:
  32062. + impossible("edward-1173", "bad disk cluster state");
  32063. + return 0;
  32064. + }
  32065. +}
  32066. +
  32067. +static int cryptcompress_make_unprepped_cluster(struct cluster_handle * clust,
  32068. + struct inode *inode)
  32069. +{
  32070. + int result;
  32071. +
  32072. + assert("edward-1123", reiser4_schedulable());
  32073. + assert("edward-737", clust != NULL);
  32074. + assert("edward-738", inode != NULL);
  32075. + assert("edward-739", cryptcompress_inode_ok(inode));
  32076. + assert("edward-1053", clust->hint != NULL);
  32077. +
  32078. + if (!should_create_unprepped_cluster(clust, inode)) {
  32079. + if (clust->reserved) {
  32080. + cluster_reserved2free(estimate_insert_cluster(inode));
  32081. +#if REISER4_DEBUG
  32082. + assert("edward-1267",
  32083. + clust->reserved_unprepped ==
  32084. + estimate_insert_cluster(inode));
  32085. + clust->reserved_unprepped -=
  32086. + estimate_insert_cluster(inode);
  32087. +#endif
  32088. + }
  32089. + return 0;
  32090. + }
  32091. + assert("edward-1268", clust->reserved);
  32092. + cluster_reserved2grabbed(estimate_insert_cluster(inode));
  32093. +#if REISER4_DEBUG
  32094. + assert("edward-1441",
  32095. + clust->reserved_unprepped == estimate_insert_cluster(inode));
  32096. + clust->reserved_unprepped -= estimate_insert_cluster(inode);
  32097. +#endif
  32098. + result = ctail_insert_unprepped_cluster(clust, inode);
  32099. + if (result)
  32100. + return result;
  32101. +
  32102. + inode_add_bytes(inode, inode_cluster_size(inode));
  32103. +
  32104. + assert("edward-743", cryptcompress_inode_ok(inode));
  32105. + assert("edward-744", znode_is_write_locked(clust->hint->lh.node));
  32106. +
  32107. + clust->dstat = UNPR_DISK_CLUSTER;
  32108. + return 0;
  32109. +}
  32110. +
  32111. +/* . Grab page cluster for read, write, setattr, etc. operations;
  32112. + * . Truncate its complete pages, if needed;
  32113. + */
  32114. +int prepare_page_cluster(struct inode * inode, struct cluster_handle * clust,
  32115. + rw_op rw)
  32116. +{
  32117. + assert("edward-177", inode != NULL);
  32118. + assert("edward-741", cryptcompress_inode_ok(inode));
  32119. + assert("edward-740", clust->pages != NULL);
  32120. +
  32121. + set_cluster_nrpages(clust, inode);
  32122. + reset_cluster_pgset(clust, cluster_nrpages(inode));
  32123. + return grab_page_cluster(inode, clust, rw);
  32124. +}
  32125. +
  32126. +/* Truncate complete page cluster of index @index.
  32127. + * This is called by ->kill_hook() method of item
  32128. + * plugin when deleting a disk cluster of such index.
  32129. + */
  32130. +void truncate_complete_page_cluster(struct inode *inode, cloff_t index,
  32131. + int even_cows)
  32132. +{
  32133. + int found;
  32134. + int nr_pages;
  32135. + jnode *node;
  32136. + pgoff_t page_index = clust_to_pg(index, inode);
  32137. + struct page *pages[MAX_CLUSTER_NRPAGES];
  32138. +
  32139. + node = jlookup(current_tree, get_inode_oid(inode),
  32140. + clust_to_pg(index, inode));
  32141. + nr_pages = size_in_pages(lbytes(index, inode));
  32142. + assert("edward-1483", nr_pages != 0);
  32143. + if (!node)
  32144. + goto truncate;
  32145. + found = find_get_pages(inode->i_mapping, &page_index,
  32146. + cluster_nrpages(inode), pages);
  32147. + if (!found) {
  32148. + assert("edward-1484", jnode_truncate_ok(inode, index));
  32149. + return;
  32150. + }
  32151. + lock_cluster(node);
  32152. +
  32153. + if (reiser4_inode_get_flag(inode, REISER4_FILE_CONV_IN_PROGRESS)
  32154. + && index == 0)
  32155. + /* converting to unix_file is in progress */
  32156. + JF_CLR(node, JNODE_CLUSTER_PAGE);
  32157. + if (JF_ISSET(node, JNODE_DIRTY)) {
  32158. + /*
  32159. + * @nr_pages were checked in, but not yet checked out -
  32160. + * we need to release them. (also there can be pages
  32161. + * attached to page cache by read(), etc. - don't take
  32162. + * them into account).
  32163. + */
  32164. + assert("edward-1198", found >= nr_pages);
  32165. +
  32166. + /* free disk space grabbed for disk cluster converting */
  32167. + cluster_reserved2grabbed(estimate_update_cluster(inode));
  32168. + grabbed2free(get_current_context(),
  32169. + get_current_super_private(),
  32170. + estimate_update_cluster(inode));
  32171. + __put_page_cluster(0, nr_pages, pages, inode);
  32172. +
  32173. + /* This will clear dirty bit, uncapture and unlock jnode */
  32174. + unlock_cluster_uncapture(node);
  32175. + } else
  32176. + unlock_cluster(node);
  32177. + jput(node); /* jlookup */
  32178. + put_found_pages(pages, found); /* find_get_pages */
  32179. + truncate:
  32180. + if (reiser4_inode_get_flag(inode, REISER4_FILE_CONV_IN_PROGRESS) &&
  32181. + index == 0)
  32182. + return;
  32183. + truncate_page_cluster_range(inode, pages, index, 0,
  32184. + cluster_nrpages(inode),
  32185. + even_cows);
  32186. + assert("edward-1201",
  32187. + ergo(!reiser4_inode_get_flag(inode,
  32188. + REISER4_FILE_CONV_IN_PROGRESS),
  32189. + jnode_truncate_ok(inode, index)));
  32190. + return;
  32191. +}
  32192. +
  32193. +/*
  32194. + * Set cluster handle @clust of a logical cluster before
  32195. + * modifications which are supposed to be committed.
  32196. + *
  32197. + * . grab cluster pages;
  32198. + * . reserve disk space;
  32199. + * . maybe read pages from disk and set the disk cluster dirty;
  32200. + * . maybe write hole and check in (partially zeroed) logical cluster;
  32201. + * . create 'unprepped' disk cluster for new or fake logical one.
  32202. + */
  32203. +static int prepare_logical_cluster(struct inode *inode,
  32204. + loff_t file_off, /* write position
  32205. + in the file */
  32206. + loff_t to_file, /* bytes of users data
  32207. + to write to the file */
  32208. + struct cluster_handle * clust,
  32209. + logical_cluster_op op)
  32210. +{
  32211. + int result = 0;
  32212. + struct reiser4_slide * win = clust->win;
  32213. +
  32214. + reset_cluster_params(clust);
  32215. + cluster_set_tfm_act(&clust->tc, TFMA_READ);
  32216. +#if REISER4_DEBUG
  32217. + clust->ctx = get_current_context();
  32218. +#endif
  32219. + assert("edward-1190", op != LC_INVAL);
  32220. +
  32221. + clust->op = op;
  32222. +
  32223. + result = prepare_page_cluster(inode, clust, WRITE_OP);
  32224. + if (result)
  32225. + return result;
  32226. + assert("edward-1447",
  32227. + ergo(clust->nr_pages != 0, jprivate(clust->pages[0])));
  32228. + assert("edward-1448",
  32229. + ergo(clust->nr_pages != 0,
  32230. + jnode_is_cluster_page(jprivate(clust->pages[0]))));
  32231. +
  32232. + result = reserve4cluster(inode, clust);
  32233. + if (result)
  32234. + goto out;
  32235. +
  32236. + result = read_some_cluster_pages(inode, clust);
  32237. +
  32238. + if (result ||
  32239. + /*
  32240. + * don't submit data modifications
  32241. + * when expanding or shrinking holes
  32242. + */
  32243. + (op == LC_SHRINK && clust->dstat == FAKE_DISK_CLUSTER) ||
  32244. + (op == LC_EXPAND && clust->dstat == FAKE_DISK_CLUSTER)){
  32245. + free_reserved4cluster(inode,
  32246. + clust,
  32247. + estimate_update_cluster(inode) +
  32248. + estimate_insert_cluster(inode));
  32249. + goto out;
  32250. + }
  32251. + assert("edward-1124", clust->dstat != INVAL_DISK_CLUSTER);
  32252. +
  32253. + result = cryptcompress_make_unprepped_cluster(clust, inode);
  32254. + if (result)
  32255. + goto error;
  32256. + if (win && win->stat == HOLE_WINDOW) {
  32257. + result = write_hole(inode, clust, file_off, to_file);
  32258. + if (result)
  32259. + goto error;
  32260. + }
  32261. + return 0;
  32262. + error:
  32263. + free_reserved4cluster(inode, clust,
  32264. + estimate_update_cluster(inode));
  32265. + out:
  32266. + put_page_cluster(clust, inode, WRITE_OP);
  32267. + return result;
  32268. +}
  32269. +
  32270. +/* set window by two offsets */
  32271. +static void set_window(struct cluster_handle * clust,
  32272. + struct reiser4_slide * win, struct inode *inode,
  32273. + loff_t o1, loff_t o2)
  32274. +{
  32275. + assert("edward-295", clust != NULL);
  32276. + assert("edward-296", inode != NULL);
  32277. + assert("edward-1071", win != NULL);
  32278. + assert("edward-297", o1 <= o2);
  32279. +
  32280. + clust->index = off_to_clust(o1, inode);
  32281. +
  32282. + win->off = off_to_cloff(o1, inode);
  32283. + win->count = min((loff_t)(inode_cluster_size(inode) - win->off),
  32284. + o2 - o1);
  32285. + win->delta = 0;
  32286. +
  32287. + clust->win = win;
  32288. +}
  32289. +
  32290. +static int set_window_and_cluster(struct inode *inode,
  32291. + struct cluster_handle * clust,
  32292. + struct reiser4_slide * win, size_t length,
  32293. + loff_t file_off)
  32294. +{
  32295. + int result;
  32296. +
  32297. + assert("edward-197", clust != NULL);
  32298. + assert("edward-1072", win != NULL);
  32299. + assert("edward-198", inode != NULL);
  32300. +
  32301. + result = alloc_cluster_pgset(clust, cluster_nrpages(inode));
  32302. + if (result)
  32303. + return result;
  32304. +
  32305. + if (file_off > i_size_read(inode)) {
  32306. + /* Uhmm, hole in cryptcompress file... */
  32307. + loff_t hole_size;
  32308. + hole_size = file_off - inode->i_size;
  32309. +
  32310. + set_window(clust, win, inode, inode->i_size, file_off);
  32311. + win->stat = HOLE_WINDOW;
  32312. + if (win->off + hole_size < inode_cluster_size(inode))
  32313. + /* there is also user's data to append to the hole */
  32314. + win->delta = min(inode_cluster_size(inode) -
  32315. + (win->off + win->count), length);
  32316. + return 0;
  32317. + }
  32318. + set_window(clust, win, inode, file_off, file_off + length);
  32319. + win->stat = DATA_WINDOW;
  32320. + return 0;
  32321. +}
  32322. +
  32323. +int set_cluster_by_page(struct cluster_handle * clust, struct page * page,
  32324. + int count)
  32325. +{
  32326. + int result = 0;
  32327. + int (*setting_actor)(struct cluster_handle * clust, int count);
  32328. +
  32329. + assert("edward-1358", clust != NULL);
  32330. + assert("edward-1359", page != NULL);
  32331. + assert("edward-1360", page->mapping != NULL);
  32332. + assert("edward-1361", page->mapping->host != NULL);
  32333. +
  32334. + setting_actor =
  32335. + (clust->pages ? reset_cluster_pgset : alloc_cluster_pgset);
  32336. + result = setting_actor(clust, count);
  32337. + clust->index = pg_to_clust(page->index, page->mapping->host);
  32338. + return result;
  32339. +}
  32340. +
  32341. +/* reset all the params that not get updated */
  32342. +void reset_cluster_params(struct cluster_handle * clust)
  32343. +{
  32344. + assert("edward-197", clust != NULL);
  32345. +
  32346. + clust->dstat = INVAL_DISK_CLUSTER;
  32347. + clust->tc.uptodate = 0;
  32348. + clust->tc.len = 0;
  32349. +}
  32350. +
  32351. +/* the heart of write_cryptcompress */
  32352. +static loff_t do_write_cryptcompress(struct file *file, struct inode *inode,
  32353. + const char __user *buf, size_t to_write,
  32354. + loff_t pos, struct dispatch_context *cont)
  32355. +{
  32356. + int i;
  32357. + hint_t *hint;
  32358. + int result = 0;
  32359. + size_t count;
  32360. + struct reiser4_slide win;
  32361. + struct cluster_handle clust;
  32362. + struct cryptcompress_info * info;
  32363. +
  32364. + assert("edward-154", buf != NULL);
  32365. + assert("edward-161", reiser4_schedulable());
  32366. + assert("edward-748", cryptcompress_inode_ok(inode));
  32367. + assert("edward-159", current_blocksize == PAGE_SIZE);
  32368. + assert("edward-1274", get_current_context()->grabbed_blocks == 0);
  32369. +
  32370. + hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
  32371. + if (hint == NULL)
  32372. + return RETERR(-ENOMEM);
  32373. +
  32374. + result = load_file_hint(file, hint);
  32375. + if (result) {
  32376. + kfree(hint);
  32377. + return result;
  32378. + }
  32379. + count = to_write;
  32380. +
  32381. + reiser4_slide_init(&win);
  32382. + cluster_init_read(&clust, &win);
  32383. + clust.hint = hint;
  32384. + info = cryptcompress_inode_data(inode);
  32385. +
  32386. + mutex_lock(&info->checkin_mutex);
  32387. +
  32388. + result = set_window_and_cluster(inode, &clust, &win, to_write, pos);
  32389. + if (result)
  32390. + goto out;
  32391. +
  32392. + if (next_window_stat(&win) == HOLE_WINDOW) {
  32393. + /* write hole in this iteration
  32394. + separated from the loop below */
  32395. + result = write_dispatch_hook(file, inode,
  32396. + pos, &clust, cont);
  32397. + if (result)
  32398. + goto out;
  32399. + result = prepare_logical_cluster(inode, pos, count, &clust,
  32400. + LC_APPOV);
  32401. + if (result)
  32402. + goto out;
  32403. + }
  32404. + do {
  32405. + const char __user * src;
  32406. + unsigned page_off, to_page;
  32407. +
  32408. + assert("edward-750", reiser4_schedulable());
  32409. +
  32410. + result = write_dispatch_hook(file, inode,
  32411. + pos + to_write - count,
  32412. + &clust, cont);
  32413. + if (result)
  32414. + goto out;
  32415. + if (cont->state == DISPATCH_ASSIGNED_NEW)
  32416. + /* done_lh was called in write_dispatch_hook */
  32417. + goto out_no_longterm_lock;
  32418. +
  32419. + result = prepare_logical_cluster(inode, pos, count, &clust,
  32420. + LC_APPOV);
  32421. + if (result)
  32422. + goto out;
  32423. +
  32424. + assert("edward-751", cryptcompress_inode_ok(inode));
  32425. + assert("edward-204", win.stat == DATA_WINDOW);
  32426. + assert("edward-1288", hint_is_valid(clust.hint));
  32427. + assert("edward-752",
  32428. + znode_is_write_locked(hint->ext_coord.coord.node));
  32429. + put_hint_cluster(&clust, inode, ZNODE_WRITE_LOCK);
  32430. +
  32431. + /* set write position in page */
  32432. + page_off = off_to_pgoff(win.off);
  32433. +
  32434. + /* copy user's data to cluster pages */
  32435. + for (i = off_to_pg(win.off), src = buf;
  32436. + i < size_in_pages(win.off + win.count);
  32437. + i++, src += to_page) {
  32438. + to_page = __mbp(win.off + win.count, i) - page_off;
  32439. + assert("edward-1039",
  32440. + page_off + to_page <= PAGE_SIZE);
  32441. + assert("edward-287", clust.pages[i] != NULL);
  32442. +
  32443. + fault_in_pages_readable(src, to_page);
  32444. +
  32445. + lock_page(clust.pages[i]);
  32446. + result =
  32447. + __copy_from_user((char *)kmap(clust.pages[i]) +
  32448. + page_off, src, to_page);
  32449. + kunmap(clust.pages[i]);
  32450. + if (unlikely(result)) {
  32451. + unlock_page(clust.pages[i]);
  32452. + result = -EFAULT;
  32453. + goto err2;
  32454. + }
  32455. + SetPageUptodate(clust.pages[i]);
  32456. + set_page_dirty_notag(clust.pages[i]);
  32457. + flush_dcache_page(clust.pages[i]);
  32458. + mark_page_accessed(clust.pages[i]);
  32459. + unlock_page(clust.pages[i]);
  32460. + page_off = 0;
  32461. + }
  32462. + assert("edward-753", cryptcompress_inode_ok(inode));
  32463. +
  32464. + result = checkin_logical_cluster(&clust, inode);
  32465. + if (result)
  32466. + goto err2;
  32467. +
  32468. + buf += win.count;
  32469. + count -= win.count;
  32470. +
  32471. + result = balance_dirty_page_cluster(&clust, inode, 0, count,
  32472. + win_count_to_nrpages(&win));
  32473. + if (result)
  32474. + goto err1;
  32475. + assert("edward-755", hint->lh.owner == NULL);
  32476. + reset_cluster_params(&clust);
  32477. + continue;
  32478. + err2:
  32479. + put_page_cluster(&clust, inode, WRITE_OP);
  32480. + err1:
  32481. + if (clust.reserved)
  32482. + free_reserved4cluster(inode,
  32483. + &clust,
  32484. + estimate_update_cluster(inode));
  32485. + break;
  32486. + } while (count);
  32487. + out:
  32488. + done_lh(&hint->lh);
  32489. + save_file_hint(file, hint);
  32490. + out_no_longterm_lock:
  32491. + mutex_unlock(&info->checkin_mutex);
  32492. + kfree(hint);
  32493. + put_cluster_handle(&clust);
  32494. + assert("edward-195",
  32495. + ergo((to_write == count),
  32496. + (result < 0 || cont->state == DISPATCH_ASSIGNED_NEW)));
  32497. + return (to_write - count) ? (to_write - count) : result;
  32498. +}
  32499. +
  32500. +/**
  32501. + * plugin->write()
  32502. + * @file: file to write to
  32503. + * @buf: address of user-space buffer
  32504. + * @read_amount: number of bytes to write
  32505. + * @off: position in file to write to
  32506. + */
  32507. +ssize_t write_cryptcompress(struct file *file, const char __user *buf,
  32508. + size_t count, loff_t *off,
  32509. + struct dispatch_context *cont)
  32510. +{
  32511. + ssize_t result;
  32512. + struct inode *inode;
  32513. + reiser4_context *ctx;
  32514. + loff_t pos = *off;
  32515. + struct cryptcompress_info *info;
  32516. +
  32517. + assert("edward-1449", cont->state == DISPATCH_INVAL_STATE);
  32518. +
  32519. + inode = file_inode(file);
  32520. + assert("edward-196", cryptcompress_inode_ok(inode));
  32521. +
  32522. + info = cryptcompress_inode_data(inode); /* NOTE(review): info is fetched but never used in this function */
  32523. + ctx = get_current_context();
  32524. +
  32525. + result = file_remove_privs(file); /* strip suid/sgid/caps before writing */
  32526. + if (unlikely(result != 0)) {
  32527. + context_set_commit_async(ctx);
  32528. + return result;
  32529. + }
  32530. + /* remove_suid might create a transaction */
  32531. + reiser4_txn_restart(ctx);
  32532. +
  32533. + result = do_write_cryptcompress(file, inode, buf, count, pos, cont); /* actual write work */
  32534. +
  32535. + if (unlikely(result < 0)) {
  32536. + context_set_commit_async(ctx);
  32537. + return result;
  32538. + }
  32539. + /* update position in a file */
  32540. + *off = pos + result;
  32541. + return result;
  32542. +}
  32543. +
  32544. +/* plugin->readpages */
  32545. +int readpages_cryptcompress(struct file *file, struct address_space *mapping,
  32546. + struct list_head *pages, unsigned nr_pages)
  32547. +{
  32548. + reiser4_context * ctx;
  32549. + int ret;
  32550. +
  32551. + ctx = reiser4_init_context(mapping->host->i_sb);
  32552. + if (IS_ERR(ctx)) {
  32553. + ret = PTR_ERR(ctx);
  32554. + goto err;
  32555. + }
  32556. + /* cryptcompress file can be built of ctail items only */
  32557. + ret = readpages_ctail(file, mapping, pages);
  32558. + reiser4_txn_restart(ctx);
  32559. + reiser4_exit_context(ctx);
  32560. + if (ret) {
  32561. +err:
  32562. + put_pages_list(pages); /* on any failure, drop the pages we did not read */
  32563. + }
  32564. + return ret;
  32565. +}
  32566. +
  32567. +static reiser4_block_nr cryptcompress_estimate_read(struct inode *inode)
  32568. +{
  32569. + /* reserve one block to update stat data item */
  32570. + assert("edward-1193",
  32571. + inode_file_plugin(inode)->estimate.update ==
  32572. + estimate_update_common);
  32573. + return estimate_update_common(inode); /* same estimate as the common file plugin */
  32574. +}
  32575. +
  32576. +/**
  32577. + * plugin->read
  32578. + * @file: file to read from
  32579. + * @buf: address of user-space buffer
  32580. + * @read_amount: number of bytes to read
  32581. + * @off: position in file to read from
  32582. + */
  32583. +ssize_t read_cryptcompress(struct file * file, char __user *buf, size_t size,
  32584. + loff_t * off)
  32585. +{
  32586. + ssize_t result;
  32587. + struct inode *inode;
  32588. + reiser4_context *ctx;
  32589. + struct cryptcompress_info *info;
  32590. + reiser4_block_nr needed;
  32591. +
  32592. + inode = file_inode(file);
  32593. + assert("edward-1194", !reiser4_inode_get_flag(inode, REISER4_NO_SD));
  32594. +
  32595. + ctx = reiser4_init_context(inode->i_sb);
  32596. + if (IS_ERR(ctx))
  32597. + return PTR_ERR(ctx);
  32598. +
  32599. + info = cryptcompress_inode_data(inode); /* NOTE(review): info is never used below */
  32600. + needed = cryptcompress_estimate_read(inode);
  32601. +
  32602. + result = reiser4_grab_space(needed, BA_CAN_COMMIT); /* reserve space for atime/stat-data update */
  32603. + if (result != 0) {
  32604. + reiser4_exit_context(ctx);
  32605. + return result;
  32606. + }
  32607. + result = new_sync_read(file, buf, size, off); /* delegate the actual read to the VFS sync path */
  32608. +
  32609. + context_set_commit_async(ctx);
  32610. + reiser4_exit_context(ctx);
  32611. +
  32612. + return result;
  32613. +}
  32614. +
  32615. +/* Set left coord when unit is not found after node_lookup()
  32616. + This takes into account that there can be holes in a sequence
  32617. + of disk clusters */
  32618. +
  32619. +static void adjust_left_coord(coord_t * left_coord)
  32620. +{
  32621. + switch (left_coord->between) {
  32622. + case AFTER_UNIT:
  32623. + left_coord->between = AFTER_ITEM; /* fallthrough */
  32624. + case AFTER_ITEM:
  32625. + case BEFORE_UNIT:
  32626. + break;
  32627. + default:
  32628. + impossible("edward-1204", "bad left coord to cut");
  32629. + }
  32630. + return;
  32631. +}
  32632. +
  32633. +#define CRC_CUT_TREE_MIN_ITERATIONS 64
  32634. +
  32635. +/* plugin->cut_tree_worker */
  32636. +int cut_tree_worker_cryptcompress(tap_t * tap, const reiser4_key * from_key,
  32637. + const reiser4_key * to_key,
  32638. + reiser4_key * smallest_removed,
  32639. + struct inode *object, int truncate,
  32640. + int *progress)
  32641. +{
  32642. + lock_handle next_node_lock;
  32643. + coord_t left_coord;
  32644. + int result;
  32645. +
  32646. + assert("edward-1158", tap->coord->node != NULL);
  32647. + assert("edward-1159", znode_is_write_locked(tap->coord->node));
  32648. + assert("edward-1160", znode_get_level(tap->coord->node) == LEAF_LEVEL);
  32649. +
  32650. + *progress = 0;
  32651. + init_lh(&next_node_lock);
  32652. +
  32653. + while (1) {
  32654. + znode *node; /* node from which items are cut */
  32655. + node_plugin *nplug; /* node plugin for @node */
  32656. +
  32657. + node = tap->coord->node;
  32658. +
  32659. + /* Move next_node_lock to the next node on the left. */
  32660. + result =
  32661. + reiser4_get_left_neighbor(&next_node_lock, node,
  32662. + ZNODE_WRITE_LOCK,
  32663. + GN_CAN_USE_UPPER_LEVELS);
  32664. + if (result != 0 && result != -E_NO_NEIGHBOR)
  32665. + break;
  32666. + /* FIXME-EDWARD: Check can we delete the node as a whole. */
  32667. + result = reiser4_tap_load(tap);
  32668. + if (result)
  32669. + return result; /* NOTE(review): returns without done_lh(&next_node_lock) — verify the lock handle is not leaked */
  32670. +
  32671. + /* Prepare the second (right) point for cut_node() */
  32672. + if (*progress)
  32673. + coord_init_last_unit(tap->coord, node);
  32674. +
  32675. + else if (item_plugin_by_coord(tap->coord)->b.lookup == NULL)
  32676. + /* set rightmost unit for the items without lookup method */
  32677. + tap->coord->unit_pos = coord_last_unit_pos(tap->coord);
  32678. +
  32679. + nplug = node->nplug;
  32680. +
  32681. + assert("edward-1161", nplug);
  32682. + assert("edward-1162", nplug->lookup);
  32683. +
  32684. + /* left_coord is leftmost unit cut from @node */
  32685. + result = nplug->lookup(node, from_key, FIND_EXACT, &left_coord);
  32686. +
  32687. + if (IS_CBKERR(result))
  32688. + break;
  32689. +
  32690. + if (result == CBK_COORD_NOTFOUND)
  32691. + adjust_left_coord(&left_coord); /* holes may leave no exact match */
  32692. +
  32693. + /* adjust coordinates so that they are set to existing units */
  32694. + if (coord_set_to_right(&left_coord)
  32695. + || coord_set_to_left(tap->coord)) {
  32696. + result = 0;
  32697. + break;
  32698. + }
  32699. +
  32700. + if (coord_compare(&left_coord, tap->coord) ==
  32701. + COORD_CMP_ON_RIGHT) {
  32702. + /* keys from @from_key to @to_key are not in the tree */
  32703. + result = 0;
  32704. + break;
  32705. + }
  32706. +
  32707. + /* cut data from one node */
  32708. + *smallest_removed = *reiser4_min_key();
  32709. + result = kill_node_content(&left_coord,
  32710. + tap->coord,
  32711. + from_key,
  32712. + to_key,
  32713. + smallest_removed,
  32714. + next_node_lock.node,
  32715. + object, truncate);
  32716. + reiser4_tap_relse(tap);
  32717. +
  32718. + if (result)
  32719. + break;
  32720. +
  32721. + ++(*progress);
  32722. +
  32723. + /* Check whether all items with keys >= from_key were removed
  32724. + * from the tree. */
  32725. + if (keyle(smallest_removed, from_key))
  32726. + /* result = 0; */
  32727. + break;
  32728. +
  32729. + if (next_node_lock.node == NULL)
  32730. + break;
  32731. +
  32732. + result = reiser4_tap_move(tap, &next_node_lock);
  32733. + done_lh(&next_node_lock);
  32734. + if (result)
  32735. + break;
  32736. +
  32737. + /* Break long cut_tree operation (deletion of a large file) if
  32738. + * atom requires commit. */
  32739. + if (*progress > CRC_CUT_TREE_MIN_ITERATIONS
  32740. + && current_atom_should_commit()) {
  32741. + result = -E_REPEAT;
  32742. + break;
  32743. + }
  32744. + }
  32745. + done_lh(&next_node_lock);
  32746. + return result;
  32747. +}
  32748. +
  32749. +static int expand_cryptcompress(struct inode *inode /* old size */,
  32750. + loff_t new_size)
  32751. +{
  32752. + int result = 0;
  32753. + hint_t *hint;
  32754. + lock_handle *lh;
  32755. + loff_t hole_size;
  32756. + int nr_zeroes;
  32757. + struct reiser4_slide win;
  32758. + struct cluster_handle clust;
  32759. +
  32760. + assert("edward-1133", inode->i_size < new_size);
  32761. + assert("edward-1134", reiser4_schedulable());
  32762. + assert("edward-1135", cryptcompress_inode_ok(inode));
  32763. + assert("edward-1136", current_blocksize == PAGE_SIZE);
  32764. + assert("edward-1333", off_to_cloff(inode->i_size, inode) != 0);
  32765. +
  32766. + hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
  32767. + if (hint == NULL)
  32768. + return RETERR(-ENOMEM);
  32769. + hint_init_zero(hint);
  32770. + lh = &hint->lh;
  32771. +
  32772. + reiser4_slide_init(&win);
  32773. + cluster_init_read(&clust, &win);
  32774. + clust.hint = hint;
  32775. +
  32776. + if (off_to_cloff(inode->i_size, inode) == 0) /* NOTE(review): contradicts assert edward-1333 above — branch looks dead; verify */
  32777. + goto append_hole;
  32778. + /*
  32779. + * It can happen that
  32780. + * a part of the hole will be converted
  32781. + * to zeros. If so, it should be submitted
  32782. + */
  32783. + result = alloc_cluster_pgset(&clust, cluster_nrpages(inode));
  32784. + if (result)
  32785. + goto out;
  32786. + hole_size = new_size - inode->i_size;
  32787. + nr_zeroes = inode_cluster_size(inode) -
  32788. + off_to_cloff(inode->i_size, inode); /* zeros needed to pad the last logical cluster */
  32789. + if (nr_zeroes > hole_size)
  32790. + nr_zeroes = hole_size;
  32791. +
  32792. + set_window(&clust, &win, inode, inode->i_size,
  32793. + inode->i_size + nr_zeroes);
  32794. + win.stat = HOLE_WINDOW;
  32795. +
  32796. + assert("edward-1137",
  32797. + clust.index == off_to_clust(inode->i_size, inode));
  32798. +
  32799. + result = prepare_logical_cluster(inode, 0, 0, &clust, LC_EXPAND);
  32800. + if (result)
  32801. + goto out;
  32802. + assert("edward-1139",
  32803. + clust.dstat == PREP_DISK_CLUSTER ||
  32804. + clust.dstat == UNPR_DISK_CLUSTER ||
  32805. + clust.dstat == FAKE_DISK_CLUSTER);
  32806. +
  32807. + assert("edward-1431", hole_size >= nr_zeroes);
  32808. +
  32809. + append_hole:
  32810. + INODE_SET_SIZE(inode, new_size);
  32811. + out:
  32812. + done_lh(lh);
  32813. + kfree(hint);
  32814. + put_cluster_handle(&clust);
  32815. + return result;
  32816. +}
  32817. +
  32818. +static int update_size_actor(struct inode *inode,
  32819. + loff_t new_size, int update_sd)
  32820. +{
  32821. + if (new_size & ((loff_t) (inode_cluster_size(inode)) - 1)) /* size not aligned to logical cluster boundary */
  32822. + /*
  32823. + * cut not at logical cluster boundary,
  32824. + * size will be updated by write_hole()
  32825. + */
  32826. + return 0;
  32827. + else
  32828. + return reiser4_update_file_size(inode, new_size, update_sd);
  32829. +}
  32830. +
  32831. +static int prune_cryptcompress(struct inode *inode,
  32832. + loff_t new_size, int update_sd)
  32833. +{
  32834. + int result = 0;
  32835. + unsigned nr_zeros;
  32836. + loff_t to_prune;
  32837. + loff_t old_size;
  32838. + cloff_t from_idx;
  32839. + cloff_t to_idx;
  32840. +
  32841. + hint_t *hint;
  32842. + lock_handle *lh;
  32843. + struct reiser4_slide win;
  32844. + struct cluster_handle clust;
  32845. +
  32846. + assert("edward-1140", inode->i_size >= new_size);
  32847. + assert("edward-1141", reiser4_schedulable());
  32848. + assert("edward-1142", cryptcompress_inode_ok(inode));
  32849. + assert("edward-1143", current_blocksize == PAGE_SIZE);
  32850. +
  32851. + old_size = inode->i_size;
  32852. +
  32853. + hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
  32854. + if (hint == NULL)
  32855. + return RETERR(-ENOMEM);
  32856. + hint_init_zero(hint);
  32857. + lh = &hint->lh;
  32858. +
  32859. + reiser4_slide_init(&win);
  32860. + cluster_init_read(&clust, &win);
  32861. + clust.hint = hint;
  32862. +
  32863. + /*
  32864. + * index of the leftmost logical cluster
  32865. + * that will be completely truncated
  32866. + */
  32867. + from_idx = size_in_lc(new_size, inode);
  32868. + to_idx = size_in_lc(inode->i_size, inode);
  32869. + /*
  32870. + * truncate all complete disk clusters starting from @from_idx
  32871. + */
  32872. + assert("edward-1174", from_idx <= to_idx);
  32873. +
  32874. + old_size = inode->i_size; /* NOTE(review): redundant — already assigned above */
  32875. + if (from_idx != to_idx) {
  32876. + struct cryptcompress_info *info;
  32877. + info = cryptcompress_inode_data(inode);
  32878. +
  32879. + result = cut_file_items(inode,
  32880. + clust_to_off(from_idx, inode),
  32881. + update_sd,
  32882. + clust_to_off(to_idx, inode),
  32883. + update_size_actor);
  32884. + info->trunc_index = ULONG_MAX; /* reset: no cluster is being truncated any more */
  32885. + if (unlikely(result == CBK_COORD_NOTFOUND))
  32886. + result = 0;
  32887. + if (unlikely(result))
  32888. + goto out;
  32889. + }
  32890. + if (off_to_cloff(new_size, inode) == 0)
  32891. + goto truncate_hole;
  32892. +
  32893. + assert("edward-1146", new_size < inode->i_size);
  32894. +
  32895. + to_prune = inode->i_size - new_size;
  32896. + /*
  32897. + * Partial truncate of the last logical cluster.
  32898. + * Partial hole will be converted to zeros. The resulted
  32899. + * logical cluster will be captured and submitted to disk
  32900. + */
  32901. + result = alloc_cluster_pgset(&clust, cluster_nrpages(inode));
  32902. + if (result)
  32903. + goto out;
  32904. +
  32905. + nr_zeros = off_to_pgoff(new_size);
  32906. + if (nr_zeros)
  32907. + nr_zeros = PAGE_SIZE - nr_zeros; /* zeros up to the next page boundary */
  32908. +
  32909. + set_window(&clust, &win, inode, new_size, new_size + nr_zeros);
  32910. + win.stat = HOLE_WINDOW;
  32911. +
  32912. + assert("edward-1149", clust.index == from_idx - 1);
  32913. +
  32914. + result = prepare_logical_cluster(inode, 0, 0, &clust, LC_SHRINK);
  32915. + if (result)
  32916. + goto out;
  32917. + assert("edward-1151",
  32918. + clust.dstat == PREP_DISK_CLUSTER ||
  32919. + clust.dstat == UNPR_DISK_CLUSTER ||
  32920. + clust.dstat == FAKE_DISK_CLUSTER);
  32921. + truncate_hole:
  32922. + /*
  32923. + * drop all the pages that don't have jnodes (i.e. pages
  32924. + * which can not be truncated by cut_file_items() because
  32925. + * of holes represented by fake disk clusters) including
  32926. + * the pages of partially truncated cluster which was
  32927. + * released by prepare_logical_cluster()
  32928. + */
  32929. + INODE_SET_SIZE(inode, new_size);
  32930. + truncate_inode_pages(inode->i_mapping, new_size);
  32931. + out:
  32932. + assert("edward-1497",
  32933. + pages_truncate_ok(inode, size_in_pages(new_size)));
  32934. +
  32935. + done_lh(lh);
  32936. + kfree(hint);
  32937. + put_cluster_handle(&clust);
  32938. + return result;
  32939. +}
  32940. +
  32941. +/**
  32942. + * Capture a pager cluster.
  32943. + * @clust must be set up by a caller.
  32944. + */
  32945. +static int capture_page_cluster(struct cluster_handle * clust,
  32946. + struct inode * inode)
  32947. +{
  32948. + int result;
  32949. +
  32950. + assert("edward-1073", clust != NULL);
  32951. + assert("edward-1074", inode != NULL);
  32952. + assert("edward-1075", clust->dstat == INVAL_DISK_CLUSTER);
  32953. +
  32954. + result = prepare_logical_cluster(inode, 0, 0, clust, LC_APPOV);
  32955. + if (result)
  32956. + return result;
  32957. +
  32958. + set_cluster_pages_dirty(clust, inode);
  32959. + result = checkin_logical_cluster(clust, inode);
  32960. + put_hint_cluster(clust, inode, ZNODE_WRITE_LOCK);
  32961. + if (unlikely(result))
  32962. + put_page_cluster(clust, inode, WRITE_OP); /* undo page references taken for the write */
  32963. + return result;
  32964. +}
  32965. +
  32966. +/* Starting from @index find tagged pages of the same page cluster.
  32967. + * Clear the tag for each of them. Return number of found pages.
  32968. + */
  32969. +static int find_anon_page_cluster(struct address_space * mapping,
  32970. + pgoff_t * index, struct page ** pages)
  32971. +{
  32972. + int i = 0;
  32973. + int found;
  32974. + spin_lock_irq(&mapping->tree_lock);
  32975. + do {
  32976. + /* looking for one page */
  32977. + found = radix_tree_gang_lookup_tag(&mapping->page_tree,
  32978. + (void **)&pages[i],
  32979. + *index, 1,
  32980. + PAGECACHE_TAG_REISER4_MOVED);
  32981. + if (!found)
  32982. + break;
  32983. + if (!same_page_cluster(pages[0], pages[i]))
  32984. + break; /* stop at page-cluster boundary */
  32985. +
  32986. + /* found */
  32987. + get_page(pages[i]);
  32988. + *index = pages[i]->index + 1;
  32989. +
  32990. + radix_tree_tag_clear(&mapping->page_tree,
  32991. + pages[i]->index,
  32992. + PAGECACHE_TAG_REISER4_MOVED);
  32993. + if (last_page_in_cluster(pages[i++]))
  32994. + break;
  32995. + } while (1);
  32996. + spin_unlock_irq(&mapping->tree_lock);
  32997. + return i;
  32998. +}
  32999. +
  33000. +#define MAX_PAGES_TO_CAPTURE (1024)
  33001. +
  33002. +/* Capture anonymous page clusters */
  33003. +static int capture_anon_pages(struct address_space * mapping, pgoff_t * index,
  33004. + int to_capture)
  33005. +{
  33006. + int count = 0;
  33007. + int found = 0;
  33008. + int result = 0;
  33009. + hint_t *hint;
  33010. + lock_handle *lh;
  33011. + struct inode * inode;
  33012. + struct cluster_handle clust;
  33013. + struct page * pages[MAX_CLUSTER_NRPAGES];
  33014. +
  33015. + assert("edward-1127", mapping != NULL);
  33016. + assert("edward-1128", mapping->host != NULL);
  33017. + assert("edward-1440", mapping->host->i_mapping == mapping);
  33018. +
  33019. + inode = mapping->host;
  33020. + hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
  33021. + if (hint == NULL)
  33022. + return RETERR(-ENOMEM);
  33023. + hint_init_zero(hint);
  33024. + lh = &hint->lh;
  33025. +
  33026. + cluster_init_read(&clust, NULL /* no sliding window */);
  33027. + clust.hint = hint;
  33028. +
  33029. + result = alloc_cluster_pgset(&clust, cluster_nrpages(inode));
  33030. + if (result)
  33031. + goto out;
  33032. +
  33033. + while (to_capture > 0) {
  33034. + found = find_anon_page_cluster(mapping, index, pages);
  33035. + if (!found) {
  33036. + *index = (pgoff_t) - 1; /* no more tagged pages: mark scan complete */
  33037. + break;
  33038. + }
  33039. + move_cluster_forward(&clust, inode, pages[0]->index);
  33040. + result = capture_page_cluster(&clust, inode);
  33041. +
  33042. + put_found_pages(pages, found); /* find_anon_page_cluster */
  33043. + if (result)
  33044. + break;
  33045. + to_capture -= clust.nr_pages;
  33046. + count += clust.nr_pages;
  33047. + }
  33048. + if (result) {
  33049. + warning("edward-1077",
  33050. + "Capture failed (inode %llu, result=%i, captured=%d)\n",
  33051. + (unsigned long long)get_inode_oid(inode), result, count);
  33052. + } else {
  33053. + assert("edward-1078", ergo(found > 0, count > 0));
  33054. + if (to_capture <= 0)
  33055. + /* there may be left more pages */
  33056. + __mark_inode_dirty(inode, I_DIRTY_PAGES);
  33057. + result = count; /* success: return number of captured pages */
  33058. + }
  33059. + out:
  33060. + done_lh(lh);
  33061. + kfree(hint);
  33062. + put_cluster_handle(&clust);
  33063. + return result;
  33064. +}
  33065. +
  33066. +/* Returns true if inode's mapping has dirty pages
  33067. + which do not belong to any atom */
  33068. +static int cryptcompress_inode_has_anon_pages(struct inode *inode)
  33069. +{
  33070. + int result;
  33071. + spin_lock_irq(&inode->i_mapping->tree_lock);
  33072. + result = radix_tree_tagged(&inode->i_mapping->page_tree,
  33073. + PAGECACHE_TAG_REISER4_MOVED); /* any page tagged MOVED => anonymous dirty pages exist */
  33074. + spin_unlock_irq(&inode->i_mapping->tree_lock);
  33075. + return result;
  33076. +}
  33077. +
  33078. +/* plugin->writepages */
  33079. +int writepages_cryptcompress(struct address_space *mapping,
  33080. + struct writeback_control *wbc)
  33081. +{
  33082. + int result = 0;
  33083. + long to_capture;
  33084. + pgoff_t nrpages;
  33085. + pgoff_t index = 0;
  33086. + struct inode *inode;
  33087. + struct cryptcompress_info *info;
  33088. +
  33089. + inode = mapping->host;
  33090. + if (!cryptcompress_inode_has_anon_pages(inode))
  33091. + goto end;
  33092. + info = cryptcompress_inode_data(inode);
  33093. + nrpages = size_in_pages(i_size_read(inode));
  33094. +
  33095. + if (wbc->sync_mode != WB_SYNC_ALL)
  33096. + to_capture = min(wbc->nr_to_write, (long)MAX_PAGES_TO_CAPTURE);
  33097. + else
  33098. + to_capture = MAX_PAGES_TO_CAPTURE;
  33099. + do {
  33100. + reiser4_context *ctx;
  33101. +
  33102. + ctx = reiser4_init_context(inode->i_sb);
  33103. + if (IS_ERR(ctx)) {
  33104. + result = PTR_ERR(ctx);
  33105. + break;
  33106. + }
  33107. + /* avoid recursive calls to ->sync_inodes */
  33108. + ctx->nobalance = 1;
  33109. +
  33110. + assert("edward-1079",
  33111. + lock_stack_isclean(get_current_lock_stack()));
  33112. +
  33113. + reiser4_txn_restart_current();
  33114. +
  33115. + if (get_current_context()->entd) {
  33116. + if (mutex_trylock(&info->checkin_mutex) == 0) {
  33117. + /* the mutex might be occupied by
  33118. + entd caller */
  33119. + result = RETERR(-EBUSY);
  33120. + reiser4_exit_context(ctx);
  33121. + break;
  33122. + }
  33123. + } else
  33124. + mutex_lock(&info->checkin_mutex);
  33125. +
  33126. + result = capture_anon_pages(inode->i_mapping, &index,
  33127. + to_capture);
  33128. + mutex_unlock(&info->checkin_mutex);
  33129. +
  33130. + if (result < 0) {
  33131. + reiser4_exit_context(ctx);
  33132. + break;
  33133. + }
  33134. + wbc->nr_to_write -= result; /* result >= 0 is the number of captured pages */
  33135. + if (wbc->sync_mode != WB_SYNC_ALL) {
  33136. + reiser4_exit_context(ctx);
  33137. + break;
  33138. + }
  33139. + result = txnmgr_force_commit_all(inode->i_sb, 0); /* WB_SYNC_ALL: push everything to disk */
  33140. + reiser4_exit_context(ctx);
  33141. + } while (result >= 0 && index < nrpages);
  33142. +
  33143. + end:
  33144. + if (is_in_reiser4_context()) {
  33145. + if (get_current_context()->nr_captured >= CAPTURE_APAGE_BURST) {
  33146. + /* there are already pages to flush, flush them out,
  33147. + do not delay until end of reiser4_sync_inodes */
  33148. + reiser4_writeout(inode->i_sb, wbc);
  33149. + get_current_context()->nr_captured = 0;
  33150. + }
  33151. + }
  33152. + return result;
  33153. +}
  33154. +
  33155. +/* plugin->ioctl */
  33156. +int ioctl_cryptcompress(struct file *filp, unsigned int cmd,
  33157. + unsigned long arg)
  33158. +{
  33159. + return RETERR(-ENOTTY); /* no ioctls are supported by this plugin */
  33160. +}
  33161. +
  33162. +/* plugin->mmap */
  33163. +int mmap_cryptcompress(struct file *file, struct vm_area_struct *vma)
  33164. +{
  33165. + int result;
  33166. + struct inode *inode;
  33167. + reiser4_context *ctx;
  33168. +
  33169. + inode = file_inode(file);
  33170. + ctx = reiser4_init_context(inode->i_sb);
  33171. + if (IS_ERR(ctx))
  33172. + return PTR_ERR(ctx);
  33173. + /*
  33174. + * generic_file_mmap will do update_atime. Grab space for stat data
  33175. + * update.
  33176. + */
  33177. + result = reiser4_grab_space_force
  33178. + (inode_file_plugin(inode)->estimate.update(inode),
  33179. + BA_CAN_COMMIT);
  33180. + if (result) {
  33181. + reiser4_exit_context(ctx);
  33182. + return result;
  33183. + }
  33184. + result = generic_file_mmap(file, vma); /* delegate to the generic VFS implementation */
  33185. + reiser4_exit_context(ctx);
  33186. + return result;
  33187. +}
  33188. +
  33189. +/* plugin->delete_object */
  33190. +int delete_object_cryptcompress(struct inode *inode)
  33191. +{
  33192. + int result;
  33193. + struct cryptcompress_info * info;
  33194. +
  33195. + assert("edward-429", inode->i_nlink == 0);
  33196. +
  33197. + reiser4_txn_restart_current();
  33198. + info = cryptcompress_inode_data(inode);
  33199. +
  33200. + mutex_lock(&info->checkin_mutex);
  33201. + result = prune_cryptcompress(inode, 0, 0); /* truncate file body to zero length */
  33202. + mutex_unlock(&info->checkin_mutex);
  33203. +
  33204. + if (result) {
  33205. + warning("edward-430",
  33206. + "cannot truncate cryptcompress file %lli: %i",
  33207. + (unsigned long long)get_inode_oid(inode),
  33208. + result);
  33209. + }
  33210. + /* and remove stat data */
  33211. + return reiser4_delete_object_common(inode);
  33212. +}
  33213. +
  33214. +/*
  33215. + * plugin->setattr
  33216. + * This implements actual truncate (see comments in reiser4/page_cache.c)
  33217. + */
  33218. +int setattr_cryptcompress(struct dentry *dentry, struct iattr *attr)
  33219. +{
  33220. + int result;
  33221. + struct inode *inode;
  33222. + struct cryptcompress_info * info;
  33223. +
  33224. + inode = dentry->d_inode;
  33225. + info = cryptcompress_inode_data(inode);
  33226. +
  33227. + if (attr->ia_valid & ATTR_SIZE) {
  33228. + if (i_size_read(inode) != attr->ia_size) {
  33229. + reiser4_context *ctx;
  33230. + loff_t old_size;
  33231. +
  33232. + ctx = reiser4_init_context(dentry->d_inode->i_sb);
  33233. + if (IS_ERR(ctx))
  33234. + return PTR_ERR(ctx);
  33235. + result = setattr_dispatch_hook(inode);
  33236. + if (result) {
  33237. + context_set_commit_async(ctx);
  33238. + reiser4_exit_context(ctx);
  33239. + return result;
  33240. + }
  33241. + old_size = i_size_read(inode);
  33242. + inode_check_scale(inode, old_size, attr->ia_size);
  33243. +
  33244. + mutex_lock(&info->checkin_mutex);
  33245. + if (attr->ia_size > inode->i_size) /* grow vs. shrink */
  33246. + result = expand_cryptcompress(inode,
  33247. + attr->ia_size);
  33248. + else
  33249. + result = prune_cryptcompress(inode,
  33250. + attr->ia_size,
  33251. + 1/* update sd */);
  33252. + mutex_unlock(&info->checkin_mutex);
  33253. + if (result) {
  33254. + warning("edward-1192",
  33255. + "truncate_cryptcompress failed: oid %lli, "
  33256. + "old size %lld, new size %lld, retval %d",
  33257. + (unsigned long long)
  33258. + get_inode_oid(inode), old_size,
  33259. + attr->ia_size, result);
  33260. + }
  33261. + context_set_commit_async(ctx);
  33262. + reiser4_exit_context(ctx);
  33263. + } else
  33264. + result = 0; /* size unchanged: nothing to do */
  33265. + } else
  33266. + result = reiser4_setattr_common(dentry, attr);
  33267. + return result;
  33268. +}
  33269. +
  33270. +/* plugin->release */
  33271. +int release_cryptcompress(struct inode *inode, struct file *file)
  33272. +{
  33273. + reiser4_context *ctx = reiser4_init_context(inode->i_sb);
  33274. +
  33275. + if (IS_ERR(ctx))
  33276. + return PTR_ERR(ctx);
  33277. + reiser4_free_file_fsdata(file); /* drop per-file reiser4 bookkeeping */
  33278. + reiser4_exit_context(ctx);
  33279. + return 0;
  33280. +}
  33281. +
  33282. +/* plugin->write_begin() */
  33283. +int write_begin_cryptcompress(struct file *file, struct page *page,
  33284. + loff_t pos, unsigned len, void **fsdata)
  33285. +{
  33286. + int ret = -ENOMEM;
  33287. + char *buf;
  33288. + hint_t *hint;
  33289. + struct inode *inode;
  33290. + struct reiser4_slide *win;
  33291. + struct cluster_handle *clust;
  33292. + struct cryptcompress_info *info;
  33293. + reiser4_context *ctx;
  33294. +
  33295. + ctx = get_current_context();
  33296. + inode = page->mapping->host;
  33297. + info = cryptcompress_inode_data(inode);
  33298. +
  33299. + assert("edward-1564", PageLocked(page));
  33300. + buf = kmalloc(sizeof(*clust) +
  33301. + sizeof(*win) +
  33302. + sizeof(*hint),
  33303. + reiser4_ctx_gfp_mask_get()); /* single allocation carved into three structs below */
  33304. + if (!buf)
  33305. + goto err2;
  33306. + clust = (struct cluster_handle *)buf;
  33307. + win = (struct reiser4_slide *)(buf + sizeof(*clust));
  33308. + hint = (hint_t *)(buf + sizeof(*clust) + sizeof(*win));
  33309. +
  33310. + hint_init_zero(hint);
  33311. + cluster_init_read(clust, NULL);
  33312. + clust->hint = hint;
  33313. +
  33314. + mutex_lock(&info->checkin_mutex);
  33315. +
  33316. + ret = set_window_and_cluster(inode, clust, win, len, pos);
  33317. + if (ret)
  33318. + goto err1;
  33319. + unlock_page(page);
  33320. + ret = prepare_logical_cluster(inode, pos, len, clust, LC_APPOV);
  33321. + done_lh(&hint->lh);
  33322. + assert("edward-1565", lock_stack_isclean(get_current_lock_stack()));
  33323. + lock_page(page);
  33324. + if (ret) {
  33325. + SetPageError(page);
  33326. + ClearPageUptodate(page);
  33327. + goto err0;
  33328. + }
  33329. + /*
  33330. + * Success. All resources (including checkin_mutex)
  33331. + * will be released in ->write_end()
  33332. + */
  33333. + ctx->locked_page = page;
  33334. + *fsdata = (void *)buf;
  33335. +
  33336. + return 0;
  33337. + err0:
  33338. + put_cluster_handle(clust);
  33339. + err1:
  33340. + mutex_unlock(&info->checkin_mutex);
  33341. + kfree(buf);
  33342. + err2:
  33343. + assert("edward-1568", !ret); /* NOTE(review): ret is nonzero on every path reaching here — condition looks inverted; verify */
  33344. + return ret;
  33345. +}
  33346. +
  33347. +/* plugin->write_end() */
  33348. +int write_end_cryptcompress(struct file *file, struct page *page,
  33349. + loff_t pos, unsigned copied, void *fsdata)
  33350. +{
  33351. + int ret;
  33352. + hint_t *hint;
  33353. + struct inode *inode;
  33354. + struct cluster_handle *clust;
  33355. + struct cryptcompress_info *info;
  33356. + reiser4_context *ctx;
  33357. +
  33358. + assert("edward-1566",
  33359. + lock_stack_isclean(get_current_lock_stack()));
  33360. + ctx = get_current_context();
  33361. + inode = page->mapping->host;
  33362. + info = cryptcompress_inode_data(inode);
  33363. + clust = (struct cluster_handle *)fsdata;
  33364. + hint = clust->hint; /* NOTE(review): hint is not used below */
  33365. +
  33366. + unlock_page(page);
  33367. + ctx->locked_page = NULL;
  33368. + set_cluster_pages_dirty(clust, inode);
  33369. + ret = checkin_logical_cluster(clust, inode);
  33370. + if (ret) {
  33371. + SetPageError(page);
  33372. + goto exit; /* NOTE(review): jumps to the very next statement — effectively a no-op */
  33373. + }
  33374. + exit:
  33375. + mutex_unlock(&info->checkin_mutex);
  33376. +
  33377. + put_cluster_handle(clust);
  33378. +
  33379. + if (pos + copied > inode->i_size) {
  33380. + /*
  33381. + * i_size has been updated in
  33382. + * checkin_logical_cluster
  33383. + */
  33384. + ret = reiser4_update_sd(inode); /* NOTE(review): may overwrite a prior checkin error in ret — verify */
  33385. + if (unlikely(ret != 0))
  33386. + warning("edward-1603",
  33387. + "Can not update stat-data: %i. FSCK?",
  33388. + ret);
  33389. + }
  33390. + kfree(fsdata);
  33391. + return ret;
  33392. +}
  33393. +
  33394. +/* plugin->bmap */
  33395. +sector_t bmap_cryptcompress(struct address_space *mapping, sector_t lblock)
  33396. +{
  33397. + return -EINVAL; /* block mapping is meaningless for compressed files */
  33398. +}
  33399. +
  33400. +/*
  33401. + Local variables:
  33402. + c-indentation-style: "K&R"
  33403. + mode-name: "LC"
  33404. + c-basic-offset: 8
  33405. + tab-width: 8
  33406. + fill-column: 80
  33407. + scroll-step: 1
  33408. + End:
  33409. +*/
  33410. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/file/cryptcompress.h linux-4.14.2/fs/reiser4/plugin/file/cryptcompress.h
  33411. --- linux-4.14.2.orig/fs/reiser4/plugin/file/cryptcompress.h 1970-01-01 01:00:00.000000000 +0100
  33412. +++ linux-4.14.2/fs/reiser4/plugin/file/cryptcompress.h 2017-11-26 22:13:09.000000000 +0100
  33413. @@ -0,0 +1,619 @@
  33414. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  33415. +/* See http://www.namesys.com/cryptcompress_design.html */
  33416. +
  33417. +#if !defined( __FS_REISER4_CRYPTCOMPRESS_H__ )
  33418. +#define __FS_REISER4_CRYPTCOMPRESS_H__
  33419. +
  33420. +#include "../../page_cache.h"
  33421. +#include "../compress/compress.h"
  33422. +#include "../crypto/cipher.h"
  33423. +
  33424. +#include <linux/pagemap.h>
  33425. +
  33426. +#define MIN_CLUSTER_SHIFT PAGE_SHIFT
  33427. +#define MAX_CLUSTER_SHIFT 16
  33428. +#define MAX_CLUSTER_NRPAGES (1U << MAX_CLUSTER_SHIFT >> PAGE_SHIFT)
  33429. +#define DC_CHECKSUM_SIZE 4
  33430. +
  33431. +#define MIN_LATTICE_FACTOR 1
  33432. +#define MAX_LATTICE_FACTOR 32
  33433. +
  33434. +/* this mask contains all non-standard plugins that might
  33435. + be present in reiser4-specific part of inode managed by
  33436. + cryptcompress file plugin */
  33437. +#define cryptcompress_mask \
  33438. + ((1 << PSET_FILE) | \
  33439. + (1 << PSET_CLUSTER) | \
  33440. + (1 << PSET_CIPHER) | \
  33441. + (1 << PSET_DIGEST) | \
  33442. + (1 << PSET_COMPRESSION) | \
  33443. + (1 << PSET_COMPRESSION_MODE))
  33444. +
  33445. +#if REISER4_DEBUG
  33446. +static inline int cluster_shift_ok(int shift)
  33447. +{
  33448. + return (shift >= MIN_CLUSTER_SHIFT) && (shift <= MAX_CLUSTER_SHIFT); /* validate a cluster-size shift (debug only) */
  33449. +}
  33450. +#endif
  33451. +
  33452. +#if REISER4_DEBUG
  33453. +#define INODE_PGCOUNT(inode) \
  33454. +({ \
  33455. + assert("edward-1530", inode_file_plugin(inode) == \
  33456. + file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID)); \
  33457. + atomic_read(&cryptcompress_inode_data(inode)->pgcount); \
  33458. + })
  33459. +#define INODE_PGCOUNT_INC(inode) \
  33460. +do { \
  33461. + assert("edward-1531", inode_file_plugin(inode) == \
  33462. + file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID)); \
  33463. + atomic_inc(&cryptcompress_inode_data(inode)->pgcount); \
  33464. +} while (0)
  33465. +#define INODE_PGCOUNT_DEC(inode) \
  33466. +do { \
  33467. + if (inode_file_plugin(inode) == \
  33468. + file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID)) \
  33469. + atomic_dec(&cryptcompress_inode_data(inode)->pgcount); \
  33470. +} while (0)
  33471. +#else
  33472. +#define INODE_PGCOUNT(inode) (0)
  33473. +#define INODE_PGCOUNT_INC(inode)
  33474. +#define INODE_PGCOUNT_DEC(inode)
  33475. +#endif /* REISER4_DEBUG */
  33476. +
  33477. +struct tfm_stream {
  33478. + __u8 *data;
  33479. + size_t size;
  33480. +};
  33481. +
  33482. +typedef enum {
  33483. + INPUT_STREAM,
  33484. + OUTPUT_STREAM,
  33485. + LAST_STREAM
  33486. +} tfm_stream_id;
  33487. +
  33488. +typedef struct tfm_stream * tfm_unit[LAST_STREAM];
  33489. +
  33490. +static inline __u8 *ts_data(struct tfm_stream * stm)
  33491. +{
  33492. + assert("edward-928", stm != NULL);
  33493. + return stm->data;
  33494. +}
  33495. +
  33496. +static inline size_t ts_size(struct tfm_stream * stm)
  33497. +{
  33498. + assert("edward-929", stm != NULL);
  33499. + return stm->size;
  33500. +}
  33501. +
  33502. +static inline void set_ts_size(struct tfm_stream * stm, size_t size)
  33503. +{
  33504. + assert("edward-930", stm != NULL);
  33505. +
  33506. + stm->size = size;
  33507. +}
  33508. +
  33509. +static inline int alloc_ts(struct tfm_stream ** stm)
  33510. +{
  33511. + assert("edward-931", stm);
  33512. + assert("edward-932", *stm == NULL);
  33513. +
  33514. + *stm = kzalloc(sizeof(**stm), reiser4_ctx_gfp_mask_get());
  33515. + if (!*stm)
  33516. + return -ENOMEM;
  33517. + return 0;
  33518. +}
  33519. +
  33520. +static inline void free_ts(struct tfm_stream * stm)
  33521. +{
  33522. + assert("edward-933", !ts_data(stm));
  33523. + assert("edward-934", !ts_size(stm));
  33524. +
  33525. + kfree(stm);
  33526. +}
  33527. +
  33528. +static inline int alloc_ts_data(struct tfm_stream * stm, size_t size)
  33529. +{
  33530. + assert("edward-935", !ts_data(stm));
  33531. + assert("edward-936", !ts_size(stm));
  33532. + assert("edward-937", size != 0);
  33533. +
  33534. + stm->data = reiser4_vmalloc(size);
  33535. + if (!stm->data)
  33536. + return -ENOMEM;
  33537. + set_ts_size(stm, size);
  33538. + return 0;
  33539. +}
  33540. +
  33541. +static inline void free_ts_data(struct tfm_stream * stm)
  33542. +{
  33543. + assert("edward-938", equi(ts_data(stm), ts_size(stm)));
  33544. +
  33545. + if (ts_data(stm))
  33546. + vfree(ts_data(stm));
  33547. + memset(stm, 0, sizeof *stm);
  33548. +}
  33549. +
  33550. +/* Write modes for item conversion in flush convert phase */
  33551. +typedef enum {
  33552. + CTAIL_INVAL_CONVERT_MODE = 0,
  33553. + CTAIL_APPEND_ITEM = 1,
  33554. + CTAIL_OVERWRITE_ITEM = 2,
  33555. + CTAIL_CUT_ITEM = 3
  33556. +} ctail_convert_mode_t;
  33557. +
  33558. +typedef enum {
  33559. + LC_INVAL = 0, /* invalid value */
  33560. + LC_APPOV = 1, /* append and/or overwrite */
  33561. + LC_EXPAND = 2, /* expanding truncate */
  33562. + LC_SHRINK = 3 /* shrinking truncate */
  33563. +} logical_cluster_op;
  33564. +
  33565. +/* Transform cluster.
  33566. + * Intermediate state between page cluster and disk cluster
  33567. + * Is used for data transform (compression/encryption)
  33568. + */
  33569. +struct tfm_cluster {
  33570. + coa_set coa; /* compression algorithms info */
  33571. + tfm_unit tun; /* plain and transformed streams */
  33572. + tfm_action act;
  33573. + int uptodate;
  33574. + int lsize; /* number of bytes in logical cluster */
  33575. + int len; /* length of the transform stream */
  33576. + unsigned int hole:1; /* should punch hole */
  33577. +};
  33578. +
  33579. +static inline coa_t get_coa(struct tfm_cluster * tc, reiser4_compression_id id,
  33580. + tfm_action act)
  33581. +{
  33582. + return tc->coa[id][act];
  33583. +}
  33584. +
  33585. +static inline void set_coa(struct tfm_cluster * tc, reiser4_compression_id id,
  33586. + tfm_action act, coa_t coa)
  33587. +{
  33588. + tc->coa[id][act] = coa;
  33589. +}
  33590. +
  33591. +static inline int alloc_coa(struct tfm_cluster * tc, compression_plugin * cplug)
  33592. +{
  33593. + coa_t coa;
  33594. +
  33595. + coa = cplug->alloc(tc->act);
  33596. + if (IS_ERR(coa))
  33597. + return PTR_ERR(coa);
  33598. + set_coa(tc, cplug->h.id, tc->act, coa);
  33599. + return 0;
  33600. +}
  33601. +
  33602. +static inline int
  33603. +grab_coa(struct tfm_cluster * tc, compression_plugin * cplug)
  33604. +{
  33605. + return (cplug->alloc && !get_coa(tc, cplug->h.id, tc->act) ?
  33606. + alloc_coa(tc, cplug) : 0);
  33607. +}
  33608. +
  33609. +static inline void free_coa_set(struct tfm_cluster * tc)
  33610. +{
  33611. + tfm_action j;
  33612. + reiser4_compression_id i;
  33613. + compression_plugin *cplug;
  33614. +
  33615. + assert("edward-810", tc != NULL);
  33616. +
  33617. + for (j = 0; j < TFMA_LAST; j++)
  33618. + for (i = 0; i < LAST_COMPRESSION_ID; i++) {
  33619. + if (!get_coa(tc, i, j))
  33620. + continue;
  33621. + cplug = compression_plugin_by_id(i);
  33622. + assert("edward-812", cplug->free != NULL);
  33623. + cplug->free(get_coa(tc, i, j), j);
  33624. + set_coa(tc, i, j, 0);
  33625. + }
  33626. + return;
  33627. +}
  33628. +
  33629. +static inline struct tfm_stream * get_tfm_stream(struct tfm_cluster * tc,
  33630. + tfm_stream_id id)
  33631. +{
  33632. + return tc->tun[id];
  33633. +}
  33634. +
  33635. +static inline void set_tfm_stream(struct tfm_cluster * tc,
  33636. + tfm_stream_id id, struct tfm_stream * ts)
  33637. +{
  33638. + tc->tun[id] = ts;
  33639. +}
  33640. +
  33641. +static inline __u8 *tfm_stream_data(struct tfm_cluster * tc, tfm_stream_id id)
  33642. +{
  33643. + return ts_data(get_tfm_stream(tc, id));
  33644. +}
  33645. +
  33646. +static inline void set_tfm_stream_data(struct tfm_cluster * tc,
  33647. + tfm_stream_id id, __u8 * data)
  33648. +{
  33649. + get_tfm_stream(tc, id)->data = data;
  33650. +}
  33651. +
  33652. +static inline size_t tfm_stream_size(struct tfm_cluster * tc, tfm_stream_id id)
  33653. +{
  33654. + return ts_size(get_tfm_stream(tc, id));
  33655. +}
  33656. +
  33657. +static inline void
  33658. +set_tfm_stream_size(struct tfm_cluster * tc, tfm_stream_id id, size_t size)
  33659. +{
  33660. + get_tfm_stream(tc, id)->size = size;
  33661. +}
  33662. +
  33663. +static inline int
  33664. +alloc_tfm_stream(struct tfm_cluster * tc, size_t size, tfm_stream_id id)
  33665. +{
  33666. + assert("edward-939", tc != NULL);
  33667. + assert("edward-940", !get_tfm_stream(tc, id));
  33668. +
  33669. + tc->tun[id] = kzalloc(sizeof(struct tfm_stream),
  33670. + reiser4_ctx_gfp_mask_get());
  33671. + if (!tc->tun[id])
  33672. + return -ENOMEM;
  33673. + return alloc_ts_data(get_tfm_stream(tc, id), size);
  33674. +}
  33675. +
  33676. +static inline int
  33677. +realloc_tfm_stream(struct tfm_cluster * tc, size_t size, tfm_stream_id id)
  33678. +{
  33679. + assert("edward-941", tfm_stream_size(tc, id) < size);
  33680. + free_ts_data(get_tfm_stream(tc, id));
  33681. + return alloc_ts_data(get_tfm_stream(tc, id), size);
  33682. +}
  33683. +
  33684. +static inline void free_tfm_stream(struct tfm_cluster * tc, tfm_stream_id id)
  33685. +{
  33686. + free_ts_data(get_tfm_stream(tc, id));
  33687. + free_ts(get_tfm_stream(tc, id));
  33688. + set_tfm_stream(tc, id, 0);
  33689. +}
  33690. +
  33691. +static inline unsigned coa_overrun(compression_plugin * cplug, int ilen)
  33692. +{
  33693. + return (cplug->overrun != NULL ? cplug->overrun(ilen) : 0);
  33694. +}
  33695. +
  33696. +static inline void free_tfm_unit(struct tfm_cluster * tc)
  33697. +{
  33698. + tfm_stream_id id;
  33699. + for (id = 0; id < LAST_STREAM; id++) {
  33700. + if (!get_tfm_stream(tc, id))
  33701. + continue;
  33702. + free_tfm_stream(tc, id);
  33703. + }
  33704. +}
  33705. +
  33706. +static inline void put_tfm_cluster(struct tfm_cluster * tc)
  33707. +{
  33708. + assert("edward-942", tc != NULL);
  33709. + free_coa_set(tc);
  33710. + free_tfm_unit(tc);
  33711. +}
  33712. +
  33713. +static inline int tfm_cluster_is_uptodate(struct tfm_cluster * tc)
  33714. +{
  33715. + assert("edward-943", tc != NULL);
  33716. + assert("edward-944", tc->uptodate == 0 || tc->uptodate == 1);
  33717. + return (tc->uptodate == 1);
  33718. +}
  33719. +
  33720. +static inline void tfm_cluster_set_uptodate(struct tfm_cluster * tc)
  33721. +{
  33722. + assert("edward-945", tc != NULL);
  33723. + assert("edward-946", tc->uptodate == 0 || tc->uptodate == 1);
  33724. + tc->uptodate = 1;
  33725. + return;
  33726. +}
  33727. +
  33728. +static inline void tfm_cluster_clr_uptodate(struct tfm_cluster * tc)
  33729. +{
  33730. + assert("edward-947", tc != NULL);
  33731. + assert("edward-948", tc->uptodate == 0 || tc->uptodate == 1);
  33732. + tc->uptodate = 0;
  33733. + return;
  33734. +}
  33735. +
  33736. +static inline int tfm_stream_is_set(struct tfm_cluster * tc, tfm_stream_id id)
  33737. +{
  33738. + return (get_tfm_stream(tc, id) &&
  33739. + tfm_stream_data(tc, id) && tfm_stream_size(tc, id));
  33740. +}
  33741. +
  33742. +static inline int tfm_cluster_is_set(struct tfm_cluster * tc)
  33743. +{
  33744. + int i;
  33745. + for (i = 0; i < LAST_STREAM; i++)
  33746. + if (!tfm_stream_is_set(tc, i))
  33747. + return 0;
  33748. + return 1;
  33749. +}
  33750. +
  33751. +static inline void alternate_streams(struct tfm_cluster * tc)
  33752. +{
  33753. + struct tfm_stream *tmp = get_tfm_stream(tc, INPUT_STREAM);
  33754. +
  33755. + set_tfm_stream(tc, INPUT_STREAM, get_tfm_stream(tc, OUTPUT_STREAM));
  33756. + set_tfm_stream(tc, OUTPUT_STREAM, tmp);
  33757. +}
  33758. +
  33759. +/* Set of states to indicate a kind of data
  33760. + * that will be written to the window */
  33761. +typedef enum {
  33762. + DATA_WINDOW, /* user's data */
  33763. + HOLE_WINDOW /* zeroes (such kind of data can be written
  33764. + * if we start to write from offset > i_size) */
  33765. +} window_stat;
  33766. +
  33767. +/* Window (of logical cluster size) discretely sliding along a file.
  33768. + * Is used to locate hole region in a logical cluster to be properly
  33769. + * represented on disk.
  33770. + * We split a write to cryptcompress file into writes to its logical
  33771. + * clusters. Before writing to a logical cluster we set a window, i.e.
  33772. + * calculate values of the following fields:
  33773. + */
  33774. +struct reiser4_slide {
  33775. + unsigned off; /* offset to write from */
  33776. + unsigned count; /* number of bytes to write */
  33777. + unsigned delta; /* number of bytes to append to the hole */
  33778. + window_stat stat; /* what kind of data will be written starting
  33779. + from @off */
  33780. +};
  33781. +
  33782. +/* Possible states of a disk cluster */
  33783. +typedef enum {
  33784. + INVAL_DISK_CLUSTER, /* unknown state */
  33785. + PREP_DISK_CLUSTER, /* disk cluster got converted by flush
  33786. + * at least 1 time */
  33787. + UNPR_DISK_CLUSTER, /* disk cluster just created and should be
  33788. + * converted by flush */
  33789. + FAKE_DISK_CLUSTER, /* disk cluster exists neither in memory
  33790. + * nor on disk */
  33791. + TRNC_DISK_CLUSTER /* disk cluster is partially truncated */
  33792. +} disk_cluster_stat;
  33793. +
  33794. +/* The following structure represents various stages of the same logical
  33795. + * cluster of index @index:
  33796. + * . fixed slide
  33797. + * . page cluster (stage in primary cache)
  33798. + * . transform cluster (transition stage)
  33799. + * . disk cluster (stage in secondary cache)
  33800. + * This structure is used in transition and synchronizing operations, e.g.
  33801. + * transform cluster is a transition state when synchronizing page cluster
  33802. + * and disk cluster.
  33803. + * FIXME: Encapsulate page cluster, disk cluster.
  33804. + */
  33805. +struct cluster_handle {
  33806. + cloff_t index; /* offset in a file (unit is a cluster size) */
  33807. + int index_valid; /* for validating the index above, if needed */
  33808. + struct file *file; /* host file */
  33809. +
  33810. + /* logical cluster */
  33811. + struct reiser4_slide *win; /* sliding window to locate holes */
  33812. + logical_cluster_op op; /* logical cluster operation (truncate or
  33813. + append/overwrite) */
  33814. + /* transform cluster */
  33815. + struct tfm_cluster tc; /* contains all needed info to synchronize
  33816. + page cluster and disk cluster */
  33817. + /* page cluster */
  33818. + int nr_pages; /* number of pages of current checkin action */
  33819. + int old_nrpages; /* number of pages of last checkin action */
  33820. + struct page **pages; /* attached pages */
  33821. + jnode * node; /* jnode for capture */
  33822. +
  33823. + /* disk cluster */
  33824. + hint_t *hint; /* current position in the tree */
  33825. + disk_cluster_stat dstat; /* state of the current disk cluster */
  33826. + int reserved; /* is space for disk cluster reserved */
  33827. +#if REISER4_DEBUG
  33828. + reiser4_context *ctx;
  33829. + int reserved_prepped;
  33830. + int reserved_unprepped;
  33831. +#endif
  33832. +
  33833. +};
  33834. +
  33835. +static inline __u8 * tfm_input_data (struct cluster_handle * clust)
  33836. +{
  33837. + return tfm_stream_data(&clust->tc, INPUT_STREAM);
  33838. +}
  33839. +
  33840. +static inline __u8 * tfm_output_data (struct cluster_handle * clust)
  33841. +{
  33842. + return tfm_stream_data(&clust->tc, OUTPUT_STREAM);
  33843. +}
  33844. +
  33845. +static inline int reset_cluster_pgset(struct cluster_handle * clust,
  33846. + int nrpages)
  33847. +{
  33848. + assert("edward-1057", clust->pages != NULL);
  33849. + memset(clust->pages, 0, sizeof(*clust->pages) * nrpages);
  33850. + return 0;
  33851. +}
  33852. +
  33853. +static inline int alloc_cluster_pgset(struct cluster_handle * clust,
  33854. + int nrpages)
  33855. +{
  33856. + assert("edward-949", clust != NULL);
  33857. + assert("edward-1362", clust->pages == NULL);
  33858. + assert("edward-950", nrpages != 0 && nrpages <= MAX_CLUSTER_NRPAGES);
  33859. +
  33860. + clust->pages = kzalloc(sizeof(*clust->pages) * nrpages,
  33861. + reiser4_ctx_gfp_mask_get());
  33862. + if (!clust->pages)
  33863. + return RETERR(-ENOMEM);
  33864. + return 0;
  33865. +}
  33866. +
  33867. +static inline void move_cluster_pgset(struct cluster_handle *clust,
  33868. + struct page ***pages, int * nr_pages)
  33869. +{
  33870. + assert("edward-1545", clust != NULL && clust->pages != NULL);
  33871. + assert("edward-1546", pages != NULL && *pages == NULL);
  33872. + *pages = clust->pages;
  33873. + *nr_pages = clust->nr_pages;
  33874. + clust->pages = NULL;
  33875. +}
  33876. +
  33877. +static inline void free_cluster_pgset(struct cluster_handle * clust)
  33878. +{
  33879. + assert("edward-951", clust->pages != NULL);
  33880. + kfree(clust->pages);
  33881. + clust->pages = NULL;
  33882. +}
  33883. +
  33884. +static inline void put_cluster_handle(struct cluster_handle * clust)
  33885. +{
  33886. + assert("edward-435", clust != NULL);
  33887. +
  33888. + put_tfm_cluster(&clust->tc);
  33889. + if (clust->pages)
  33890. + free_cluster_pgset(clust);
  33891. + memset(clust, 0, sizeof *clust);
  33892. +}
  33893. +
  33894. +static inline void inc_keyload_count(struct reiser4_crypto_info * data)
  33895. +{
  33896. + assert("edward-1410", data != NULL);
  33897. + data->keyload_count++;
  33898. +}
  33899. +
  33900. +static inline void dec_keyload_count(struct reiser4_crypto_info * data)
  33901. +{
  33902. + assert("edward-1411", data != NULL);
  33903. + assert("edward-1412", data->keyload_count > 0);
  33904. + data->keyload_count--;
  33905. +}
  33906. +
  33907. +static inline int capture_cluster_jnode(jnode * node)
  33908. +{
  33909. + return reiser4_try_capture(node, ZNODE_WRITE_LOCK, 0);
  33910. +}
  33911. +
  33912. +/* cryptcompress specific part of reiser4_inode */
  33913. +struct cryptcompress_info {
  33914. + struct mutex checkin_mutex; /* This is to serialize
  33915. + * checkin_logical_cluster operations */
  33916. + cloff_t trunc_index; /* Index of the leftmost truncated disk
  33917. + * cluster (to resolve races with read) */
  33918. + struct reiser4_crypto_info *crypt;
  33919. + /*
  33920. + * the following 2 fields are controlled by compression mode plugin
  33921. + */
  33922. + int compress_toggle; /* Current status of compressibility */
  33923. + int lattice_factor; /* Factor of dynamic lattice. FIXME: Have
  33924. + * a compression_toggle to keep the factor
  33925. + */
  33926. +#if REISER4_DEBUG
  33927. + atomic_t pgcount; /* number of grabbed pages */
  33928. +#endif
  33929. +};
  33930. +
  33931. +static inline void set_compression_toggle (struct cryptcompress_info * info, int val)
  33932. +{
  33933. + info->compress_toggle = val;
  33934. +}
  33935. +
  33936. +static inline int get_compression_toggle (struct cryptcompress_info * info)
  33937. +{
  33938. + return info->compress_toggle;
  33939. +}
  33940. +
  33941. +static inline int compression_is_on(struct cryptcompress_info * info)
  33942. +{
  33943. + return get_compression_toggle(info) == 1;
  33944. +}
  33945. +
  33946. +static inline void turn_on_compression(struct cryptcompress_info * info)
  33947. +{
  33948. + set_compression_toggle(info, 1);
  33949. +}
  33950. +
  33951. +static inline void turn_off_compression(struct cryptcompress_info * info)
  33952. +{
  33953. + set_compression_toggle(info, 0);
  33954. +}
  33955. +
  33956. +static inline void set_lattice_factor(struct cryptcompress_info * info, int val)
  33957. +{
  33958. + info->lattice_factor = val;
  33959. +}
  33960. +
  33961. +static inline int get_lattice_factor(struct cryptcompress_info * info)
  33962. +{
  33963. + return info->lattice_factor;
  33964. +}
  33965. +
  33966. +struct cryptcompress_info *cryptcompress_inode_data(const struct inode *);
  33967. +int equal_to_rdk(znode *, const reiser4_key *);
  33968. +int goto_right_neighbor(coord_t *, lock_handle *);
  33969. +int cryptcompress_inode_ok(struct inode *inode);
  33970. +int coord_is_unprepped_ctail(const coord_t * coord);
  33971. +extern int do_readpage_ctail(struct inode *, struct cluster_handle *,
  33972. + struct page * page, znode_lock_mode mode);
  33973. +extern int ctail_insert_unprepped_cluster(struct cluster_handle * clust,
  33974. + struct inode * inode);
  33975. +extern int readpages_cryptcompress(struct file*, struct address_space*,
  33976. + struct list_head*, unsigned);
  33977. +int bind_cryptcompress(struct inode *child, struct inode *parent);
  33978. +void destroy_inode_cryptcompress(struct inode * inode);
  33979. +int grab_page_cluster(struct inode *inode, struct cluster_handle * clust,
  33980. + rw_op rw);
  33981. +int write_dispatch_hook(struct file *file, struct inode * inode,
  33982. + loff_t pos, struct cluster_handle * clust,
  33983. + struct dispatch_context * cont);
  33984. +int setattr_dispatch_hook(struct inode * inode);
  33985. +struct reiser4_crypto_info * inode_crypto_info(struct inode * inode);
  33986. +void inherit_crypto_info_common(struct inode * parent, struct inode * object,
  33987. + int (*can_inherit)(struct inode * child,
  33988. + struct inode * parent));
  33989. +void reiser4_attach_crypto_info(struct inode * inode,
  33990. + struct reiser4_crypto_info * info);
  33991. +void change_crypto_info(struct inode * inode, struct reiser4_crypto_info * new);
  33992. +struct reiser4_crypto_info * reiser4_alloc_crypto_info (struct inode * inode);
  33993. +
  33994. +static inline struct crypto_blkcipher * info_get_cipher(struct reiser4_crypto_info * info)
  33995. +{
  33996. + return info->cipher;
  33997. +}
  33998. +
  33999. +static inline void info_set_cipher(struct reiser4_crypto_info * info,
  34000. + struct crypto_blkcipher * tfm)
  34001. +{
  34002. + info->cipher = tfm;
  34003. +}
  34004. +
  34005. +static inline struct crypto_hash * info_get_digest(struct reiser4_crypto_info * info)
  34006. +{
  34007. + return info->digest;
  34008. +}
  34009. +
  34010. +static inline void info_set_digest(struct reiser4_crypto_info * info,
  34011. + struct crypto_hash * tfm)
  34012. +{
  34013. + info->digest = tfm;
  34014. +}
  34015. +
  34016. +static inline void put_cluster_page(struct page * page)
  34017. +{
  34018. + put_page(page);
  34019. +}
  34020. +
  34021. +#endif /* __FS_REISER4_CRYPTCOMPRESS_H__ */
  34022. +
  34023. +/* Make Linus happy.
  34024. + Local variables:
  34025. + c-indentation-style: "K&R"
  34026. + mode-name: "LC"
  34027. + c-basic-offset: 8
  34028. + tab-width: 8
  34029. + fill-column: 120
  34030. + scroll-step: 1
  34031. + End:
  34032. +*/
  34033. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/file/file.c linux-4.14.2/fs/reiser4/plugin/file/file.c
  34034. --- linux-4.14.2.orig/fs/reiser4/plugin/file/file.c 1970-01-01 01:00:00.000000000 +0100
  34035. +++ linux-4.14.2/fs/reiser4/plugin/file/file.c 2017-11-26 22:13:09.000000000 +0100
  34036. @@ -0,0 +1,2796 @@
  34037. +/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
  34038. + * reiser4/README */
  34039. +
  34040. +/*
  34041. + * this file contains implementations of inode/file/address_space/file plugin
  34042. + * operations specific for "unix file plugin" (plugin id is
  34043. + * UNIX_FILE_PLUGIN_ID). "Unix file" is either built of tail items only
  34044. + * (FORMATTING_ID) or of extent items only (EXTENT_POINTER_ID) or empty (have
  34045. + * no items but stat data)
  34046. + */
  34047. +
  34048. +#include "../../inode.h"
  34049. +#include "../../super.h"
  34050. +#include "../../tree_walk.h"
  34051. +#include "../../carry.h"
  34052. +#include "../../page_cache.h"
  34053. +#include "../../ioctl.h"
  34054. +#include "../object.h"
  34055. +#include "../cluster.h"
  34056. +#include "../../safe_link.h"
  34057. +
  34058. +#include <linux/writeback.h>
  34059. +#include <linux/pagevec.h>
  34060. +#include <linux/syscalls.h>
  34061. +
  34062. +
  34063. +static int unpack(struct file *file, struct inode *inode, int forever);
  34064. +static void drop_access(struct unix_file_info *);
  34065. +static int hint_validate(hint_t * hint, const reiser4_key * key, int check_key,
  34066. + znode_lock_mode lock_mode);
  34067. +
  34068. +/* Get exclusive access and make sure that file is not partially
  34069. + * converted (It may happen that another process is doing tail
  34070. + * conversion. If so, wait until it completes)
  34071. + */
  34072. +static inline void get_exclusive_access_careful(struct unix_file_info * uf_info,
  34073. + struct inode *inode)
  34074. +{
  34075. + do {
  34076. + get_exclusive_access(uf_info);
  34077. + if (!reiser4_inode_get_flag(inode, REISER4_PART_IN_CONV))
  34078. + break;
  34079. + drop_exclusive_access(uf_info);
  34080. + schedule();
  34081. + } while (1);
  34082. +}
  34083. +
  34084. +/* get unix file plugin specific portion of inode */
  34085. +struct unix_file_info *unix_file_inode_data(const struct inode *inode)
  34086. +{
  34087. + return &reiser4_inode_data(inode)->file_plugin_data.unix_file_info;
  34088. +}
  34089. +
  34090. +/**
  34091. + * equal_to_rdk - compare key and znode's right delimiting key
  34092. + * @node: node whose right delimiting key to compare with @key
  34093. + * @key: key to compare with @node's right delimiting key
  34094. + *
  34095. + * Returns true if @key is equal to right delimiting key of @node.
  34096. + */
  34097. +int equal_to_rdk(znode *node, const reiser4_key *key)
  34098. +{
  34099. + int result;
  34100. +
  34101. + read_lock_dk(znode_get_tree(node));
  34102. + result = keyeq(key, znode_get_rd_key(node));
  34103. + read_unlock_dk(znode_get_tree(node));
  34104. + return result;
  34105. +}
  34106. +
  34107. +#if REISER4_DEBUG
  34108. +
  34109. +/**
  34110. + * equal_to_ldk - compare key and znode's left delimiting key
  34111. + * @node: node whose left delimiting key to compare with @key
  34112. + * @key: key to compare with @node's left delimiting key
  34113. + *
  34114. + * Returns true if @key is equal to left delimiting key of @node.
  34115. + */
  34116. +int equal_to_ldk(znode *node, const reiser4_key *key)
  34117. +{
  34118. + int result;
  34119. +
  34120. + read_lock_dk(znode_get_tree(node));
  34121. + result = keyeq(key, znode_get_ld_key(node));
  34122. + read_unlock_dk(znode_get_tree(node));
  34123. + return result;
  34124. +}
  34125. +
  34126. +/**
  34127. + * check_coord - check whether coord corresponds to key
  34128. + * @coord: coord to check
  34129. + * @key: key @coord has to correspond to
  34130. + *
  34131. + * Returns true if @coord is set as if it was set as result of lookup with @key
  34132. + * in coord->node.
  34133. + */
  34134. +static int check_coord(const coord_t *coord, const reiser4_key *key)
  34135. +{
  34136. + coord_t twin;
  34137. +
  34138. + node_plugin_by_node(coord->node)->lookup(coord->node, key,
  34139. + FIND_MAX_NOT_MORE_THAN, &twin);
  34140. + return coords_equal(coord, &twin);
  34141. +}
  34142. +
  34143. +#endif /* REISER4_DEBUG */
  34144. +
  34145. +/**
  34146. + * init_uf_coord - initialize extended coord
  34147. + * @uf_coord:
  34148. + * @lh:
  34149. + *
  34150. + *
  34151. + */
  34152. +void init_uf_coord(uf_coord_t *uf_coord, lock_handle *lh)
  34153. +{
  34154. + coord_init_zero(&uf_coord->coord);
  34155. + coord_clear_iplug(&uf_coord->coord);
  34156. + uf_coord->lh = lh;
  34157. + init_lh(lh);
  34158. + memset(&uf_coord->extension, 0, sizeof(uf_coord->extension));
  34159. + uf_coord->valid = 0;
  34160. +}
  34161. +
  34162. +static void validate_extended_coord(uf_coord_t *uf_coord, loff_t offset)
  34163. +{
  34164. + assert("vs-1333", uf_coord->valid == 0);
  34165. +
  34166. + if (coord_is_between_items(&uf_coord->coord))
  34167. + return;
  34168. +
  34169. + assert("vs-1348",
  34170. + item_plugin_by_coord(&uf_coord->coord)->s.file.
  34171. + init_coord_extension);
  34172. +
  34173. + item_body_by_coord(&uf_coord->coord);
  34174. + item_plugin_by_coord(&uf_coord->coord)->s.file.
  34175. + init_coord_extension(uf_coord, offset);
  34176. +}
  34177. +
  34178. +/**
  34179. + * goto_right_neighbor - lock right neighbor, drop current node lock
  34180. + * @coord:
  34181. + * @lh:
  34182. + *
  34183. + * Obtain lock on right neighbor and drop lock on current node.
  34184. + */
  34185. +int goto_right_neighbor(coord_t *coord, lock_handle *lh)
  34186. +{
  34187. + int result;
  34188. + lock_handle lh_right;
  34189. +
  34190. + assert("vs-1100", znode_is_locked(coord->node));
  34191. +
  34192. + init_lh(&lh_right);
  34193. + result = reiser4_get_right_neighbor(&lh_right, coord->node,
  34194. + znode_is_wlocked(coord->node) ?
  34195. + ZNODE_WRITE_LOCK : ZNODE_READ_LOCK,
  34196. + GN_CAN_USE_UPPER_LEVELS);
  34197. + if (result) {
  34198. + done_lh(&lh_right);
  34199. + return result;
  34200. + }
  34201. +
  34202. + /*
  34203. + * we hold two longterm locks on neighboring nodes. Unlock left of
  34204. + * them
  34205. + */
  34206. + done_lh(lh);
  34207. +
  34208. + coord_init_first_unit_nocheck(coord, lh_right.node);
  34209. + move_lh(lh, &lh_right);
  34210. +
  34211. + return 0;
  34212. +
  34213. +}
  34214. +
  34215. +/**
  34216. + * set_file_state
  34217. + * @uf_info:
  34218. + * @cbk_result:
  34219. + * @level:
  34220. + *
  34221. + * This is to be used by find_file_item and in find_file_state to
  34222. + * determine real state of file
  34223. + */
  34224. +static void set_file_state(struct unix_file_info *uf_info, int cbk_result,
  34225. + tree_level level)
  34226. +{
  34227. + if (cbk_errored(cbk_result))
  34228. + /* error happened in find_file_item */
  34229. + return;
  34230. +
  34231. + assert("vs-1164", level == LEAF_LEVEL || level == TWIG_LEVEL);
  34232. +
  34233. + if (uf_info->container == UF_CONTAINER_UNKNOWN) {
  34234. + if (cbk_result == CBK_COORD_NOTFOUND)
  34235. + uf_info->container = UF_CONTAINER_EMPTY;
  34236. + else if (level == LEAF_LEVEL)
  34237. + uf_info->container = UF_CONTAINER_TAILS;
  34238. + else
  34239. + uf_info->container = UF_CONTAINER_EXTENTS;
  34240. + } else {
  34241. + /*
  34242. + * file state is known, check whether it is set correctly if
  34243. + * file is not being tail converted
  34244. + */
  34245. + if (!reiser4_inode_get_flag(unix_file_info_to_inode(uf_info),
  34246. + REISER4_PART_IN_CONV)) {
  34247. + assert("vs-1162",
  34248. + ergo(level == LEAF_LEVEL &&
  34249. + cbk_result == CBK_COORD_FOUND,
  34250. + uf_info->container == UF_CONTAINER_TAILS));
  34251. + assert("vs-1165",
  34252. + ergo(level == TWIG_LEVEL &&
  34253. + cbk_result == CBK_COORD_FOUND,
  34254. + uf_info->container == UF_CONTAINER_EXTENTS));
  34255. + }
  34256. + }
  34257. +}
  34258. +
  34259. +int find_file_item_nohint(coord_t *coord, lock_handle *lh,
  34260. + const reiser4_key *key, znode_lock_mode lock_mode,
  34261. + struct inode *inode)
  34262. +{
  34263. + return reiser4_object_lookup(inode, key, coord, lh, lock_mode,
  34264. + FIND_MAX_NOT_MORE_THAN,
  34265. + TWIG_LEVEL, LEAF_LEVEL,
  34266. + (lock_mode == ZNODE_READ_LOCK) ? CBK_UNIQUE :
  34267. + (CBK_UNIQUE | CBK_FOR_INSERT),
  34268. + NULL /* ra_info */ );
  34269. +}
  34270. +
  34271. +/**
  34272. + * find_file_item - look for file item in the tree
  34273. + * @hint: provides coordinate, lock handle, seal
  34274. + * @key: key for search
  34275. + * @lock_mode: mode of lock to put on returned node
  34276. + * @inode: inode of the file the item belongs to
  34277. + *
  34278. + *
  34279. + * This finds position in the tree corresponding to @key. It first tries to use
  34280. + * @hint's seal if it is set.
  34281. + */
  34282. +int find_file_item(hint_t *hint, const reiser4_key *key,
  34283. + znode_lock_mode lock_mode,
  34284. + struct inode *inode)
  34285. +{
  34286. + int result;
  34287. + coord_t *coord;
  34288. + lock_handle *lh;
  34289. +
  34290. + assert("nikita-3030", reiser4_schedulable());
  34291. + assert("vs-1707", hint != NULL);
  34292. + assert("vs-47", inode != NULL);
  34293. +
  34294. + coord = &hint->ext_coord.coord;
  34295. + lh = hint->ext_coord.lh;
  34296. + init_lh(lh);
  34297. +
  34298. + result = hint_validate(hint, key, 1 /* check key */, lock_mode);
  34299. + if (!result) {
  34300. + if (coord->between == AFTER_UNIT &&
  34301. + equal_to_rdk(coord->node, key)) {
  34302. + result = goto_right_neighbor(coord, lh);
  34303. + if (result == -E_NO_NEIGHBOR)
  34304. + return RETERR(-EIO);
  34305. + if (result)
  34306. + return result;
  34307. + assert("vs-1152", equal_to_ldk(coord->node, key));
  34308. + /*
  34309. + * we moved to different node. Invalidate coord
  34310. + * extension, zload is necessary to init it again
  34311. + */
  34312. + hint->ext_coord.valid = 0;
  34313. + }
  34314. +
  34315. + set_file_state(unix_file_inode_data(inode), CBK_COORD_FOUND,
  34316. + znode_get_level(coord->node));
  34317. +
  34318. + return CBK_COORD_FOUND;
  34319. + }
  34320. +
  34321. + coord_init_zero(coord);
  34322. + result = find_file_item_nohint(coord, lh, key, lock_mode, inode);
  34323. + set_file_state(unix_file_inode_data(inode), result,
  34324. + znode_get_level(coord->node));
  34325. +
  34326. + /* FIXME: we might already have coord extension initialized */
  34327. + hint->ext_coord.valid = 0;
  34328. + return result;
  34329. +}
  34330. +
  34331. +void hint_init_zero(hint_t * hint)
  34332. +{
  34333. + memset(hint, 0, sizeof(*hint));
  34334. + init_lh(&hint->lh);
  34335. + hint->ext_coord.lh = &hint->lh;
  34336. +}
  34337. +
  34338. +static int find_file_state(struct inode *inode, struct unix_file_info *uf_info)
  34339. +{
  34340. + int result;
  34341. + reiser4_key key;
  34342. + coord_t coord;
  34343. + lock_handle lh;
  34344. +
  34345. + assert("vs-1628", ea_obtained(uf_info));
  34346. +
  34347. + if (uf_info->container == UF_CONTAINER_UNKNOWN) {
  34348. + key_by_inode_and_offset_common(inode, 0, &key);
  34349. + init_lh(&lh);
  34350. + result = find_file_item_nohint(&coord, &lh, &key,
  34351. + ZNODE_READ_LOCK, inode);
  34352. + set_file_state(uf_info, result, znode_get_level(coord.node));
  34353. + done_lh(&lh);
  34354. + if (!cbk_errored(result))
  34355. + result = 0;
  34356. + } else
  34357. + result = 0;
  34358. + assert("vs-1074",
  34359. + ergo(result == 0, uf_info->container != UF_CONTAINER_UNKNOWN));
  34360. + reiser4_txn_restart_current();
  34361. + return result;
  34362. +}
  34363. +
  34364. +/**
  34365. + * Estimate and reserve space needed to truncate page
  34366. + * which gets partially truncated: one block for page
  34367. + * itself, stat-data update (estimate_one_insert_into_item)
  34368. + * and one item insertion (estimate_one_insert_into_item)
  34369. + * which may happen if page corresponds to hole extent and
  34370. + * unallocated one will have to be created
  34371. + */
  34372. +static int reserve_partial_page(reiser4_tree * tree)
  34373. +{
  34374. + grab_space_enable();
  34375. + return reiser4_grab_reserved(reiser4_get_current_sb(),
  34376. + 1 +
  34377. + 2 * estimate_one_insert_into_item(tree),
  34378. + BA_CAN_COMMIT);
  34379. +}
  34380. +
  34381. +/* estimate and reserve space needed to cut one item and update one stat data */
  34382. +static int reserve_cut_iteration(reiser4_tree * tree)
  34383. +{
  34384. + __u64 estimate = estimate_one_item_removal(tree)
  34385. + + estimate_one_insert_into_item(tree);
  34386. +
  34387. + assert("nikita-3172", lock_stack_isclean(get_current_lock_stack()));
  34388. +
  34389. + grab_space_enable();
  34390. + /* We need to double our estimate now that we can delete more than one
  34391. + node. */
  34392. + return reiser4_grab_reserved(reiser4_get_current_sb(), estimate * 2,
  34393. + BA_CAN_COMMIT);
  34394. +}
  34395. +
  34396. +int reiser4_update_file_size(struct inode *inode, loff_t new_size,
  34397. + int update_sd)
  34398. +{
  34399. + int result = 0;
  34400. +
  34401. + INODE_SET_SIZE(inode, new_size);
  34402. + if (update_sd) {
  34403. + inode->i_ctime = inode->i_mtime = current_time(inode);
  34404. + result = reiser4_update_sd(inode);
  34405. + }
  34406. + return result;
  34407. +}
  34408. +
  34409. +/**
  34410. + * Cut file items one by one starting from the last one until
  34411. + * new file size (inode->i_size) is reached. Reserve space
  34412. + * and update file stat data on every single cut from the tree
  34413. + */
  34414. +int cut_file_items(struct inode *inode, loff_t new_size,
  34415. + int update_sd, loff_t cur_size,
  34416. + int (*update_actor) (struct inode *, loff_t, int))
  34417. +{
  34418. + reiser4_key from_key, to_key;
  34419. + reiser4_key smallest_removed;
  34420. + file_plugin *fplug = inode_file_plugin(inode);
  34421. + int result;
  34422. + int progress = 0;
  34423. +
  34424. + assert("vs-1248",
  34425. + fplug == file_plugin_by_id(UNIX_FILE_PLUGIN_ID) ||
  34426. + fplug == file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID));
  34427. +
  34428. + fplug->key_by_inode(inode, new_size, &from_key);
  34429. + to_key = from_key;
  34430. + set_key_offset(&to_key, cur_size - 1 /*get_key_offset(reiser4_max_key()) */ );
  34431. + /* this loop normally runs just once */
  34432. + while (1) {
  34433. + result = reserve_cut_iteration(reiser4_tree_by_inode(inode));
  34434. + if (result)
  34435. + break;
  34436. +
  34437. + result = reiser4_cut_tree_object(current_tree, &from_key, &to_key,
  34438. + &smallest_removed, inode, 1,
  34439. + &progress);
  34440. + if (result == -E_REPEAT) {
  34441. + /**
  34442. + * -E_REPEAT is a signal to interrupt a long
  34443. + * file truncation process
  34444. + */
  34445. + if (progress) {
  34446. + result = update_actor(inode,
  34447. + get_key_offset(&smallest_removed),
  34448. + update_sd);
  34449. + if (result)
  34450. + break;
  34451. + }
  34452. + /* the below does up(sbinfo->delete_mutex).
  34453. + * Do not get fooled */
  34454. + reiser4_release_reserved(inode->i_sb);
  34455. + /**
  34456. + * reiser4_cut_tree_object() was interrupted probably
  34457. + * because current atom requires commit, we have to
  34458. + * release transaction handle to allow atom commit.
  34459. + */
  34460. + reiser4_txn_restart_current();
  34461. + continue;
  34462. + }
  34463. + if (result
  34464. + && !(result == CBK_COORD_NOTFOUND && new_size == 0
  34465. + && inode->i_size == 0))
  34466. + break;
  34467. +
  34468. + set_key_offset(&smallest_removed, new_size);
  34469. + /* Final sd update after the file gets its correct size */
  34470. + result = update_actor(inode, get_key_offset(&smallest_removed),
  34471. + update_sd);
  34472. + break;
  34473. + }
  34474. +
  34475. + /* the below does up(sbinfo->delete_mutex). Do not get fooled */
  34476. + reiser4_release_reserved(inode->i_sb);
  34477. +
  34478. + return result;
  34479. +}
  34480. +
  34481. +int find_or_create_extent(struct page *page);
  34482. +
  34483. +/* part of truncate_file_body: it is called when truncate is used to make file
  34484. + shorter */
  34485. +static int shorten_file(struct inode *inode, loff_t new_size)
  34486. +{
  34487. + int result;
  34488. + struct page *page;
  34489. + int padd_from;
  34490. + unsigned long index;
  34491. + struct unix_file_info *uf_info;
  34492. +
  34493. + /*
  34494. + * all items of ordinary reiser4 file are grouped together. That is why
  34495. + * we can use reiser4_cut_tree. Plan B files (for instance) can not be
  34496. + * truncated that simply
  34497. + */
  34498. + result = cut_file_items(inode, new_size, 1 /*update_sd */ ,
  34499. + get_key_offset(reiser4_max_key()),
  34500. + reiser4_update_file_size);
  34501. + if (result)
  34502. + return result;
  34503. +
  34504. + uf_info = unix_file_inode_data(inode);
  34505. + assert("vs-1105", new_size == inode->i_size);
  34506. + if (new_size == 0) {
  34507. + uf_info->container = UF_CONTAINER_EMPTY;
  34508. + return 0;
  34509. + }
  34510. +
  34511. + result = find_file_state(inode, uf_info);
  34512. + if (result)
  34513. + return result;
  34514. + if (uf_info->container == UF_CONTAINER_TAILS)
  34515. + /*
  34516. + * No need to worry about zeroing last page after new file
  34517. + * end
  34518. + */
  34519. + return 0;
  34520. +
  34521. + padd_from = inode->i_size & (PAGE_SIZE - 1);
  34522. + if (!padd_from)
  34523. + /* file is truncated to page boundary */
  34524. + return 0;
  34525. +
  34526. + result = reserve_partial_page(reiser4_tree_by_inode(inode));
  34527. + if (result) {
  34528. + reiser4_release_reserved(inode->i_sb);
  34529. + return result;
  34530. + }
  34531. +
  34532. + /* last page is partially truncated - zero its content */
  34533. + index = (inode->i_size >> PAGE_SHIFT);
  34534. + page = read_mapping_page(inode->i_mapping, index, NULL);
  34535. + if (IS_ERR(page)) {
  34536. + /*
  34537. + * the below does up(sbinfo->delete_mutex). Do not get
  34538. + * confused
  34539. + */
  34540. + reiser4_release_reserved(inode->i_sb);
  34541. + if (likely(PTR_ERR(page) == -EINVAL)) {
  34542. + /* looks like file is built of tail items */
  34543. + return 0;
  34544. + }
  34545. + return PTR_ERR(page);
  34546. + }
  34547. + wait_on_page_locked(page);
  34548. + if (!PageUptodate(page)) {
  34549. + put_page(page);
  34550. + /*
  34551. + * the below does up(sbinfo->delete_mutex). Do not get
  34552. + * confused
  34553. + */
  34554. + reiser4_release_reserved(inode->i_sb);
  34555. + return RETERR(-EIO);
  34556. + }
  34557. +
  34558. + /*
  34559. + * if page corresponds to hole extent unit - unallocated one will be
  34560. + * created here. This is not necessary
  34561. + */
  34562. + result = find_or_create_extent(page);
  34563. +
  34564. + /*
  34565. + * FIXME: cut_file_items has already updated inode. Probably it would
  34566. + * be better to update it here when file is really truncated
  34567. + */
  34568. + if (result) {
  34569. + put_page(page);
  34570. + /*
  34571. + * the below does up(sbinfo->delete_mutex). Do not get
  34572. + * confused
  34573. + */
  34574. + reiser4_release_reserved(inode->i_sb);
  34575. + return result;
  34576. + }
  34577. +
  34578. + lock_page(page);
  34579. + assert("vs-1066", PageLocked(page));
  34580. + zero_user_segment(page, padd_from, PAGE_SIZE);
  34581. + unlock_page(page);
  34582. + put_page(page);
  34583. + /* the below does up(sbinfo->delete_mutex). Do not get confused */
  34584. + reiser4_release_reserved(inode->i_sb);
  34585. + return 0;
  34586. +}
  34587. +
  34588. +/**
  34589. + * should_have_notail
  34590. + * @uf_info: unix-file specific part of the inode
  34591. + * @new_size: prospective file size to check the formatting policy against
  34592. + *
  34593. + * Calls formatting plugin to see whether file of size @new_size has to be
  34594. + * stored in unformatted nodes or in tail items. 0 is returned for the latter case.
  34595. + */
  34596. +static int should_have_notail(const struct unix_file_info *uf_info, loff_t new_size)
  34597. +{
  34598. + if (!uf_info->tplug)
  34599. + return 1;
  34600. + return !uf_info->tplug->have_tail(unix_file_info_to_inode(uf_info),
  34601. + new_size);
  34602. +
  34603. +}
  34604. +
  34605. +/**
  34606. + * truncate_file_body - change length of file
  34607. + * @inode: inode of file
  34608. + * @attr: attributes; ->ia_size holds the new file length
  34609. + *
  34610. + * Adjusts items file @inode is built of to match attr->ia_size. It may either cut
  34611. + * items or add them to represent a hole at the end of file. The caller has to
  34612. + * obtain exclusive access to the file.
  34613. + */
  34614. +static int truncate_file_body(struct inode *inode, struct iattr *attr)
  34615. +{
  34616. + int result;
  34617. + loff_t new_size = attr->ia_size;
  34618. +
  34619. + if (inode->i_size < new_size) {
  34620. + /* expanding truncate */
  34621. + struct unix_file_info *uf_info = unix_file_inode_data(inode);
  34622. +
  34623. + result = find_file_state(inode, uf_info);
  34624. + if (result)
  34625. + return result;
  34626. +
  34627. + if (should_have_notail(uf_info, new_size)) {
  34628. + /*
  34629. + * file of size @new_size has to be built of
  34630. + * extents. If it is built of tails - convert to
  34631. + * extents
  34632. + */
  34633. + if (uf_info->container == UF_CONTAINER_TAILS) {
  34634. + /*
  34635. + * if file is being converted by another process
  34636. + * - wait until it completes
  34637. + */
  34638. + while (1) {
  34639. + if (reiser4_inode_get_flag(inode,
  34640. + REISER4_PART_IN_CONV)) {
  34641. + drop_exclusive_access(uf_info);
  34642. + schedule();
  34643. + get_exclusive_access(uf_info);
  34644. + continue;
  34645. + }
  34646. + break;
  34647. + }
  34648. +
  34649. + if (uf_info->container == UF_CONTAINER_TAILS) {
  34650. + result = tail2extent(uf_info);
  34651. + if (result)
  34652. + return result;
  34653. + }
  34654. + }
  34655. + result = reiser4_write_extent(NULL, inode, NULL,
  34656. + 0, &new_size);
  34657. + if (result)
  34658. + return result;
  34659. + uf_info->container = UF_CONTAINER_EXTENTS;
  34660. + } else {
  34661. + if (uf_info->container == UF_CONTAINER_EXTENTS) {
  34662. + result = reiser4_write_extent(NULL, inode, NULL,
  34663. + 0, &new_size);
  34664. + if (result)
  34665. + return result;
  34666. + } else {
  34667. + result = reiser4_write_tail(NULL, inode, NULL,
  34668. + 0, &new_size);
  34669. + if (result)
  34670. + return result;
  34671. + uf_info->container = UF_CONTAINER_TAILS;
  34672. + }
  34673. + }
  34674. + BUG_ON(result > 0);
  34675. + result = reiser4_update_file_size(inode, new_size, 1);
  34676. + BUG_ON(result != 0);
  34677. + } else
  34678. + result = shorten_file(inode, new_size);
  34679. + return result;
  34680. +}
  34681. +
  34682. +/**
  34683. + * load_file_hint - copy hint from struct file to local variable
  34684. + * @file: file to get hint from
  34685. + * @hint: structure to fill
  34686. + *
  34687. + * Reiser4 specific portion of struct file may contain information (hint)
  34688. + * stored on exiting from previous read or write. That information includes
  34689. + * seal of znode and coord within that znode where previous read or write
  34690. + * stopped. This function copies that information to @hint if it was stored or
  34691. + * initializes @hint by 0s otherwise.
  34692. + */
  34693. +int load_file_hint(struct file *file, hint_t *hint)
  34694. +{
  34695. + reiser4_file_fsdata *fsdata;
  34696. +
  34697. + if (file) {
  34698. + fsdata = reiser4_get_file_fsdata(file);
  34699. + if (IS_ERR(fsdata))
  34700. + return PTR_ERR(fsdata);
  34701. +
  34702. + spin_lock_inode(file_inode(file));
  34703. + if (reiser4_seal_is_set(&fsdata->reg.hint.seal)) {
  34704. + memcpy(hint, &fsdata->reg.hint, sizeof(*hint));
  34705. + init_lh(&hint->lh);
  34706. + hint->ext_coord.lh = &hint->lh;
  34707. + spin_unlock_inode(file_inode(file));
  34708. + /*
  34709. + * force re-validation of the coord on the first
  34710. + * iteration of the read/write loop.
  34711. + */
  34712. + hint->ext_coord.valid = 0;
  34713. + assert("nikita-19892",
  34714. + coords_equal(&hint->seal.coord1,
  34715. + &hint->ext_coord.coord));
  34716. + return 0;
  34717. + }
  34718. + memset(&fsdata->reg.hint, 0, sizeof(hint_t));
  34719. + spin_unlock_inode(file_inode(file));
  34720. + }
  34721. + hint_init_zero(hint);
  34722. + return 0;
  34723. +}
  34724. +
  34725. +/**
  34726. + * save_file_hint - copy hint to reiser4 private struct file's part
  34727. + * @file: file to save hint in
  34728. + * @hint: hint to save
  34729. + *
  34730. + * This copies @hint to reiser4 private part of struct file. It can help
  34731. + * speedup future accesses to the file.
  34732. + */
  34733. +void save_file_hint(struct file *file, const hint_t *hint)
  34734. +{
  34735. + reiser4_file_fsdata *fsdata;
  34736. +
  34737. + assert("edward-1337", hint != NULL);
  34738. +
  34739. + if (!file || !reiser4_seal_is_set(&hint->seal))
  34740. + return;
  34741. + fsdata = reiser4_get_file_fsdata(file);
  34742. + assert("vs-965", !IS_ERR(fsdata));
  34743. + assert("nikita-19891",
  34744. + coords_equal(&hint->seal.coord1, &hint->ext_coord.coord));
  34745. + assert("vs-30", hint->lh.owner == NULL);
  34746. + spin_lock_inode(file_inode(file));
  34747. + fsdata->reg.hint = *hint;
  34748. + spin_unlock_inode(file_inode(file));
  34749. + return;
  34750. +}
  34751. +
  34752. +void reiser4_unset_hint(hint_t * hint)
  34753. +{
  34754. + assert("vs-1315", hint);
  34755. + hint->ext_coord.valid = 0;
  34756. + reiser4_seal_done(&hint->seal);
  34757. + done_lh(&hint->lh);
  34758. +}
  34759. +
  34760. +/* coord must be set properly. So, that reiser4_set_hint
  34761. + has nothing to do */
  34762. +void reiser4_set_hint(hint_t * hint, const reiser4_key * key,
  34763. + znode_lock_mode mode)
  34764. +{
  34765. + ON_DEBUG(coord_t * coord = &hint->ext_coord.coord);
  34766. + assert("vs-1207", WITH_DATA(coord->node, check_coord(coord, key)));
  34767. +
  34768. + reiser4_seal_init(&hint->seal, &hint->ext_coord.coord, key);
  34769. + hint->offset = get_key_offset(key);
  34770. + hint->mode = mode;
  34771. + done_lh(&hint->lh);
  34772. +}
  34773. +
  34774. +int hint_is_set(const hint_t * hint)
  34775. +{
  34776. + return reiser4_seal_is_set(&hint->seal);
  34777. +}
  34778. +
  34779. +#if REISER4_DEBUG
  34780. +static int all_but_offset_key_eq(const reiser4_key * k1, const reiser4_key * k2)
  34781. +{
  34782. + return (get_key_locality(k1) == get_key_locality(k2) &&
  34783. + get_key_type(k1) == get_key_type(k2) &&
  34784. + get_key_band(k1) == get_key_band(k2) &&
  34785. + get_key_ordering(k1) == get_key_ordering(k2) &&
  34786. + get_key_objectid(k1) == get_key_objectid(k2));
  34787. +}
  34788. +#endif
  34789. +
  34790. +static int
  34791. +hint_validate(hint_t * hint, const reiser4_key * key, int check_key,
  34792. + znode_lock_mode lock_mode)
  34793. +{
  34794. + if (!hint || !hint_is_set(hint) || hint->mode != lock_mode)
  34795. + /* hint either not set or set by different operation */
  34796. + return RETERR(-E_REPEAT);
  34797. +
  34798. + assert("vs-1277", all_but_offset_key_eq(key, &hint->seal.key));
  34799. +
  34800. + if (check_key && get_key_offset(key) != hint->offset)
  34801. + /* hint is set for different key */
  34802. + return RETERR(-E_REPEAT);
  34803. +
  34804. + assert("vs-31", hint->ext_coord.lh == &hint->lh);
  34805. + return reiser4_seal_validate(&hint->seal, &hint->ext_coord.coord, key,
  34806. + hint->ext_coord.lh, lock_mode,
  34807. + ZNODE_LOCK_LOPRI);
  34808. +}
  34809. +
  34810. +/**
  34811. + * Look for place at twig level for extent corresponding to page,
  34812. + * call extent's writepage method to create unallocated extent if
  34813. + * it does not exist yet, initialize jnode, capture page
  34814. + */
  34815. +int find_or_create_extent(struct page *page)
  34816. +{
  34817. + int result;
  34818. + struct inode *inode;
  34819. + int plugged_hole;
  34820. +
  34821. + jnode *node;
  34822. +
  34823. + assert("vs-1065", page->mapping && page->mapping->host);
  34824. + inode = page->mapping->host;
  34825. +
  34826. + lock_page(page);
  34827. + node = jnode_of_page(page);
  34828. + if (IS_ERR(node)) {
  34829. + unlock_page(page);
  34830. + return PTR_ERR(node);
  34831. + }
  34832. + JF_SET(node, JNODE_WRITE_PREPARED);
  34833. + unlock_page(page);
  34834. +
  34835. + if (node->blocknr == 0) {
  34836. + plugged_hole = 0;
  34837. + result = reiser4_update_extent(inode, node, page_offset(page),
  34838. + &plugged_hole);
  34839. + if (result) {
  34840. + JF_CLR(node, JNODE_WRITE_PREPARED);
  34841. + jput(node);
  34842. + warning("edward-1549",
  34843. + "reiser4_update_extent failed: %d", result);
  34844. + return result;
  34845. + }
  34846. + if (plugged_hole)
  34847. + reiser4_update_sd(inode);
  34848. + } else {
  34849. + spin_lock_jnode(node);
  34850. + result = reiser4_try_capture(node, ZNODE_WRITE_LOCK, 0);
  34851. + BUG_ON(result != 0);
  34852. + jnode_make_dirty_locked(node);
  34853. + spin_unlock_jnode(node);
  34854. + }
  34855. +
  34856. + BUG_ON(node->atom == NULL);
  34857. + JF_CLR(node, JNODE_WRITE_PREPARED);
  34858. +
  34859. + if (get_current_context()->entd) {
  34860. + entd_context *ent = get_entd_context(node->tree->super);
  34861. +
  34862. + if (ent->cur_request->page == page)
  34863. + /* the following reference will be
  34864. + dropped in reiser4_writeout */
  34865. + ent->cur_request->node = jref(node);
  34866. + }
  34867. + jput(node);
  34868. + return 0;
  34869. +}
  34870. +
  34871. +/**
  34872. + * has_anonymous_pages - check whether inode has pages dirtied via mmap
  34873. + * @inode: inode to check
  34874. + *
  34875. + * Returns true if inode's mapping has dirty pages which do not belong to any
  34876. + * atom. Those are either tagged PAGECACHE_TAG_REISER4_MOVED in mapping's page
  34877. + * tree or were eflushed and can be found via jnodes tagged
  34878. + * EFLUSH_TAG_ANONYMOUS in radix tree of jnodes.
  34879. + */
  34880. +static int has_anonymous_pages(struct inode *inode)
  34881. +{
  34882. + int result;
  34883. +
  34884. + spin_lock_irq(&inode->i_mapping->tree_lock);
  34885. + result = radix_tree_tagged(&inode->i_mapping->page_tree, PAGECACHE_TAG_REISER4_MOVED);
  34886. + spin_unlock_irq(&inode->i_mapping->tree_lock);
  34887. + return result;
  34888. +}
  34889. +
  34890. +/**
  34891. + * capture_page_and_create_extent -
  34892. + * @page: page to be captured
  34893. + *
  34894. + * Grabs space for extent creation and stat data update and calls function to
  34895. + * do actual work.
  34896. + * Exclusive, or non-exclusive lock must be held.
  34897. + */
  34898. +static int capture_page_and_create_extent(struct page *page)
  34899. +{
  34900. + int result;
  34901. + struct inode *inode;
  34902. +
  34903. + assert("vs-1084", page->mapping && page->mapping->host);
  34904. + inode = page->mapping->host;
  34905. + assert("vs-1139",
  34906. + unix_file_inode_data(inode)->container == UF_CONTAINER_EXTENTS);
  34907. + /* page belongs to file */
  34908. + assert("vs-1393",
  34909. + inode->i_size > page_offset(page));
  34910. +
  34911. + /* page capture may require extent creation (if it does not exist yet)
  34912. + and stat data's update (number of blocks changes on extent
  34913. + creation) */
  34914. + grab_space_enable();
  34915. + result = reiser4_grab_space(2 * estimate_one_insert_into_item
  34916. + (reiser4_tree_by_inode(inode)),
  34917. + BA_CAN_COMMIT);
  34918. + if (likely(!result))
  34919. + result = find_or_create_extent(page);
  34920. +
  34921. + if (result != 0)
  34922. + SetPageError(page);
  34923. + return result;
  34924. +}
  34925. +
  34926. +/*
  34927. + * Support for "anonymous" pages and jnodes.
  34928. + *
  34929. + * When file is write-accessed through mmap pages can be dirtied from the user
  34930. + * level. In this case kernel is not notified until one of following happens:
  34931. + *
  34932. + * (1) msync()
  34933. + *
  34934. + * (2) truncate() (either explicit or through unlink)
  34935. + *
  34936. + * (3) VM scanner starts reclaiming mapped pages, dirtying them before
  34937. + * starting write-back.
  34938. + *
  34939. + * As a result of (3) ->writepage may be called on a dirty page without
  34940. + * jnode. Such page is called "anonymous" in reiser4. Certain work-loads
  34941. + * (iozone) generate huge number of anonymous pages.
  34942. + *
  34943. + * reiser4_sync_sb() method tries to insert anonymous pages into
  34944. + * tree. This is done by capture_anonymous_*() functions below.
  34945. + */
  34946. +
  34947. +/**
  34948. + * capture_anonymous_page - involve page into transaction
  34949. + * @pg: page to deal with
  34950. + *
  34951. + * Takes care that @page has corresponding metadata in the tree, creates jnode
  34952. + * for @page and captures it. On success 1 is returned.
  34953. + */
  34954. +static int capture_anonymous_page(struct page *page)
  34955. +{
  34956. + int result;
  34957. +
  34958. + if (PageWriteback(page))
  34959. + /* FIXME: do nothing? */
  34960. + return 0;
  34961. +
  34962. + result = capture_page_and_create_extent(page);
  34963. + if (result == 0) {
  34964. + result = 1;
  34965. + } else
  34966. + warning("nikita-3329",
  34967. + "Cannot capture anon page: %i", result);
  34968. +
  34969. + return result;
  34970. +}
  34971. +
  34972. +/**
  34973. + * capture_anonymous_pages - find and capture pages dirtied via mmap
  34974. + * @mapping: address space where to look for pages
  34975. + * @index: start index
  34976. + * @to_capture: maximum number of pages to capture
  34977. + *
  34978. + * Looks for pages tagged REISER4_MOVED starting from the *@index-th page,
  34979. + * captures (involves into atom) them, returns number of captured pages,
  34980. + * updates @index to next page after the last captured one.
  34981. + */
  34982. +static int
  34983. +capture_anonymous_pages(struct address_space *mapping, pgoff_t *index,
  34984. + unsigned int to_capture)
  34985. +{
  34986. + int result;
  34987. + struct pagevec pvec;
  34988. + unsigned int i, count;
  34989. + int nr;
  34990. +
  34991. + pagevec_init(&pvec, 0);
  34992. + count = min(pagevec_space(&pvec), to_capture);
  34993. + nr = 0;
  34994. +
  34995. + /* find pages tagged MOVED */
  34996. + spin_lock_irq(&mapping->tree_lock);
  34997. + pvec.nr = radix_tree_gang_lookup_tag(&mapping->page_tree,
  34998. + (void **)pvec.pages, *index, count,
  34999. + PAGECACHE_TAG_REISER4_MOVED);
  35000. + if (pagevec_count(&pvec) == 0) {
  35001. + /*
  35002. + * there are no pages tagged MOVED in mapping->page_tree
  35003. + * starting from *index
  35004. + */
  35005. + spin_unlock_irq(&mapping->tree_lock);
  35006. + *index = (pgoff_t)-1;
  35007. + return 0;
  35008. + }
  35009. +
  35010. + /* clear MOVED tag for all found pages */
  35011. + for (i = 0; i < pagevec_count(&pvec); i++) {
  35012. + get_page(pvec.pages[i]);
  35013. + radix_tree_tag_clear(&mapping->page_tree, pvec.pages[i]->index,
  35014. + PAGECACHE_TAG_REISER4_MOVED);
  35015. + }
  35016. + spin_unlock_irq(&mapping->tree_lock);
  35017. +
  35018. +
  35019. + *index = pvec.pages[i - 1]->index + 1;
  35020. +
  35021. + for (i = 0; i < pagevec_count(&pvec); i++) {
  35022. + result = capture_anonymous_page(pvec.pages[i]);
  35023. + if (result == 1)
  35024. + nr++;
  35025. + else {
  35026. + if (result < 0) {
  35027. + warning("vs-1454",
  35028. + "failed to capture page: "
  35029. + "result=%d, captured=%d)\n",
  35030. + result, i);
  35031. +
  35032. + /*
  35033. + * set MOVED tag to all pages which left not
  35034. + * captured
  35035. + */
  35036. + spin_lock_irq(&mapping->tree_lock);
  35037. + for (; i < pagevec_count(&pvec); i ++) {
  35038. + radix_tree_tag_set(&mapping->page_tree,
  35039. + pvec.pages[i]->index,
  35040. + PAGECACHE_TAG_REISER4_MOVED);
  35041. + }
  35042. + spin_unlock_irq(&mapping->tree_lock);
  35043. +
  35044. + pagevec_release(&pvec);
  35045. + return result;
  35046. + } else {
  35047. + /*
  35048. + * result == 0. capture_anonymous_page returns
  35049. + * 0 for Writeback-ed page. Set MOVED tag on
  35050. + * that page
  35051. + */
  35052. + spin_lock_irq(&mapping->tree_lock);
  35053. + radix_tree_tag_set(&mapping->page_tree,
  35054. + pvec.pages[i]->index,
  35055. + PAGECACHE_TAG_REISER4_MOVED);
  35056. + spin_unlock_irq(&mapping->tree_lock);
  35057. + if (i == 0)
  35058. + *index = pvec.pages[0]->index;
  35059. + else
  35060. + *index = pvec.pages[i - 1]->index + 1;
  35061. + }
  35062. + }
  35063. + }
  35064. + pagevec_release(&pvec);
  35065. + return nr;
  35066. +}
  35067. +
  35068. +/**
  35069. + * capture_anonymous_jnodes - find and capture anonymous jnodes
  35070. + * @mapping: address space where to look for jnodes
  35071. + * @from: start index
  35072. + * @to: end index
  35073. + * @to_capture: maximum number of jnodes to capture
  35074. + *
  35075. + * Looks for jnodes tagged EFLUSH_TAG_ANONYMOUS in inode's tree of jnodes in
  35076. + * the range of indexes @from-@to and captures them, returns number of captured
  35077. + * jnodes, updates @from to next jnode after the last captured one.
  35078. + */
  35079. +static int
  35080. +capture_anonymous_jnodes(struct address_space *mapping,
  35081. + pgoff_t *from, pgoff_t to, int to_capture)
  35082. +{
  35083. + *from = to;
  35084. + return 0;
  35085. +}
  35086. +
  35087. +/*
  35088. + * Commit atom of the jnode of a page.
  35089. + */
  35090. +static int sync_page(struct page *page)
  35091. +{
  35092. + int result;
  35093. + do {
  35094. + jnode *node;
  35095. + txn_atom *atom;
  35096. +
  35097. + lock_page(page);
  35098. + node = jprivate(page);
  35099. + if (node != NULL) {
  35100. + spin_lock_jnode(node);
  35101. + atom = jnode_get_atom(node);
  35102. + spin_unlock_jnode(node);
  35103. + } else
  35104. + atom = NULL;
  35105. + unlock_page(page);
  35106. + result = reiser4_sync_atom(atom);
  35107. + } while (result == -E_REPEAT);
  35108. + /*
  35109. + * ZAM-FIXME-HANS: document the logic of this loop, is it just to
  35110. + * handle the case where more pages get added to the atom while we are
  35111. + * syncing it?
  35112. + */
  35113. + assert("nikita-3485", ergo(result == 0,
  35114. + get_current_context()->trans->atom == NULL));
  35115. + return result;
  35116. +}
  35117. +
  35118. +/*
  35119. + * Commit atoms of pages on @pages list.
  35120. + * call sync_page for each page from mapping's page tree
  35121. + */
  35122. +static int sync_page_list(struct inode *inode)
  35123. +{
  35124. + int result;
  35125. + struct address_space *mapping;
  35126. + unsigned long from; /* start index for radix_tree_gang_lookup */
  35127. + unsigned int found; /* return value for radix_tree_gang_lookup */
  35128. +
  35129. + mapping = inode->i_mapping;
  35130. + from = 0;
  35131. + result = 0;
  35132. + spin_lock_irq(&mapping->tree_lock);
  35133. + while (result == 0) {
  35134. + struct page *page;
  35135. +
  35136. + found =
  35137. + radix_tree_gang_lookup(&mapping->page_tree, (void **)&page,
  35138. + from, 1);
  35139. + assert("edward-1550", found < 2);
  35140. + if (found == 0)
  35141. + break;
  35142. + /**
  35143. + * page may not leave radix tree because it is protected from
  35144. + * truncating by inode->i_mutex locked by sys_fsync
  35145. + */
  35146. + get_page(page);
  35147. + spin_unlock_irq(&mapping->tree_lock);
  35148. +
  35149. + from = page->index + 1;
  35150. +
  35151. + result = sync_page(page);
  35152. +
  35153. + put_page(page);
  35154. + spin_lock_irq(&mapping->tree_lock);
  35155. + }
  35156. +
  35157. + spin_unlock_irq(&mapping->tree_lock);
  35158. + return result;
  35159. +}
  35160. +
  35161. +static int commit_file_atoms(struct inode *inode)
  35162. +{
  35163. + int result;
  35164. + struct unix_file_info *uf_info;
  35165. +
  35166. + uf_info = unix_file_inode_data(inode);
  35167. +
  35168. + get_exclusive_access(uf_info);
  35169. + /*
  35170. + * find what items file is made from
  35171. + */
  35172. + result = find_file_state(inode, uf_info);
  35173. + drop_exclusive_access(uf_info);
  35174. + if (result != 0)
  35175. + return result;
  35176. +
  35177. + /*
  35178. + * file state cannot change because we are under ->i_mutex
  35179. + */
  35180. + switch (uf_info->container) {
  35181. + case UF_CONTAINER_EXTENTS:
  35182. + /* find_file_state might open or join an atom */
  35183. + reiser4_txn_restart_current();
  35184. + result =
  35185. + /*
  35186. + * when we are called by
  35187. + * filemap_fdatawrite->
  35188. + * do_writepages()->
  35189. + * reiser4_writepages_dispatch()
  35190. + *
  35191. + * inode->i_mapping->dirty_pages are spliced into
  35192. + * ->io_pages, leaving ->dirty_pages dirty.
  35193. + *
  35194. + * When we are called from
  35195. + * reiser4_fsync()->sync_unix_file(), we have to
  35196. + * commit atoms of all pages on the ->dirty_list.
  35197. + *
  35198. + * So for simplicity we just commit ->io_pages and
  35199. + * ->dirty_pages.
  35200. + */
  35201. + sync_page_list(inode);
  35202. + break;
  35203. + case UF_CONTAINER_TAILS:
  35204. + /*
  35205. + * NOTE-NIKITA probably we can be smarter for tails. For now
  35206. + * just commit all existing atoms.
  35207. + */
  35208. + result = txnmgr_force_commit_all(inode->i_sb, 0);
  35209. + break;
  35210. + case UF_CONTAINER_EMPTY:
  35211. + result = 0;
  35212. + break;
  35213. + case UF_CONTAINER_UNKNOWN:
  35214. + default:
  35215. + result = -EIO;
  35216. + break;
  35217. + }
  35218. +
  35219. + /*
  35220. + * commit current transaction: there can be captured nodes from
  35221. + * find_file_state() and finish_conversion().
  35222. + */
  35223. + reiser4_txn_restart_current();
  35224. + return result;
  35225. +}
  35226. +
  35227. +/**
  35228. + * writepages_unix_file - writepages of struct address_space_operations
  35229. + * @mapping:
  35230. + * @wbc:
  35231. + *
  35232. + * This captures anonymous pages and anonymous jnodes. Anonymous pages are
  35233. + * pages which are dirtied via mmapping. Anonymous jnodes are ones which were
  35234. + * created by reiser4_writepage.
  35235. + */
  35236. +int writepages_unix_file(struct address_space *mapping,
  35237. + struct writeback_control *wbc)
  35238. +{
  35239. + int result;
  35240. + struct unix_file_info *uf_info;
  35241. + pgoff_t pindex, jindex, nr_pages;
  35242. + long to_capture;
  35243. + struct inode *inode;
  35244. +
  35245. + inode = mapping->host;
  35246. + if (!has_anonymous_pages(inode)) {
  35247. + result = 0;
  35248. + goto end;
  35249. + }
  35250. + jindex = pindex = wbc->range_start >> PAGE_SHIFT;
  35251. + result = 0;
  35252. + nr_pages = size_in_pages(i_size_read(inode));
  35253. +
  35254. + uf_info = unix_file_inode_data(inode);
  35255. +
  35256. + do {
  35257. + reiser4_context *ctx;
  35258. +
  35259. + if (wbc->sync_mode != WB_SYNC_ALL)
  35260. + to_capture = min(wbc->nr_to_write, CAPTURE_APAGE_BURST);
  35261. + else
  35262. + to_capture = CAPTURE_APAGE_BURST;
  35263. +
  35264. + ctx = reiser4_init_context(inode->i_sb);
  35265. + if (IS_ERR(ctx)) {
  35266. + result = PTR_ERR(ctx);
  35267. + break;
  35268. + }
  35269. + /* avoid recursive calls to ->sync_inodes */
  35270. + ctx->nobalance = 1;
  35271. + assert("zam-760", lock_stack_isclean(get_current_lock_stack()));
  35272. + assert("edward-1551", LOCK_CNT_NIL(inode_sem_w));
  35273. + assert("edward-1552", LOCK_CNT_NIL(inode_sem_r));
  35274. +
  35275. + reiser4_txn_restart_current();
  35276. +
  35277. + /* we have to get nonexclusive access to the file */
  35278. + if (get_current_context()->entd) {
  35279. + /*
  35280. + * use nonblocking version of nonexclusive_access to
  35281. + * avoid deadlock which might look like the following:
  35282. + * process P1 holds NEA on file F1 and called entd to
  35283. + * reclaim some memory. Entd works for P1 and is going
  35284. + * to capture pages of file F2. To do that entd has to
  35285. + * get NEA to F2. F2 is held by process P2 which also
  35286. + * called entd. But entd is serving P1 at the moment
  35287. + * and P2 has to wait. Process P3 trying to get EA to
  35288. + * file F2. Existence of pending EA request to file F2
  35289. + * makes impossible for entd to get NEA to file
  35290. + * F2. Neither of these process can continue. Using
  35291. + * nonblocking version of getting NEA is supposed to
  35292. + * avoid this deadlock.
  35293. + */
  35294. + if (try_to_get_nonexclusive_access(uf_info) == 0) {
  35295. + result = RETERR(-EBUSY);
  35296. + reiser4_exit_context(ctx);
  35297. + break;
  35298. + }
  35299. + } else
  35300. + get_nonexclusive_access(uf_info);
  35301. +
  35302. + while (to_capture > 0) {
  35303. + pgoff_t start;
  35304. +
  35305. + assert("vs-1727", jindex <= pindex);
  35306. + if (pindex == jindex) {
  35307. + start = pindex;
  35308. + result =
  35309. + capture_anonymous_pages(inode->i_mapping,
  35310. + &pindex,
  35311. + to_capture);
  35312. + if (result <= 0)
  35313. + break;
  35314. + to_capture -= result;
  35315. + wbc->nr_to_write -= result;
  35316. + if (start + result == pindex) {
  35317. + jindex = pindex;
  35318. + continue;
  35319. + }
  35320. + if (to_capture <= 0)
  35321. + break;
  35322. + }
  35323. + /* deal with anonymous jnodes between jindex and pindex */
  35324. + result =
  35325. + capture_anonymous_jnodes(inode->i_mapping, &jindex,
  35326. + pindex, to_capture);
  35327. + if (result < 0)
  35328. + break;
  35329. + to_capture -= result;
  35330. + get_current_context()->nr_captured += result;
  35331. +
  35332. + if (jindex == (pgoff_t) - 1) {
  35333. + assert("vs-1728", pindex == (pgoff_t) - 1);
  35334. + break;
  35335. + }
  35336. + }
  35337. + if (to_capture <= 0)
  35338. + /* there may be left more pages */
  35339. + __mark_inode_dirty(inode, I_DIRTY_PAGES);
  35340. +
  35341. + drop_nonexclusive_access(uf_info);
  35342. + if (result < 0) {
  35343. + /* error happened */
  35344. + reiser4_exit_context(ctx);
  35345. + return result;
  35346. + }
  35347. + if (wbc->sync_mode != WB_SYNC_ALL) {
  35348. + reiser4_exit_context(ctx);
  35349. + return 0;
  35350. + }
  35351. + result = commit_file_atoms(inode);
  35352. + reiser4_exit_context(ctx);
  35353. + if (pindex >= nr_pages && jindex == pindex)
  35354. + break;
  35355. + } while (1);
  35356. +
  35357. + end:
  35358. + if (is_in_reiser4_context()) {
  35359. + if (get_current_context()->nr_captured >= CAPTURE_APAGE_BURST) {
  35360. + /*
  35361. + * there are already pages to flush, flush them out, do
  35362. + * not delay until end of reiser4_sync_inodes
  35363. + */
  35364. + reiser4_writeout(inode->i_sb, wbc);
  35365. + get_current_context()->nr_captured = 0;
  35366. + }
  35367. + }
  35368. + return result;
  35369. +}
  35370. +
  35371. +/**
  35372. + * readpage_unix_file - readpage of struct address_space_operations
  35373. + * @file:
  35374. + * @page:
  35375. + *
  35376. + * Compose a key and search for item containing information about @page
  35377. + * data. If item is found - its readpage method is called.
  35378. + */
  35379. +int readpage_unix_file(struct file *file, struct page *page)
  35380. +{
  35381. + reiser4_context *ctx;
  35382. + int result;
  35383. + struct inode *inode;
  35384. + reiser4_key key;
  35385. + item_plugin *iplug;
  35386. + hint_t *hint;
  35387. + lock_handle *lh;
  35388. + coord_t *coord;
  35389. +
  35390. + assert("vs-1062", PageLocked(page));
  35391. + assert("vs-976", !PageUptodate(page));
  35392. + assert("vs-1061", page->mapping && page->mapping->host);
  35393. +
  35394. + if (page->mapping->host->i_size <= page_offset(page)) {
  35395. + /* page is out of file */
  35396. + zero_user(page, 0, PAGE_SIZE);
  35397. + SetPageUptodate(page);
  35398. + unlock_page(page);
  35399. + return 0;
  35400. + }
  35401. +
  35402. + inode = page->mapping->host;
  35403. + ctx = reiser4_init_context(inode->i_sb);
  35404. + if (IS_ERR(ctx)) {
  35405. + unlock_page(page);
  35406. + return PTR_ERR(ctx);
  35407. + }
  35408. +
  35409. + hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
  35410. + if (hint == NULL) {
  35411. + unlock_page(page);
  35412. + reiser4_exit_context(ctx);
  35413. + return RETERR(-ENOMEM);
  35414. + }
  35415. +
  35416. + result = load_file_hint(file, hint);
  35417. + if (result) {
  35418. + kfree(hint);
  35419. + unlock_page(page);
  35420. + reiser4_exit_context(ctx);
  35421. + return result;
  35422. + }
  35423. + lh = &hint->lh;
  35424. +
  35425. + /* get key of first byte of the page */
  35426. + key_by_inode_and_offset_common(inode, page_offset(page), &key);
  35427. +
  35428. + /* look for file metadata corresponding to first byte of page */
  35429. + get_page(page);
  35430. + unlock_page(page);
  35431. + result = find_file_item(hint, &key, ZNODE_READ_LOCK, inode);
  35432. + lock_page(page);
  35433. + put_page(page);
  35434. +
  35435. + if (page->mapping == NULL) {
  35436. + /*
  35437. + * readpage allows truncate to run concurrently. Page was
  35438. + * truncated while it was not locked
  35439. + */
  35440. + done_lh(lh);
  35441. + kfree(hint);
  35442. + unlock_page(page);
  35443. + reiser4_txn_restart(ctx);
  35444. + reiser4_exit_context(ctx);
  35445. + return -EINVAL;
  35446. + }
  35447. +
  35448. + if (result != CBK_COORD_FOUND || hint->ext_coord.coord.between != AT_UNIT) {
  35449. + if (result == CBK_COORD_FOUND &&
  35450. + hint->ext_coord.coord.between != AT_UNIT)
  35451. + /* file is truncated */
  35452. + result = -EINVAL;
  35453. + done_lh(lh);
  35454. + kfree(hint);
  35455. + unlock_page(page);
  35456. + reiser4_txn_restart(ctx);
  35457. + reiser4_exit_context(ctx);
  35458. + return result;
  35459. + }
  35460. +
  35461. + /*
  35462. + * item corresponding to page is found. It can not be removed because
  35463. + * znode lock is held
  35464. + */
  35465. + if (PageUptodate(page)) {
  35466. + done_lh(lh);
  35467. + kfree(hint);
  35468. + unlock_page(page);
  35469. + reiser4_txn_restart(ctx);
  35470. + reiser4_exit_context(ctx);
  35471. + return 0;
  35472. + }
  35473. +
  35474. + coord = &hint->ext_coord.coord;
  35475. + result = zload(coord->node);
  35476. + if (result) {
  35477. + done_lh(lh);
  35478. + kfree(hint);
  35479. + unlock_page(page);
  35480. + reiser4_txn_restart(ctx);
  35481. + reiser4_exit_context(ctx);
  35482. + return result;
  35483. + }
  35484. +
  35485. + validate_extended_coord(&hint->ext_coord, page_offset(page));
  35486. +
  35487. + if (!coord_is_existing_unit(coord)) {
  35488. + /* this indicates corruption */
  35489. + warning("vs-280",
  35490. + "Looking for page %lu of file %llu (size %lli). "
  35491. + "No file items found (%d). File is corrupted?\n",
  35492. + page->index, (unsigned long long)get_inode_oid(inode),
  35493. + inode->i_size, result);
  35494. + zrelse(coord->node);
  35495. + done_lh(lh);
  35496. + kfree(hint);
  35497. + unlock_page(page);
  35498. + reiser4_txn_restart(ctx);
  35499. + reiser4_exit_context(ctx);
  35500. + return RETERR(-EIO);
  35501. + }
  35502. +
  35503. + /*
  35504. + * get plugin of found item or use the extent plugin if there is
  35505. + * none
  35506. + */
  35507. + iplug = item_plugin_by_coord(coord);
  35508. + if (iplug->s.file.readpage)
  35509. + result = iplug->s.file.readpage(coord, page);
  35510. + else
  35511. + result = RETERR(-EINVAL);
  35512. +
  35513. + if (!result) {
  35514. + set_key_offset(&key,
  35515. + (loff_t) (page->index + 1) << PAGE_SHIFT);
  35516. + /* FIXME should call reiser4_set_hint() */
  35517. + reiser4_unset_hint(hint);
  35518. + } else {
  35519. + unlock_page(page);
  35520. + reiser4_unset_hint(hint);
  35521. + }
  35522. + assert("vs-979",
  35523. + ergo(result == 0, (PageLocked(page) || PageUptodate(page))));
  35524. + assert("vs-9791", ergo(result != 0, !PageLocked(page)));
  35525. +
  35526. + zrelse(coord->node);
  35527. + done_lh(lh);
  35528. +
  35529. + save_file_hint(file, hint);
  35530. + kfree(hint);
  35531. +
  35532. + /*
  35533. + * FIXME: explain why it is needed. HINT: page allocation in write can
  35534. + * not be done when atom is not NULL because reiser4_writepage can not
  35535. + * kick entd and have to eflush
  35536. + */
  35537. + reiser4_txn_restart(ctx);
  35538. + reiser4_exit_context(ctx);
  35539. + return result;
  35540. +}
  35541. +
  35542. +struct uf_readpages_context {
  35543. + lock_handle lh;
  35544. + coord_t coord;
  35545. +};
  35546. +
  35547. +/*
  35548. + * A callback function for readpages_unix_file/read_cache_pages.
  35549. + * We don't take non-exclusive access. If an item different from
  35550. + * extent pointer is found in some iteration, then return error
  35551. + * (-EINVAL).
  35552. + *
  35553. + * @data -- a pointer to reiser4_readpages_context object,
  35554. + * to save the twig lock and the coord between
  35555. + * read_cache_page iterations.
  35556. + * @page -- page to start read.
  35557. + */
  35558. +static int readpages_filler(void * data, struct page * page)
  35559. +{
  35560. + struct uf_readpages_context *rc = data;
  35561. + jnode * node;
  35562. + int ret = 0;
  35563. + reiser4_extent *ext;
  35564. + __u64 ext_index;
  35565. + int cbk_done = 0;
  35566. + struct address_space *mapping = page->mapping;
  35567. +
  35568. + if (PageUptodate(page)) {
  35569. + unlock_page(page);
  35570. + return 0;
  35571. + }
  35572. + get_page(page);
  35573. +
  35574. + if (rc->lh.node == 0) {
  35575. + /* no twig lock - have to do tree search. */
  35576. + reiser4_key key;
  35577. + repeat:
  35578. + unlock_page(page);
  35579. + key_by_inode_and_offset_common(
  35580. + mapping->host, page_offset(page), &key);
  35581. + ret = coord_by_key(
  35582. + &get_super_private(mapping->host->i_sb)->tree,
  35583. + &key, &rc->coord, &rc->lh,
  35584. + ZNODE_READ_LOCK, FIND_EXACT,
  35585. + TWIG_LEVEL, TWIG_LEVEL, CBK_UNIQUE, NULL);
  35586. + if (unlikely(ret))
  35587. + goto exit;
  35588. + lock_page(page);
  35589. + if (PageUptodate(page))
  35590. + goto unlock;
  35591. + cbk_done = 1;
  35592. + }
  35593. + ret = zload(rc->coord.node);
  35594. + if (unlikely(ret))
  35595. + goto unlock;
  35596. + if (!coord_is_existing_item(&rc->coord)) {
  35597. + zrelse(rc->coord.node);
  35598. + ret = RETERR(-ENOENT);
  35599. + goto unlock;
  35600. + }
  35601. + if (!item_is_extent(&rc->coord)) {
  35602. + /*
  35603. + * ->readpages() is not
  35604. + * defined for tail items
  35605. + */
  35606. + zrelse(rc->coord.node);
  35607. + ret = RETERR(-EINVAL);
  35608. + goto unlock;
  35609. + }
  35610. + ext = extent_by_coord(&rc->coord);
  35611. + ext_index = extent_unit_index(&rc->coord);
  35612. + if (page->index < ext_index ||
  35613. + page->index >= ext_index + extent_get_width(ext)) {
  35614. + /* the page index doesn't belong to the extent unit
  35615. + which the coord points to - release the lock and
  35616. + repeat with tree search. */
  35617. + zrelse(rc->coord.node);
  35618. + done_lh(&rc->lh);
  35619. + /* we can be here after a CBK call only in case of
  35620. + corruption of the tree or the tree lookup algorithm bug. */
  35621. + if (unlikely(cbk_done)) {
  35622. + ret = RETERR(-EIO);
  35623. + goto unlock;
  35624. + }
  35625. + goto repeat;
  35626. + }
  35627. + node = jnode_of_page(page);
  35628. + if (unlikely(IS_ERR(node))) {
  35629. + zrelse(rc->coord.node);
  35630. + ret = PTR_ERR(node);
  35631. + goto unlock;
  35632. + }
  35633. + ret = reiser4_do_readpage_extent(ext, page->index - ext_index, page);
  35634. + jput(node);
  35635. + zrelse(rc->coord.node);
  35636. + if (likely(!ret))
  35637. + goto exit;
  35638. + unlock:
  35639. + unlock_page(page);
  35640. + exit:
  35641. + put_page(page);
  35642. + return ret;
  35643. +}
  35644. +
  35645. +/**
  35646. + * readpages_unix_file - called by the readahead code, starts reading for each
  35647. + * page of given list of pages
  35648. + */
  35649. +int readpages_unix_file(struct file *file, struct address_space *mapping,
  35650. + struct list_head *pages, unsigned nr_pages)
  35651. +{
  35652. + reiser4_context *ctx;
  35653. + struct uf_readpages_context rc;
  35654. + int ret;
  35655. +
  35656. + ctx = reiser4_init_context(mapping->host->i_sb);
  35657. + if (IS_ERR(ctx)) {
  35658. + put_pages_list(pages);
  35659. + return PTR_ERR(ctx);
  35660. + }
  35661. + init_lh(&rc.lh);
  35662. + ret = read_cache_pages(mapping, pages, readpages_filler, &rc);
  35663. + done_lh(&rc.lh);
  35664. +
  35665. + context_set_commit_async(ctx);
  35666. + /* close the transaction to protect further page allocation from deadlocks */
  35667. + reiser4_txn_restart(ctx);
  35668. + reiser4_exit_context(ctx);
  35669. + return ret;
  35670. +}
  35671. +
  35672. +static reiser4_block_nr unix_file_estimate_read(struct inode *inode,
  35673. + loff_t count UNUSED_ARG)
  35674. +{
  35675. + /* We should reserve one block, because of updating of the stat data
  35676. + item */
  35677. + assert("vs-1249",
  35678. + inode_file_plugin(inode)->estimate.update ==
  35679. + estimate_update_common);
  35680. + return estimate_update_common(inode);
  35681. +}
  35682. +
  35683. +/* this is called with nonexclusive access obtained,
  35684. + file's container can not change */
  35685. +static ssize_t do_read_compound_file(hint_t *hint, struct file *file,
  35686. + char __user *buf, size_t count,
  35687. + loff_t *off)
  35688. +{
  35689. + int result;
  35690. + struct inode *inode;
  35691. + flow_t flow;
  35692. + coord_t *coord;
  35693. + znode *loaded;
  35694. +
  35695. + inode = file_inode(file);
  35696. +
  35697. + /* build flow */
  35698. + assert("vs-1250",
  35699. + inode_file_plugin(inode)->flow_by_inode ==
  35700. + flow_by_inode_unix_file);
  35701. + result = flow_by_inode_unix_file(inode, buf, 1 /* user space */,
  35702. + count, *off, READ_OP, &flow);
  35703. + if (unlikely(result))
  35704. + return result;
  35705. +
  35706. + /* get seal and coord sealed with it from reiser4 private data
  35707. + of struct file. The coord will tell us where our last read
  35708. + of this file finished, and the seal will help to determine
  35709. + if that location is still valid.
  35710. + */
  35711. + coord = &hint->ext_coord.coord;
  35712. + while (flow.length && result == 0) {
  35713. + result = find_file_item(hint, &flow.key,
  35714. + ZNODE_READ_LOCK, inode);
  35715. + if (cbk_errored(result))
  35716. + /* error happened */
  35717. + break;
  35718. +
  35719. + if (coord->between != AT_UNIT) {
  35720. + /* there were no items corresponding to given offset */
  35721. + done_lh(hint->ext_coord.lh);
  35722. + break;
  35723. + }
  35724. +
  35725. + loaded = coord->node;
  35726. + result = zload(loaded);
  35727. + if (unlikely(result)) {
  35728. + done_lh(hint->ext_coord.lh);
  35729. + break;
  35730. + }
  35731. +
  35732. + if (hint->ext_coord.valid == 0)
  35733. + validate_extended_coord(&hint->ext_coord,
  35734. + get_key_offset(&flow.key));
  35735. +
  35736. + assert("vs-4", hint->ext_coord.valid == 1);
  35737. + assert("vs-33", hint->ext_coord.lh == &hint->lh);
  35738. + /* call item's read method */
  35739. + result = item_plugin_by_coord(coord)->s.file.read(file,
  35740. + &flow,
  35741. + hint);
  35742. + zrelse(loaded);
  35743. + done_lh(hint->ext_coord.lh);
  35744. + }
  35745. + return (count - flow.length) ? (count - flow.length) : result;
  35746. +}
  35747. +
  35748. +static ssize_t read_compound_file(struct file*, char __user*, size_t, loff_t*);
  35749. +
  35750. +/**
  35751. + * unix-file specific ->read() method
  35752. + * of struct file_operations.
  35753. + */
  35754. +ssize_t read_unix_file(struct file *file, char __user *buf,
  35755. + size_t read_amount, loff_t *off)
  35756. +{
  35757. + reiser4_context *ctx;
  35758. + ssize_t result;
  35759. + struct inode *inode;
  35760. + struct unix_file_info *uf_info;
  35761. +
  35762. + if (unlikely(read_amount == 0))
  35763. + return 0;
  35764. +
  35765. + inode = file_inode(file);
  35766. + assert("vs-972", !reiser4_inode_get_flag(inode, REISER4_NO_SD));
  35767. +
  35768. + ctx = reiser4_init_context(inode->i_sb);
  35769. + if (IS_ERR(ctx))
  35770. + return PTR_ERR(ctx);
  35771. +
  35772. + result = reiser4_grab_space_force(unix_file_estimate_read(inode,
  35773. + read_amount), BA_CAN_COMMIT);
  35774. + if (unlikely(result != 0))
  35775. + goto out2;
  35776. +
  35777. + uf_info = unix_file_inode_data(inode);
  35778. +
  35779. + if (uf_info->container == UF_CONTAINER_UNKNOWN) {
  35780. + get_exclusive_access(uf_info);
  35781. + result = find_file_state(inode, uf_info);
  35782. + if (unlikely(result != 0))
  35783. + goto out;
  35784. + }
  35785. + else
  35786. + get_nonexclusive_access(uf_info);
  35787. +
  35788. + switch (uf_info->container) {
  35789. + case UF_CONTAINER_EXTENTS:
  35790. + if (!reiser4_inode_get_flag(inode, REISER4_PART_MIXED)) {
  35791. + result = new_sync_read(file, buf, read_amount, off);
  35792. + break;
  35793. + }
  35794. + case UF_CONTAINER_TAILS:
  35795. + case UF_CONTAINER_UNKNOWN:
  35796. + result = read_compound_file(file, buf, read_amount, off);
  35797. + break;
  35798. + case UF_CONTAINER_EMPTY:
  35799. + result = 0;
  35800. + }
  35801. + out:
  35802. + drop_access(uf_info);
  35803. + out2:
  35804. + context_set_commit_async(ctx);
  35805. + reiser4_exit_context(ctx);
  35806. + return result;
  35807. +}
  35808. +
  35809. +/*
  35810. + * Read a file, which contains tails and, maybe,
  35811. + * extents.
  35812. + *
  35813. + * Sometimes file can consist of items of both types
  35814. + * (extents and tails). It can happen, e.g. because
  35815. + * of failed tail conversion. Also the conversion code
  35816. + * may release exclusive lock before calling
  35817. + * balance_dirty_pages().
  35818. + *
  35819. + * In this case applying a generic VFS library function
  35820. + * would be suboptimal. We use our own "light-weight"
  35821. + * version below.
  35822. + */
  35823. +static ssize_t read_compound_file(struct file *file, char __user *buf,
  35824. + size_t count, loff_t *off)
  35825. +{
  35826. + ssize_t result = 0;
  35827. + struct inode *inode;
  35828. + hint_t *hint;
  35829. + struct unix_file_info *uf_info;
  35830. + size_t to_read;
  35831. + size_t was_read = 0;
  35832. + loff_t i_size;
  35833. +
  35834. + inode = file_inode(file);
  35835. + assert("vs-972", !reiser4_inode_get_flag(inode, REISER4_NO_SD));
  35836. +
  35837. + i_size = i_size_read(inode);
  35838. + if (*off >= i_size)
  35839. + /* position to read from is past the end of file */
  35840. + goto exit;
  35841. + if (*off + count > i_size)
  35842. + count = i_size - *off;
  35843. +
  35844. + hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
  35845. + if (hint == NULL)
  35846. + return RETERR(-ENOMEM);
  35847. +
  35848. + result = load_file_hint(file, hint);
  35849. + if (result) {
  35850. + kfree(hint);
  35851. + return result;
  35852. + }
  35853. + uf_info = unix_file_inode_data(inode);
  35854. +
  35855. + /* read by page-aligned chunks */
  35856. + to_read = PAGE_SIZE - (*off & (loff_t)(PAGE_SIZE - 1));
  35857. + if (to_read > count)
  35858. + to_read = count;
  35859. + while (count > 0) {
  35860. + reiser4_txn_restart_current();
  35861. + /*
  35862. + * faultin user page
  35863. + */
  35864. + result = fault_in_pages_writeable(buf, to_read);
  35865. + if (result)
  35866. + return RETERR(-EFAULT);
  35867. +
  35868. + result = do_read_compound_file(hint, file, buf, to_read, off);
  35869. + if (result < 0)
  35870. + break;
  35871. + count -= result;
  35872. + buf += result;
  35873. +
  35874. + /* update position in a file */
  35875. + *off += result;
  35876. + /* total number of read bytes */
  35877. + was_read += result;
  35878. + to_read = count;
  35879. + if (to_read > PAGE_SIZE)
  35880. + to_read = PAGE_SIZE;
  35881. + }
  35882. + done_lh(&hint->lh);
  35883. + save_file_hint(file, hint);
  35884. + kfree(hint);
  35885. + if (was_read)
  35886. + file_accessed(file);
  35887. + exit:
  35888. + return was_read ? was_read : result;
  35889. +}
  35890. +
  35891. +/* This function takes care of @file's pages. First of all it checks if
  35892. +   the filesystem is readonly and if so gets out. Otherwise, it throws out
  35893. +   all pages of the file if it was mapped for read and is going to be mapped
  35894. +   for write and consists of tails. This is done in order to not manage two
  35895. +   copies of the data (one in the page cache and another in the tails
  35896. +   themselves) for the case of mapping files consisting of tails.
  35897. +
  35898. +   Here also tail2extent conversion is performed if it is allowed and the
  35899. +   file is going to be written or mapped for write. This function may be
  35900. +   called from write_unix_file() or mmap_unix_file(). */
  35901. +static int check_pages_unix_file(struct file *file, struct inode *inode)
  35902. +{
  35903. + reiser4_invalidate_pages(inode->i_mapping, 0,
  35904. + (inode->i_size + PAGE_SIZE -
  35905. + 1) >> PAGE_SHIFT, 0);
  35906. + return unpack(file, inode, 0 /* not forever */ );
  35907. +}
  35908. +
  35909. +/**
  35910. + * mmap_unix_file - mmap of struct file_operations
  35911. + * @file: file to mmap
  35912. + * @vma:
  35913. + *
  35914. + * This is implementation of vfs's mmap method of struct file_operations for
  35915. + * unix file plugin. It converts file to extent if necessary. Sets
  35916. + * reiser4_inode's flag - REISER4_HAS_MMAP.
  35917. + */
  35918. +int mmap_unix_file(struct file *file, struct vm_area_struct *vma)
  35919. +{
  35920. + reiser4_context *ctx;
  35921. + int result;
  35922. + struct inode *inode;
  35923. + struct unix_file_info *uf_info;
  35924. + reiser4_block_nr needed;
  35925. +
  35926. + inode = file_inode(file);
  35927. + ctx = reiser4_init_context(inode->i_sb);
  35928. + if (IS_ERR(ctx))
  35929. + return PTR_ERR(ctx);
  35930. +
  35931. + uf_info = unix_file_inode_data(inode);
  35932. +
  35933. + get_exclusive_access_careful(uf_info, inode);
  35934. +
  35935. + if (!IS_RDONLY(inode) && (vma->vm_flags & (VM_MAYWRITE | VM_SHARED))) {
  35936. + /*
  35937. + * we need file built of extent items. If it is still built of
  35938. + * tail items we have to convert it. Find what items the file
  35939. + * is built of
  35940. + */
  35941. + result = find_file_state(inode, uf_info);
  35942. + if (result != 0) {
  35943. + drop_exclusive_access(uf_info);
  35944. + reiser4_exit_context(ctx);
  35945. + return result;
  35946. + }
  35947. +
  35948. + assert("vs-1648", (uf_info->container == UF_CONTAINER_TAILS ||
  35949. + uf_info->container == UF_CONTAINER_EXTENTS ||
  35950. + uf_info->container == UF_CONTAINER_EMPTY));
  35951. + if (uf_info->container == UF_CONTAINER_TAILS) {
  35952. + /*
  35953. + * invalidate all pages and convert file from tails to
  35954. + * extents
  35955. + */
  35956. + result = check_pages_unix_file(file, inode);
  35957. + if (result) {
  35958. + drop_exclusive_access(uf_info);
  35959. + reiser4_exit_context(ctx);
  35960. + return result;
  35961. + }
  35962. + }
  35963. + }
  35964. +
  35965. + /*
  35966. + * generic_file_mmap will do update_atime. Grab space for stat data
  35967. + * update.
  35968. + */
  35969. + needed = inode_file_plugin(inode)->estimate.update(inode);
  35970. + result = reiser4_grab_space_force(needed, BA_CAN_COMMIT);
  35971. + if (result) {
  35972. + drop_exclusive_access(uf_info);
  35973. + reiser4_exit_context(ctx);
  35974. + return result;
  35975. + }
  35976. +
  35977. + result = generic_file_mmap(file, vma);
  35978. + if (result == 0) {
  35979. + /* mark file as having mapping. */
  35980. + reiser4_inode_set_flag(inode, REISER4_HAS_MMAP);
  35981. + }
  35982. +
  35983. + drop_exclusive_access(uf_info);
  35984. + reiser4_exit_context(ctx);
  35985. + return result;
  35986. +}
  35987. +
  35988. +/**
  35989. + * find_first_item
  35990. + * @inode:
  35991. + *
  35992. + * Finds file item which is responsible for first byte in the file.
  35993. + */
  35994. +static int find_first_item(struct inode *inode)
  35995. +{
  35996. + coord_t coord;
  35997. + lock_handle lh;
  35998. + reiser4_key key;
  35999. + int result;
  36000. +
  36001. + coord_init_zero(&coord);
  36002. + init_lh(&lh);
  36003. + inode_file_plugin(inode)->key_by_inode(inode, 0, &key);
  36004. + result = find_file_item_nohint(&coord, &lh, &key, ZNODE_READ_LOCK,
  36005. + inode);
  36006. + if (result == CBK_COORD_FOUND) {
  36007. + if (coord.between == AT_UNIT) {
  36008. + result = zload(coord.node);
  36009. + if (result == 0) {
  36010. + result = item_id_by_coord(&coord);
  36011. + zrelse(coord.node);
  36012. + if (result != EXTENT_POINTER_ID &&
  36013. + result != FORMATTING_ID)
  36014. + result = RETERR(-EIO);
  36015. + }
  36016. + } else
  36017. + result = RETERR(-EIO);
  36018. + }
  36019. + done_lh(&lh);
  36020. + return result;
  36021. +}
  36022. +
  36023. +/**
  36024. + * open_unix_file
  36025. + * @inode:
  36026. + * @file:
  36027. + *
  36028. + * If filesystem is not readonly - complete uncompleted tail conversion if
  36029. + * there was one
  36030. + */
  36031. +int open_unix_file(struct inode *inode, struct file *file)
  36032. +{
  36033. + int result;
  36034. + reiser4_context *ctx;
  36035. + struct unix_file_info *uf_info;
  36036. +
  36037. + if (IS_RDONLY(inode))
  36038. + return 0;
  36039. +
  36040. + if (!reiser4_inode_get_flag(inode, REISER4_PART_MIXED))
  36041. + return 0;
  36042. +
  36043. + ctx = reiser4_init_context(inode->i_sb);
  36044. + if (IS_ERR(ctx))
  36045. + return PTR_ERR(ctx);
  36046. +
  36047. + uf_info = unix_file_inode_data(inode);
  36048. +
  36049. + get_exclusive_access_careful(uf_info, inode);
  36050. +
  36051. + if (!reiser4_inode_get_flag(inode, REISER4_PART_MIXED)) {
  36052. + /*
  36053. + * other process completed the conversion
  36054. + */
  36055. + drop_exclusive_access(uf_info);
  36056. + reiser4_exit_context(ctx);
  36057. + return 0;
  36058. + }
  36059. +
  36060. + /*
  36061. + * file left in semi converted state after unclean shutdown or another
  36062. + * thread is doing conversion and dropped exclusive access while doing
  36063. + * balance_dirty_pages(). Complete the conversion
  36064. + */
  36065. + result = find_first_item(inode);
  36066. + if (result == EXTENT_POINTER_ID)
  36067. + /*
  36068. + * first item is extent, therefore there was incomplete
  36069. + * tail2extent conversion. Complete it
  36070. + */
  36071. + result = tail2extent(unix_file_inode_data(inode));
  36072. + else if (result == FORMATTING_ID)
  36073. + /*
  36074. + * first item is formatting item, therefore there was
  36075. + * incomplete extent2tail conversion. Complete it
  36076. + */
  36077. + result = extent2tail(file, unix_file_inode_data(inode));
  36078. + else
  36079. + result = -EIO;
  36080. +
  36081. + assert("vs-1712",
  36082. + ergo(result == 0,
  36083. + (!reiser4_inode_get_flag(inode, REISER4_PART_MIXED) &&
  36084. + !reiser4_inode_get_flag(inode, REISER4_PART_IN_CONV))));
  36085. + drop_exclusive_access(uf_info);
  36086. + reiser4_exit_context(ctx);
  36087. + return result;
  36088. +}
  36089. +
  36090. +#define NEITHER_OBTAINED 0
  36091. +#define EA_OBTAINED 1
  36092. +#define NEA_OBTAINED 2
  36093. +
  36094. +static void drop_access(struct unix_file_info *uf_info)
  36095. +{
  36096. + if (uf_info->exclusive_use)
  36097. + drop_exclusive_access(uf_info);
  36098. + else
  36099. + drop_nonexclusive_access(uf_info);
  36100. +}
  36101. +
  36102. +#define debug_wuf(format, ...) printk("%s: %d: %s: " format "\n", \
  36103. + __FILE__, __LINE__, __FUNCTION__, ## __VA_ARGS__)
  36104. +
  36105. +/**
  36106. + * write_unix_file - private ->write() method of unix_file plugin.
  36107. + *
  36108. + * @file: file to write to
  36109. + * @buf: address of user-space buffer
  36110. + * @count: number of bytes to write
  36111. + * @pos: position in file to write to
  36112. + * @cont: unused argument, as we don't perform plugin conversion when being
  36113. + * managed by unix_file plugin.
  36114. + */
  36115. +ssize_t write_unix_file(struct file *file,
  36116. + const char __user *buf,
  36117. + size_t count, loff_t *pos,
  36118. + struct dispatch_context *cont)
  36119. +{
  36120. + int result;
  36121. + reiser4_context *ctx;
  36122. + struct inode *inode;
  36123. + struct unix_file_info *uf_info;
  36124. + ssize_t written;
  36125. + int to_write = PAGE_SIZE * WRITE_GRANULARITY;
  36126. + size_t left;
  36127. + ssize_t (*write_op)(struct file *, struct inode *,
  36128. + const char __user *, size_t,
  36129. + loff_t *pos);
  36130. + int ea;
  36131. + int enospc = 0; /* item plugin ->write() returned ENOSPC */
  36132. + loff_t new_size;
  36133. +
  36134. + ctx = get_current_context();
  36135. + inode = file_inode(file);
  36136. +
  36137. + assert("vs-947", !reiser4_inode_get_flag(inode, REISER4_NO_SD));
  36138. + assert("vs-9471", (!reiser4_inode_get_flag(inode, REISER4_PART_MIXED)));
  36139. +
  36140. + result = file_remove_privs(file);
  36141. + if (result) {
  36142. + context_set_commit_async(ctx);
  36143. + return result;
  36144. + }
  36145. + /* remove_suid might create a transaction */
  36146. + reiser4_txn_restart(ctx);
  36147. +
  36148. + uf_info = unix_file_inode_data(inode);
  36149. +
  36150. + written = 0;
  36151. + left = count;
  36152. + ea = NEITHER_OBTAINED;
  36153. + enospc = 0;
  36154. +
  36155. + new_size = i_size_read(inode);
  36156. + if (*pos + count > new_size)
  36157. + new_size = *pos + count;
  36158. +
  36159. + while (left) {
  36160. + int update_sd = 0;
  36161. + if (left < to_write)
  36162. + to_write = left;
  36163. +
  36164. + if (uf_info->container == UF_CONTAINER_EMPTY) {
  36165. + get_exclusive_access(uf_info);
  36166. + ea = EA_OBTAINED;
  36167. + if (uf_info->container != UF_CONTAINER_EMPTY) {
  36168. + /* file is made not empty by another process */
  36169. + drop_exclusive_access(uf_info);
  36170. + ea = NEITHER_OBTAINED;
  36171. + continue;
  36172. + }
  36173. + } else if (uf_info->container == UF_CONTAINER_UNKNOWN) {
  36174. + /*
  36175. + * get exclusive access directly just to not have to
  36176. + * re-obtain it if file will appear empty
  36177. + */
  36178. + get_exclusive_access(uf_info);
  36179. + ea = EA_OBTAINED;
  36180. + result = find_file_state(inode, uf_info);
  36181. + if (result) {
  36182. + drop_exclusive_access(uf_info);
  36183. + ea = NEITHER_OBTAINED;
  36184. + break;
  36185. + }
  36186. + } else {
  36187. + get_nonexclusive_access(uf_info);
  36188. + ea = NEA_OBTAINED;
  36189. + }
  36190. +
  36191. + /* either EA or NEA is obtained. Choose item write method */
  36192. + if (uf_info->container == UF_CONTAINER_EXTENTS) {
  36193. + /* file is built of extent items */
  36194. + write_op = reiser4_write_extent;
  36195. + } else if (uf_info->container == UF_CONTAINER_EMPTY) {
  36196. + /* file is empty */
  36197. + if (should_have_notail(uf_info, new_size))
  36198. + write_op = reiser4_write_extent;
  36199. + else
  36200. + write_op = reiser4_write_tail;
  36201. + } else {
  36202. + /* file is built of tail items */
  36203. + if (should_have_notail(uf_info, new_size)) {
  36204. + if (ea == NEA_OBTAINED) {
  36205. + drop_nonexclusive_access(uf_info);
  36206. + get_exclusive_access(uf_info);
  36207. + ea = EA_OBTAINED;
  36208. + }
  36209. + if (uf_info->container == UF_CONTAINER_TAILS) {
  36210. + /*
  36211. + * if file is being converted by another
  36212. + * process - wait until it completes
  36213. + */
  36214. + while (1) {
  36215. + if (reiser4_inode_get_flag(inode,
  36216. + REISER4_PART_IN_CONV)) {
  36217. + drop_exclusive_access(uf_info);
  36218. + schedule();
  36219. + get_exclusive_access(uf_info);
  36220. + continue;
  36221. + }
  36222. + break;
  36223. + }
  36224. + if (uf_info->container == UF_CONTAINER_TAILS) {
  36225. + result = tail2extent(uf_info);
  36226. + if (result) {
  36227. + drop_exclusive_access(uf_info);
  36228. + context_set_commit_async(ctx);
  36229. + break;
  36230. + }
  36231. + }
  36232. + }
  36233. + drop_exclusive_access(uf_info);
  36234. + ea = NEITHER_OBTAINED;
  36235. + continue;
  36236. + }
  36237. + write_op = reiser4_write_tail;
  36238. + }
  36239. +
  36240. + written = write_op(file, inode, buf, to_write, pos);
  36241. + if (written == -ENOSPC && !enospc) {
  36242. + drop_access(uf_info);
  36243. + txnmgr_force_commit_all(inode->i_sb, 0);
  36244. + enospc = 1;
  36245. + continue;
  36246. + }
  36247. + if (written < 0) {
  36248. + /*
  36249. + * If this is -ENOSPC, then it happened
  36250. + * second time, so don't try to free space
  36251. + * once again.
  36252. + */
  36253. + drop_access(uf_info);
  36254. + result = written;
  36255. + break;
  36256. + }
  36257. + /* something is written. */
  36258. + if (enospc)
  36259. + enospc = 0;
  36260. + if (uf_info->container == UF_CONTAINER_EMPTY) {
  36261. + assert("edward-1553", ea == EA_OBTAINED);
  36262. + uf_info->container =
  36263. + (write_op == reiser4_write_extent) ?
  36264. + UF_CONTAINER_EXTENTS : UF_CONTAINER_TAILS;
  36265. + }
  36266. + assert("edward-1554",
  36267. + ergo(uf_info->container == UF_CONTAINER_EXTENTS,
  36268. + write_op == reiser4_write_extent));
  36269. + assert("edward-1555",
  36270. + ergo(uf_info->container == UF_CONTAINER_TAILS,
  36271. + write_op == reiser4_write_tail));
  36272. + if (*pos + written > inode->i_size) {
  36273. + INODE_SET_FIELD(inode, i_size, *pos + written);
  36274. + update_sd = 1;
  36275. + }
  36276. + if (!IS_NOCMTIME(inode)) {
  36277. + inode->i_ctime = inode->i_mtime = current_time(inode);
  36278. + update_sd = 1;
  36279. + }
  36280. + if (update_sd) {
  36281. + /*
  36282. + * space for update_sd was reserved in write_op
  36283. + */
  36284. + result = reiser4_update_sd(inode);
  36285. + if (result) {
  36286. + warning("edward-1574",
  36287. + "Can not update stat-data: %i. FSCK?",
  36288. + result);
  36289. + drop_access(uf_info);
  36290. + context_set_commit_async(ctx);
  36291. + break;
  36292. + }
  36293. + }
  36294. + drop_access(uf_info);
  36295. + ea = NEITHER_OBTAINED;
  36296. +
  36297. + /*
  36298. + * tell VM how many pages were dirtied. Maybe number of pages
  36299. + * which were dirty already should not be counted
  36300. + */
  36301. + reiser4_throttle_write(inode);
  36302. + left -= written;
  36303. + buf += written;
  36304. + *pos += written;
  36305. + }
  36306. + if (result == 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
  36307. + reiser4_txn_restart_current();
  36308. + grab_space_enable();
  36309. + result = reiser4_sync_file_common(file, 0, LONG_MAX,
  36310. + 0 /* data and stat data */);
  36311. + if (result)
  36312. + warning("reiser4-7", "failed to sync file %llu",
  36313. + (unsigned long long)get_inode_oid(inode));
  36314. + }
  36315. + /*
  36316. + * return number of written bytes or error code if nothing is
  36317. + * written. Note, that it does not work correctly in case when
  36318. + * sync_unix_file returns error
  36319. + */
  36320. + return (count - left) ? (count - left) : result;
  36321. +}
  36322. +
  36323. +/**
  36324. + * release_unix_file - release of struct file_operations
  36325. + * @inode: inode of released file
  36326. + * @file: file to release
  36327. + *
  36328. + * Implementation of release method of struct file_operations for unix file
  36329. + plugin. If last reference to inode is released - convert all extent items
  36330. + * into tail items if necessary. Frees reiser4 specific file data.
  36331. + */
  36332. +int release_unix_file(struct inode *inode, struct file *file)
  36333. +{
  36334. + reiser4_context *ctx;
  36335. + struct unix_file_info *uf_info;
  36336. + int result;
  36337. + int in_reiser4;
  36338. +
  36339. + in_reiser4 = is_in_reiser4_context();
  36340. +
  36341. + ctx = reiser4_init_context(inode->i_sb);
  36342. + if (IS_ERR(ctx))
  36343. + return PTR_ERR(ctx);
  36344. +
  36345. + result = 0;
  36346. + if (in_reiser4 == 0) {
  36347. + uf_info = unix_file_inode_data(inode);
  36348. +
  36349. + get_exclusive_access_careful(uf_info, inode);
  36350. + if (file->f_path.dentry->d_lockref.count == 1 &&
  36351. + uf_info->container == UF_CONTAINER_EXTENTS &&
  36352. + !should_have_notail(uf_info, inode->i_size) &&
  36353. + !rofs_inode(inode)) {
  36354. + result = extent2tail(file, uf_info);
  36355. + if (result != 0) {
  36356. + context_set_commit_async(ctx);
  36357. + warning("nikita-3233",
  36358. + "Failed (%d) to convert in %s (%llu)",
  36359. + result, __FUNCTION__,
  36360. + (unsigned long long)
  36361. + get_inode_oid(inode));
  36362. + }
  36363. + }
  36364. + drop_exclusive_access(uf_info);
  36365. + } else {
  36366. + /*
  36367. + we are within reiser4 context already. How is the latter
  36368. + possible? Simple:
  36369. +
  36370. + (gdb) bt
  36371. + #0 get_exclusive_access ()
  36372. + #2 0xc01e56d3 in release_unix_file ()
  36373. + #3 0xc01c3643 in reiser4_release ()
  36374. + #4 0xc014cae0 in __fput ()
  36375. + #5 0xc013ffc3 in remove_vm_struct ()
  36376. + #6 0xc0141786 in exit_mmap ()
  36377. + #7 0xc0118480 in mmput ()
  36378. + #8 0xc0133205 in oom_kill ()
  36379. + #9 0xc01332d1 in out_of_memory ()
  36380. + #10 0xc013bc1d in try_to_free_pages ()
  36381. + #11 0xc013427b in __alloc_pages ()
  36382. + #12 0xc013f058 in do_anonymous_page ()
  36383. + #13 0xc013f19d in do_no_page ()
  36384. + #14 0xc013f60e in handle_mm_fault ()
  36385. + #15 0xc01131e5 in do_page_fault ()
  36386. + #16 0xc0104935 in error_code ()
  36387. + #17 0xc025c0c6 in __copy_to_user_ll ()
  36388. + #18 0xc01d496f in reiser4_read_tail ()
  36389. + #19 0xc01e4def in read_unix_file ()
  36390. + #20 0xc01c3504 in reiser4_read ()
  36391. + #21 0xc014bd4f in vfs_read ()
  36392. + #22 0xc014bf66 in sys_read ()
  36393. + */
  36394. + warning("vs-44", "out of memory?");
  36395. + }
  36396. +
  36397. + reiser4_free_file_fsdata(file);
  36398. +
  36399. + reiser4_exit_context(ctx);
  36400. + return result;
  36401. +}
  36402. +
  36403. +static void set_file_notail(struct inode *inode)
  36404. +{
  36405. + reiser4_inode *state;
  36406. + formatting_plugin *tplug;
  36407. +
  36408. + state = reiser4_inode_data(inode);
  36409. + tplug = formatting_plugin_by_id(NEVER_TAILS_FORMATTING_ID);
  36410. + force_plugin_pset(inode, PSET_FORMATTING, (reiser4_plugin *)tplug);
  36411. +}
  36412. +
  36413. +/* if file is built of tails - convert it to extents */
  36414. +static int unpack(struct file *filp, struct inode *inode, int forever)
  36415. +{
  36416. + int result = 0;
  36417. + struct unix_file_info *uf_info;
  36418. +
  36419. + uf_info = unix_file_inode_data(inode);
  36420. + assert("vs-1628", ea_obtained(uf_info));
  36421. +
  36422. + result = find_file_state(inode, uf_info);
  36423. + if (result)
  36424. + return result;
  36425. + assert("vs-1074", uf_info->container != UF_CONTAINER_UNKNOWN);
  36426. +
  36427. + if (uf_info->container == UF_CONTAINER_TAILS) {
  36428. + /*
  36429. + * if file is being converted by another process - wait until it
  36430. + * completes
  36431. + */
  36432. + while (1) {
  36433. + if (reiser4_inode_get_flag(inode,
  36434. + REISER4_PART_IN_CONV)) {
  36435. + drop_exclusive_access(uf_info);
  36436. + schedule();
  36437. + get_exclusive_access(uf_info);
  36438. + continue;
  36439. + }
  36440. + break;
  36441. + }
  36442. + if (uf_info->container == UF_CONTAINER_TAILS) {
  36443. + result = tail2extent(uf_info);
  36444. + if (result)
  36445. + return result;
  36446. + }
  36447. + }
  36448. + if (forever) {
  36449. + /* save new formatting plugin in stat data */
  36450. + __u64 tograb;
  36451. +
  36452. + set_file_notail(inode);
  36453. +
  36454. + grab_space_enable();
  36455. + tograb = inode_file_plugin(inode)->estimate.update(inode);
  36456. + result = reiser4_grab_space(tograb, BA_CAN_COMMIT);
  36457. + result = reiser4_update_sd(inode);
  36458. + }
  36459. +
  36460. + return result;
  36461. +}
  36462. +
  36463. +/* implementation of vfs' ioctl method of struct file_operations for unix file
  36464. + plugin
  36465. +*/
  36466. +int ioctl_unix_file(struct file *filp, unsigned int cmd,
  36467. + unsigned long arg UNUSED_ARG)
  36468. +{
  36469. + reiser4_context *ctx;
  36470. + int result;
  36471. + struct inode *inode = filp->f_path.dentry->d_inode;
  36472. +
  36473. + ctx = reiser4_init_context(inode->i_sb);
  36474. + if (IS_ERR(ctx))
  36475. + return PTR_ERR(ctx);
  36476. +
  36477. + switch (cmd) {
  36478. + case REISER4_IOC_UNPACK:
  36479. + get_exclusive_access(unix_file_inode_data(inode));
  36480. + result = unpack(filp, inode, 1 /* forever */ );
  36481. + drop_exclusive_access(unix_file_inode_data(inode));
  36482. + break;
  36483. +
  36484. + default:
  36485. + result = RETERR(-ENOTTY);
  36486. + break;
  36487. + }
  36488. + reiser4_exit_context(ctx);
  36489. + return result;
  36490. +}
  36491. +
  36492. +/* implementation of vfs' bmap method of struct address_space_operations for unix
  36493. + file plugin
  36494. +*/
  36495. +sector_t bmap_unix_file(struct address_space * mapping, sector_t lblock)
  36496. +{
  36497. + reiser4_context *ctx;
  36498. + sector_t result;
  36499. + reiser4_key key;
  36500. + coord_t coord;
  36501. + lock_handle lh;
  36502. + struct inode *inode;
  36503. + item_plugin *iplug;
  36504. + sector_t block;
  36505. +
  36506. + inode = mapping->host;
  36507. +
  36508. + ctx = reiser4_init_context(inode->i_sb);
  36509. + if (IS_ERR(ctx))
  36510. + return PTR_ERR(ctx);
  36511. + key_by_inode_and_offset_common(inode,
  36512. + (loff_t) lblock * current_blocksize,
  36513. + &key);
  36514. +
  36515. + init_lh(&lh);
  36516. + result =
  36517. + find_file_item_nohint(&coord, &lh, &key, ZNODE_READ_LOCK, inode);
  36518. + if (cbk_errored(result)) {
  36519. + done_lh(&lh);
  36520. + reiser4_exit_context(ctx);
  36521. + return result;
  36522. + }
  36523. +
  36524. + result = zload(coord.node);
  36525. + if (result) {
  36526. + done_lh(&lh);
  36527. + reiser4_exit_context(ctx);
  36528. + return result;
  36529. + }
  36530. +
  36531. + iplug = item_plugin_by_coord(&coord);
  36532. + if (iplug->s.file.get_block) {
  36533. + result = iplug->s.file.get_block(&coord, lblock, &block);
  36534. + if (result == 0)
  36535. + result = block;
  36536. + } else
  36537. + result = RETERR(-EINVAL);
  36538. +
  36539. + zrelse(coord.node);
  36540. + done_lh(&lh);
  36541. + reiser4_exit_context(ctx);
  36542. + return result;
  36543. +}
  36544. +
  36545. +/**
  36546. + * flow_by_inode_unix_file - initialize structure flow
  36547. + * @inode: inode of file for which read or write is about
  36548. + * @buf: buffer to perform read to or write from
  36549. + * @user: flag showing whether @buf is user space or kernel space
  36550. + * @size: size of buffer @buf
  36551. + * @off: start offset for read or write
  36552. + * @op: READ or WRITE
  36553. + * @flow:
  36554. + *
  36555. + * Initializes fields of @flow: key, size of data, i/o mode (read or write).
  36556. + */
  36557. +int flow_by_inode_unix_file(struct inode *inode,
  36558. + const char __user *buf, int user,
  36559. + loff_t size, loff_t off,
  36560. + rw_op op, flow_t *flow)
  36561. +{
  36562. + assert("nikita-1100", inode != NULL);
  36563. +
  36564. + flow->length = size;
  36565. + memcpy(&flow->data, &buf, sizeof(buf));
  36566. + flow->user = user;
  36567. + flow->op = op;
  36568. + assert("nikita-1931", inode_file_plugin(inode) != NULL);
  36569. + assert("nikita-1932",
  36570. + inode_file_plugin(inode)->key_by_inode ==
  36571. + key_by_inode_and_offset_common);
  36572. + /* calculate key of write position and insert it into flow->key */
  36573. + return key_by_inode_and_offset_common(inode, off, &flow->key);
  36574. +}
  36575. +
  36576. +/* plugin->u.file.set_plug_in_sd = NULL
  36577. + plugin->u.file.set_plug_in_inode = NULL
  36578. + plugin->u.file.create_blank_sd = NULL */
  36579. +/* plugin->u.file.delete */
  36580. +/*
  36581. + plugin->u.file.add_link = reiser4_add_link_common
  36582. + plugin->u.file.rem_link = NULL */
  36583. +
  36584. +/* plugin->u.file.owns_item
  36585. + this is common_file_owns_item with assertion */
  36586. +/* Audited by: green(2002.06.15) */
  36587. +int
  36588. +owns_item_unix_file(const struct inode *inode /* object to check against */ ,
  36589. + const coord_t * coord /* coord to check */ )
  36590. +{
  36591. + int result;
  36592. +
  36593. + result = owns_item_common(inode, coord);
  36594. + if (!result)
  36595. + return 0;
  36596. + if (!plugin_of_group(item_plugin_by_coord(coord),
  36597. + UNIX_FILE_METADATA_ITEM_TYPE))
  36598. + return 0;
  36599. + assert("vs-547",
  36600. + item_id_by_coord(coord) == EXTENT_POINTER_ID ||
  36601. + item_id_by_coord(coord) == FORMATTING_ID);
  36602. + return 1;
  36603. +}
  36604. +
  36605. +static int setattr_truncate(struct inode *inode, struct iattr *attr)
  36606. +{
  36607. + int result;
  36608. + int s_result;
  36609. + loff_t old_size;
  36610. + reiser4_tree *tree;
  36611. +
  36612. + inode_check_scale(inode, inode->i_size, attr->ia_size);
  36613. +
  36614. + old_size = inode->i_size;
  36615. + tree = reiser4_tree_by_inode(inode);
  36616. +
  36617. + result = safe_link_grab(tree, BA_CAN_COMMIT);
  36618. + if (result == 0)
  36619. + result = safe_link_add(inode, SAFE_TRUNCATE);
  36620. + if (result == 0)
  36621. + result = truncate_file_body(inode, attr);
  36622. + if (result)
  36623. + warning("vs-1588", "truncate_file failed: oid %lli, "
  36624. + "old size %lld, new size %lld, retval %d",
  36625. + (unsigned long long)get_inode_oid(inode),
  36626. + old_size, attr->ia_size, result);
  36627. +
  36628. + s_result = safe_link_grab(tree, BA_CAN_COMMIT);
  36629. + if (s_result == 0)
  36630. + s_result =
  36631. + safe_link_del(tree, get_inode_oid(inode), SAFE_TRUNCATE);
  36632. + if (s_result != 0) {
  36633. + warning("nikita-3417", "Cannot kill safelink %lli: %i",
  36634. + (unsigned long long)get_inode_oid(inode), s_result);
  36635. + }
  36636. + safe_link_release(tree);
  36637. + return result;
  36638. +}
  36639. +
  36640. +/* plugin->u.file.setattr method */
  36641. +/* This calls inode_setattr and if truncate is in effect it also takes
  36642. + exclusive inode access to avoid races */
  36643. +int setattr_unix_file(struct dentry *dentry, /* Object to change attributes */
  36644. + struct iattr *attr /* change description */ )
  36645. +{
  36646. + int result;
  36647. +
  36648. + if (attr->ia_valid & ATTR_SIZE) {
  36649. + reiser4_context *ctx;
  36650. + struct unix_file_info *uf_info;
  36651. +
  36652. + /* truncate does reservation itself and requires exclusive
  36653. + access obtained */
  36654. + ctx = reiser4_init_context(dentry->d_inode->i_sb);
  36655. + if (IS_ERR(ctx))
  36656. + return PTR_ERR(ctx);
  36657. +
  36658. + uf_info = unix_file_inode_data(dentry->d_inode);
  36659. + get_exclusive_access_careful(uf_info, dentry->d_inode);
  36660. + result = setattr_truncate(dentry->d_inode, attr);
  36661. + drop_exclusive_access(uf_info);
  36662. + context_set_commit_async(ctx);
  36663. + reiser4_exit_context(ctx);
  36664. + } else
  36665. + result = reiser4_setattr_common(dentry, attr);
  36666. +
  36667. + return result;
  36668. +}
  36669. +
  36670. +/* plugin->u.file.init_inode_data */
  36671. +void
  36672. +init_inode_data_unix_file(struct inode *inode,
  36673. + reiser4_object_create_data * crd, int create)
  36674. +{
  36675. + struct unix_file_info *data;
  36676. +
  36677. + data = unix_file_inode_data(inode);
  36678. + data->container = create ? UF_CONTAINER_EMPTY : UF_CONTAINER_UNKNOWN;
  36679. + init_rwsem(&data->latch);
  36680. + data->tplug = inode_formatting_plugin(inode);
  36681. + data->exclusive_use = 0;
  36682. +
  36683. +#if REISER4_DEBUG
  36684. + data->ea_owner = NULL;
  36685. + atomic_set(&data->nr_neas, 0);
  36686. +#endif
  36687. + init_inode_ordering(inode, crd, create);
  36688. +}
  36689. +
  36690. +/**
  36691. + * delete_unix_file - delete_object of file_plugin
  36692. + * @inode: inode to be deleted
  36693. + *
  36694. + * Truncates file to length 0, removes stat data and safe link.
  36695. + */
  36696. +int delete_object_unix_file(struct inode *inode)
  36697. +{
  36698. + struct unix_file_info *uf_info;
  36699. + int result;
  36700. +
  36701. + if (reiser4_inode_get_flag(inode, REISER4_NO_SD))
  36702. + return 0;
  36703. +
  36704. + /* truncate file body first */
  36705. + uf_info = unix_file_inode_data(inode);
  36706. + get_exclusive_access(uf_info);
  36707. + result = shorten_file(inode, 0 /* size */ );
  36708. + drop_exclusive_access(uf_info);
  36709. +
  36710. + if (result)
  36711. + warning("edward-1556",
  36712. + "failed to truncate file (%llu) on removal: %d",
  36713. + get_inode_oid(inode), result);
  36714. +
  36715. + /* remove stat data and safe link */
  36716. + return reiser4_delete_object_common(inode);
  36717. +}
  36718. +
  36719. +static int do_write_begin(struct file *file, struct page *page,
  36720. + loff_t pos, unsigned len)
  36721. +{
  36722. + int ret;
  36723. + if (len == PAGE_SIZE || PageUptodate(page))
  36724. + return 0;
  36725. +
  36726. + ret = readpage_unix_file(file, page);
  36727. + if (ret) {
  36728. + SetPageError(page);
  36729. + ClearPageUptodate(page);
  36730. + /* All reiser4 readpage() implementations should return the
  36731. + * page locked in case of error. */
  36732. + assert("nikita-3472", PageLocked(page));
  36733. + return ret;
  36734. + }
  36735. + /*
  36736. + * ->readpage() either:
  36737. + *
  36738. + * 1. starts IO against @page. @page is locked for IO in
  36739. + * this case.
  36740. + *
  36741. + * 2. doesn't start IO. @page is unlocked.
  36742. + *
  36743. + * In either case, page should be locked.
  36744. + */
  36745. + lock_page(page);
  36746. + /*
  36747. + * IO (if any) is completed at this point. Check for IO
  36748. + * errors.
  36749. + */
  36750. + if (!PageUptodate(page))
  36751. + return RETERR(-EIO);
  36752. + return ret;
  36753. +}
  36754. +
  36755. +/* plugin->write_begin() */
  36756. +int write_begin_unix_file(struct file *file, struct page *page,
  36757. + loff_t pos, unsigned len, void **fsdata)
  36758. +{
  36759. + int ret;
  36760. + struct inode * inode;
  36761. + struct unix_file_info *info;
  36762. +
  36763. + inode = file_inode(file);
  36764. + info = unix_file_inode_data(inode);
  36765. +
  36766. + ret = reiser4_grab_space_force(estimate_one_insert_into_item
  36767. + (reiser4_tree_by_inode(inode)),
  36768. + BA_CAN_COMMIT);
  36769. + if (ret)
  36770. + return ret;
  36771. + get_exclusive_access(info);
  36772. + ret = find_file_state(file_inode(file), info);
  36773. + if (unlikely(ret != 0)) {
  36774. + drop_exclusive_access(info);
  36775. + return ret;
  36776. + }
  36777. + if (info->container == UF_CONTAINER_TAILS) {
  36778. + ret = tail2extent(info);
  36779. + if (ret) {
  36780. + warning("edward-1575",
  36781. + "tail conversion failed: %d", ret);
  36782. + drop_exclusive_access(info);
  36783. + return ret;
  36784. + }
  36785. + }
  36786. + ret = do_write_begin(file, page, pos, len);
  36787. + if (unlikely(ret != 0))
  36788. + drop_exclusive_access(info);
  36789. + /* else exclusive access will be dropped in ->write_end() */
  36790. + return ret;
  36791. +}
  36792. +
  36793. +/* plugin->write_end() */
  36794. +int write_end_unix_file(struct file *file, struct page *page,
  36795. + loff_t pos, unsigned copied, void *fsdata)
  36796. +{
  36797. + int ret;
  36798. + struct inode *inode;
  36799. + struct unix_file_info *info;
  36800. +
  36801. + inode = file_inode(file);
  36802. + info = unix_file_inode_data(inode);
  36803. +
  36804. + unlock_page(page);
  36805. + ret = find_or_create_extent(page);
  36806. + if (ret) {
  36807. + SetPageError(page);
  36808. + goto exit;
  36809. + }
  36810. + if (pos + copied > inode->i_size) {
  36811. + INODE_SET_FIELD(inode, i_size, pos + copied);
  36812. + ret = reiser4_update_sd(inode);
  36813. + if (unlikely(ret != 0))
  36814. + warning("edward-1604",
  36815. + "Can not update stat-data: %i. FSCK?",
  36816. + ret);
  36817. + }
  36818. + exit:
  36819. + drop_exclusive_access(info);
  36820. + return ret;
  36821. +}
  36822. +
  36823. +/*
  36824. + * Local variables:
  36825. + * c-indentation-style: "K&R"
  36826. + * mode-name: "LC"
  36827. + * c-basic-offset: 8
  36828. + * tab-width: 8
  36829. + * fill-column: 79
  36830. + * scroll-step: 1
  36831. + * End:
  36832. + */
  36833. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/file/file_conversion.c linux-4.14.2/fs/reiser4/plugin/file/file_conversion.c
  36834. --- linux-4.14.2.orig/fs/reiser4/plugin/file/file_conversion.c 1970-01-01 01:00:00.000000000 +0100
  36835. +++ linux-4.14.2/fs/reiser4/plugin/file/file_conversion.c 2017-11-26 22:13:09.000000000 +0100
  36836. @@ -0,0 +1,755 @@
  36837. +/* Copyright 2001, 2002, 2003 by Hans Reiser,
  36838. + licensing governed by reiser4/README */
  36839. +
  36840. +/**
  36841. + * This file contains dispatching hooks, and conversion methods, which
  36842. + * implement transitions in the FILE interface.
  36843. + *
  36844. + * Dispatching hook makes a decision (at dispatching point) about the
  36845. + * most reasonable plugin. Such decision is made in accordance with some
  36846. + * O(1)-heuristic.
  36847. + *
  36848. + * We implement a transition CRYPTCOMPRESS -> UNIX_FILE for files with
  36849. + * incompressible data. Current heuristic to estimate compressibility is
  36850. + * very simple: if first complete logical cluster (64K by default) of a
  36851. + * file is incompressible, then we make a decision, that the whole file
  36852. + * is incompressible.
  36853. + *
  36854. + * To enable dispatching we install a special "magic" compression mode
  36855. + * plugin CONVX_COMPRESSION_MODE_ID at file creation time.
  36856. + *
  36857. + * Note, that we don't perform back conversion (UNIX_FILE->CRYPTCOMPRESS)
  36858. + * because of compatibility reasons.
  36859. + *
  36860. + * In conversion time we protect CS, the conversion set (file's (meta)data
  36861. + * and plugin table (pset)) via special per-inode rw-semaphore (conv_sem).
  36862. + * The methods which implement conversion are CS writers. The methods of FS
  36863. + * interface (file_operations, inode_operations, address_space_operations)
  36864. + * are CS readers.
  36865. + */
  36866. +
  36867. +#include <linux/uio.h>
  36868. +#include "../../inode.h"
  36869. +#include "../cluster.h"
  36870. +#include "file.h"
  36871. +
  36872. +#define conversion_enabled(inode) \
  36873. + (inode_compression_mode_plugin(inode) == \
  36874. + compression_mode_plugin_by_id(CONVX_COMPRESSION_MODE_ID))
  36875. +
  36876. +/**
  36877. + * Located sections (readers and writers of @pset) are not permanently
  36878. + * critical: cryptcompress file can be converted only if the conversion
  36879. + * is enabled (see the macro above). Also we don't perform back
  36880. + * conversion. The following helper macro is a sanity check to decide
  36881. + * if we need the protection (locks are always additional overheads).
  36882. + */
  36883. +#define should_protect(inode) \
  36884. + (inode_file_plugin(inode) == \
  36885. + file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID) && \
  36886. + conversion_enabled(inode))
  36887. +/**
  36888. + * To avoid confusion with read/write file operations, we'll speak about
  36889. + * "passive" protection for FCS readers and "active" protection for FCS
  36890. + * writers. All methods with active or passive protection have suffix
  36891. + * "careful".
  36892. + */
  36893. +/**
  36894. + * Macros for passive protection.
  36895. + *
  36896. + * Construct invariant operation to be supplied to VFS.
  36897. + * The macro accepts the following lexemes:
  36898. + * @type - type of the value represented by the compound statement;
  36899. + * @method - name of an operation to be supplied to VFS (reiser4 file
  36900. + * plugin also should contain a method with such name).
  36901. + */
  36902. +#define PROT_PASSIVE(type, method, args) \
  36903. +({ \
  36904. + type _result; \
  36905. + struct rw_semaphore * guard = \
  36906. + &reiser4_inode_data(inode)->conv_sem; \
  36907. + \
  36908. + if (should_protect(inode)) { \
  36909. + down_read(guard); \
  36910. + if (!should_protect(inode)) \
  36911. + up_read(guard); \
  36912. + } \
  36913. + _result = inode_file_plugin(inode)->method args; \
  36914. + if (should_protect(inode)) \
  36915. + up_read(guard); \
  36916. + _result; \
  36917. +})
  36918. +
  36919. +#define PROT_PASSIVE_VOID(method, args) \
  36920. +({ \
  36921. + struct rw_semaphore * guard = \
  36922. + &reiser4_inode_data(inode)->conv_sem; \
  36923. + \
  36924. + if (should_protect(inode)) { \
  36925. + down_read(guard); \
  36926. + if (!should_protect(inode)) \
  36927. + up_read(guard); \
  36928. + } \
  36929. + inode_file_plugin(inode)->method args; \
  36930. + \
  36931. + if (should_protect(inode)) \
  36932. + up_read(guard); \
  36933. +})
  36934. +
  36935. +/* Pass management to the unix-file plugin with "notail" policy */
  36936. +static int __cryptcompress2unixfile(struct file *file, struct inode * inode)
  36937. +{
  36938. + int result;
  36939. + reiser4_inode *info;
  36940. + struct unix_file_info * uf;
  36941. + info = reiser4_inode_data(inode);
  36942. +
  36943. + result = aset_set_unsafe(&info->pset,
  36944. + PSET_FILE,
  36945. + (reiser4_plugin *)
  36946. + file_plugin_by_id(UNIX_FILE_PLUGIN_ID));
  36947. + if (result)
  36948. + return result;
  36949. + result = aset_set_unsafe(&info->pset,
  36950. + PSET_FORMATTING,
  36951. + (reiser4_plugin *)
  36952. + formatting_plugin_by_id(NEVER_TAILS_FORMATTING_ID));
  36953. + if (result)
  36954. + return result;
  36955. + /* get rid of non-standard plugins */
  36956. + info->plugin_mask &= ~cryptcompress_mask;
  36957. + /* get rid of plugin stat-data extension */
  36958. + info->extmask &= ~(1 << PLUGIN_STAT);
  36959. +
  36960. + reiser4_inode_clr_flag(inode, REISER4_SDLEN_KNOWN);
  36961. +
  36962. + /* FIXME use init_inode_data_unix_file() instead,
  36963. + but avoid init_inode_ordering() */
  36964. + /* Init unix-file specific part of inode */
  36965. + uf = unix_file_inode_data(inode);
  36966. + uf->container = UF_CONTAINER_UNKNOWN;
  36967. + init_rwsem(&uf->latch);
  36968. + uf->tplug = inode_formatting_plugin(inode);
  36969. + uf->exclusive_use = 0;
  36970. +#if REISER4_DEBUG
  36971. + uf->ea_owner = NULL;
  36972. + atomic_set(&uf->nr_neas, 0);
  36973. +#endif
  36974. + /**
  36975. + * we were careful for file_ops, inode_ops and as_ops
  36976. + * to be invariant for plugin conversion, so there is
  36977. + * no need to update ones already installed in the
  36978. + * vfs's residence.
  36979. + */
  36980. + return 0;
  36981. +}
  36982. +
  36983. +#if REISER4_DEBUG
  36984. +static int disabled_conversion_inode_ok(struct inode * inode)
  36985. +{
  36986. + __u64 extmask = reiser4_inode_data(inode)->extmask;
  36987. + __u16 plugin_mask = reiser4_inode_data(inode)->plugin_mask;
  36988. +
  36989. + return ((extmask & (1 << LIGHT_WEIGHT_STAT)) &&
  36990. + (extmask & (1 << UNIX_STAT)) &&
  36991. + (extmask & (1 << LARGE_TIMES_STAT)) &&
  36992. + (extmask & (1 << PLUGIN_STAT)) &&
  36993. + (plugin_mask & (1 << PSET_COMPRESSION_MODE)));
  36994. +}
  36995. +#endif
  36996. +
  36997. +/**
  36998. + * Disable future attempts to schedule/convert file plugin.
  36999. + * This function is called by plugin schedule hooks.
  37000. + *
  37001. + * To disable conversion we assign any compression mode plugin id
  37002. + * different from CONVX_COMPRESSION_MODE_ID.
  37003. + */
  37004. +static int disable_conversion(struct inode * inode)
  37005. +{
  37006. + int result;
  37007. + result =
  37008. + force_plugin_pset(inode,
  37009. + PSET_COMPRESSION_MODE,
  37010. + (reiser4_plugin *)compression_mode_plugin_by_id
  37011. + (LATTD_COMPRESSION_MODE_ID));
  37012. + assert("edward-1500",
  37013. + ergo(!result, disabled_conversion_inode_ok(inode)));
  37014. + return result;
  37015. +}
  37016. +
  37017. +/**
  37018. + * Check if we really have achieved plugin scheduling point
  37019. + */
  37020. +static int check_dispatch_point(struct inode * inode,
  37021. + loff_t pos /* position in the
  37022. + file to write from */,
  37023. + struct cluster_handle * clust,
  37024. + struct dispatch_context * cont)
  37025. +{
  37026. + assert("edward-1505", conversion_enabled(inode));
  37027. + /*
  37028. + * if file size is more then cluster size, then compressible
  37029. + * status must be figured out (i.e. compression was disabled,
  37030. + * or file plugin was converted to unix_file)
  37031. + */
  37032. + assert("edward-1506", inode->i_size <= inode_cluster_size(inode));
  37033. +
  37034. + if (pos > inode->i_size)
  37035. + /* first logical cluster will contain a (partial) hole */
  37036. + return disable_conversion(inode);
  37037. + if (pos < inode_cluster_size(inode))
  37038. + /* writing to the first logical cluster */
  37039. + return 0;
  37040. + /*
  37041. + * here we have:
  37042. + * cluster_size <= pos <= i_size <= cluster_size,
  37043. + * and, hence, pos == i_size == cluster_size
  37044. + */
  37045. + assert("edward-1498",
  37046. + pos == inode->i_size &&
  37047. + pos == inode_cluster_size(inode));
  37048. + assert("edward-1539", cont != NULL);
  37049. + assert("edward-1540", cont->state == DISPATCH_INVAL_STATE);
  37050. +
  37051. + cont->state = DISPATCH_POINT;
  37052. + return 0;
  37053. +}
  37054. +
  37055. +static void start_check_compressibility(struct inode * inode,
  37056. + struct cluster_handle * clust,
  37057. + hint_t * hint)
  37058. +{
  37059. + assert("edward-1507", clust->index == 1);
  37060. + assert("edward-1508", !tfm_cluster_is_uptodate(&clust->tc));
  37061. + assert("edward-1509", cluster_get_tfm_act(&clust->tc) == TFMA_READ);
  37062. +
  37063. + hint_init_zero(hint);
  37064. + clust->hint = hint;
  37065. + clust->index --;
  37066. + clust->nr_pages = size_in_pages(lbytes(clust->index, inode));
  37067. +
  37068. + /* first logical cluster (of index #0) must be complete */
  37069. + assert("edward-1510", lbytes(clust->index, inode) ==
  37070. + inode_cluster_size(inode));
  37071. +}
  37072. +
  37073. +static void finish_check_compressibility(struct inode * inode,
  37074. + struct cluster_handle * clust,
  37075. + hint_t * hint)
  37076. +{
  37077. + reiser4_unset_hint(clust->hint);
  37078. + clust->hint = hint;
  37079. + clust->index ++;
  37080. +}
  37081. +
  37082. +#if REISER4_DEBUG
  37083. +static int prepped_dclust_ok(hint_t * hint)
  37084. +{
  37085. + reiser4_key key;
  37086. + coord_t * coord = &hint->ext_coord.coord;
  37087. +
  37088. + item_key_by_coord(coord, &key);
  37089. + return (item_id_by_coord(coord) == CTAIL_ID &&
  37090. + !coord_is_unprepped_ctail(coord) &&
  37091. + (get_key_offset(&key) + nr_units_ctail(coord) ==
  37092. + dclust_get_extension_dsize(hint)));
  37093. +}
  37094. +#endif
  37095. +
  37096. +#define fifty_persent(size) (size >> 1)
  37097. +/* evaluation of data compressibility */
  37098. +#define data_is_compressible(osize, isize) \
  37099. + (osize < fifty_persent(isize))
  37100. +
  37101. +/**
  37102. + * A simple O(1)-heuristic for compressibility.
  37103. + * This is called not more than one time per file's life.
  37104. + * Read first logical cluster (of index #0) and estimate its compressibility.
  37105. + * Save estimation result in @cont.
  37106. + */
  37107. +static int read_check_compressibility(struct inode * inode,
  37108. + struct cluster_handle * clust,
  37109. + struct dispatch_context * cont)
  37110. +{
  37111. + int i;
  37112. + int result;
  37113. + size_t dst_len;
  37114. + hint_t tmp_hint;
  37115. + hint_t * cur_hint = clust->hint;
  37116. + assert("edward-1541", cont->state == DISPATCH_POINT);
  37117. +
  37118. + start_check_compressibility(inode, clust, &tmp_hint);
  37119. +
  37120. + reset_cluster_pgset(clust, cluster_nrpages(inode));
  37121. + result = grab_page_cluster(inode, clust, READ_OP);
  37122. + if (result)
  37123. + return result;
  37124. + /* Read page cluster here */
  37125. + for (i = 0; i < clust->nr_pages; i++) {
  37126. + struct page *page = clust->pages[i];
  37127. + lock_page(page);
  37128. + result = do_readpage_ctail(inode, clust, page,
  37129. + ZNODE_READ_LOCK);
  37130. + unlock_page(page);
  37131. + if (result)
  37132. + goto error;
  37133. + }
  37134. + tfm_cluster_clr_uptodate(&clust->tc);
  37135. +
  37136. + cluster_set_tfm_act(&clust->tc, TFMA_WRITE);
  37137. +
  37138. + if (hint_is_valid(&tmp_hint) && !hint_is_unprepped_dclust(&tmp_hint)) {
  37139. + /* length of compressed data is known, no need to compress */
  37140. + assert("edward-1511",
  37141. + znode_is_any_locked(tmp_hint.lh.node));
  37142. + assert("edward-1512",
  37143. + WITH_DATA(tmp_hint.ext_coord.coord.node,
  37144. + prepped_dclust_ok(&tmp_hint)));
  37145. + dst_len = dclust_get_extension_dsize(&tmp_hint);
  37146. + }
  37147. + else {
  37148. + struct tfm_cluster * tc = &clust->tc;
  37149. + compression_plugin * cplug = inode_compression_plugin(inode);
  37150. + result = grab_tfm_stream(inode, tc, INPUT_STREAM);
  37151. + if (result)
  37152. + goto error;
  37153. + for (i = 0; i < clust->nr_pages; i++) {
  37154. + char *data;
  37155. + lock_page(clust->pages[i]);
  37156. + BUG_ON(!PageUptodate(clust->pages[i]));
  37157. + data = kmap(clust->pages[i]);
  37158. + memcpy(tfm_stream_data(tc, INPUT_STREAM) + pg_to_off(i),
  37159. + data, PAGE_SIZE);
  37160. + kunmap(clust->pages[i]);
  37161. + unlock_page(clust->pages[i]);
  37162. + }
  37163. + result = grab_tfm_stream(inode, tc, OUTPUT_STREAM);
  37164. + if (result)
  37165. + goto error;
  37166. + result = grab_coa(tc, cplug);
  37167. + if (result)
  37168. + goto error;
  37169. + tc->len = tc->lsize = lbytes(clust->index, inode);
  37170. + assert("edward-1513", tc->len == inode_cluster_size(inode));
  37171. + dst_len = tfm_stream_size(tc, OUTPUT_STREAM);
  37172. + cplug->compress(get_coa(tc, cplug->h.id, tc->act),
  37173. + tfm_input_data(clust), tc->len,
  37174. + tfm_output_data(clust), &dst_len);
  37175. + assert("edward-1514",
  37176. + dst_len <= tfm_stream_size(tc, OUTPUT_STREAM));
  37177. + }
  37178. + finish_check_compressibility(inode, clust, cur_hint);
  37179. + cont->state =
  37180. + (data_is_compressible(dst_len, inode_cluster_size(inode)) ?
  37181. + DISPATCH_REMAINS_OLD :
  37182. + DISPATCH_ASSIGNED_NEW);
  37183. + return 0;
  37184. + error:
  37185. + put_page_cluster(clust, inode, READ_OP);
  37186. + return result;
  37187. +}
  37188. +
  37189. +/* Cut disk cluster of index @idx */
  37190. +static int cut_disk_cluster(struct inode * inode, cloff_t idx)
  37191. +{
  37192. + reiser4_key from, to;
  37193. + assert("edward-1515", inode_file_plugin(inode) ==
  37194. + file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID));
  37195. + key_by_inode_cryptcompress(inode, clust_to_off(idx, inode), &from);
  37196. + to = from;
  37197. + set_key_offset(&to,
  37198. + get_key_offset(&from) + inode_cluster_size(inode) - 1);
  37199. + return reiser4_cut_tree(reiser4_tree_by_inode(inode),
  37200. + &from, &to, inode, 0);
  37201. +}
  37202. +
  37203. +static int reserve_cryptcompress2unixfile(struct inode *inode)
  37204. +{
  37205. + reiser4_block_nr unformatted_nodes;
  37206. + reiser4_tree *tree;
  37207. +
  37208. + tree = reiser4_tree_by_inode(inode);
  37209. +
  37210. + /* number of unformatted nodes which will be created */
  37211. + unformatted_nodes = cluster_nrpages(inode); /* N */
  37212. +
  37213. + /*
  37214. + * space required for one iteration of extent->tail conversion:
  37215. + *
  37216. + * 1. kill ctail items
  37217. + *
  37218. + * 2. insert N unformatted nodes
  37219. + *
  37220. + * 3. insert N (worst-case single-block
  37221. + * extents) extent units.
  37222. + *
  37223. + * 4. drilling to the leaf level by coord_by_key()
  37224. + *
  37225. + * 5. possible update of stat-data
  37226. + *
  37227. + */
  37228. + grab_space_enable();
  37229. + return reiser4_grab_space
  37230. + (2 * tree->height +
  37231. + unformatted_nodes +
  37232. + unformatted_nodes * estimate_one_insert_into_item(tree) +
  37233. + 1 + estimate_one_insert_item(tree) +
  37234. + inode_file_plugin(inode)->estimate.update(inode),
  37235. + BA_CAN_COMMIT);
  37236. +}
  37237. +
  37238. +/**
  37239. + * Convert cryptcompress file plugin to unix_file plugin.
  37240. + */
  37241. +static int cryptcompress2unixfile(struct file *file, struct inode *inode,
  37242. + struct dispatch_context *cont)
  37243. +{
  37244. + int i;
  37245. + int result = 0;
  37246. + struct cryptcompress_info *cr_info;
  37247. + struct unix_file_info *uf_info;
  37248. + assert("edward-1516", cont->pages[0]->index == 0);
  37249. +
  37250. + /* release all cryptcompress-specific resources */
  37251. + cr_info = cryptcompress_inode_data(inode);
  37252. + result = reserve_cryptcompress2unixfile(inode);
  37253. + if (result)
  37254. + goto out;
  37255. + /* tell kill_hook to not truncate pages */
  37256. + reiser4_inode_set_flag(inode, REISER4_FILE_CONV_IN_PROGRESS);
  37257. + result = cut_disk_cluster(inode, 0);
  37258. + if (result)
  37259. + goto out;
  37260. + /* captured jnode of cluster and associated resources (pages,
  37261. + reserved disk space) were released by ->kill_hook() method
  37262. + of the item plugin */
  37263. +
  37264. + result = __cryptcompress2unixfile(file, inode);
  37265. + if (result)
  37266. + goto out;
  37267. + /* At this point file is managed by unix file plugin */
  37268. +
  37269. + uf_info = unix_file_inode_data(inode);
  37270. +
  37271. + assert("edward-1518",
  37272. + ergo(jprivate(cont->pages[0]),
  37273. + !jnode_is_cluster_page(jprivate(cont->pages[0]))));
  37274. + for(i = 0; i < cont->nr_pages; i++) {
  37275. + assert("edward-1519", cont->pages[i]);
  37276. + assert("edward-1520", PageUptodate(cont->pages[i]));
  37277. +
  37278. + result = find_or_create_extent(cont->pages[i]);
  37279. + if (result)
  37280. + break;
  37281. + }
  37282. + if (unlikely(result))
  37283. + goto out;
  37284. + uf_info->container = UF_CONTAINER_EXTENTS;
  37285. + result = reiser4_update_sd(inode);
  37286. + out:
  37287. + all_grabbed2free();
  37288. + return result;
  37289. +}
  37290. +
  37291. +#define convert_file_plugin cryptcompress2unixfile
  37292. +
  37293. +/**
  37294. + * This is called by ->write() method of a cryptcompress file plugin.
  37295. + * Make a decision about the most reasonable file plugin id to manage
  37296. + * the file.
  37297. + */
  37298. +int write_dispatch_hook(struct file *file, struct inode *inode,
  37299. + loff_t pos, struct cluster_handle *clust,
  37300. + struct dispatch_context *cont)
  37301. +{
  37302. + int result;
  37303. + if (!conversion_enabled(inode))
  37304. + return 0;
  37305. + result = check_dispatch_point(inode, pos, clust, cont);
  37306. + if (result || cont->state != DISPATCH_POINT)
  37307. + return result;
  37308. + result = read_check_compressibility(inode, clust, cont);
  37309. + if (result)
  37310. + return result;
  37311. + if (cont->state == DISPATCH_REMAINS_OLD) {
  37312. + put_page_cluster(clust, inode, READ_OP);
  37313. + return disable_conversion(inode);
  37314. + }
  37315. + assert("edward-1543", cont->state == DISPATCH_ASSIGNED_NEW);
  37316. + /*
  37317. + * page cluster is grabbed and uptodate. It will be
  37318. + * released with a pgset after plugin conversion is
  37319. + * finished, see put_dispatch_context().
  37320. + */
  37321. + reiser4_unset_hint(clust->hint);
  37322. + move_cluster_pgset(clust, &cont->pages, &cont->nr_pages);
  37323. + return 0;
  37324. +}
  37325. +
  37326. +/**
  37327. + * This is called by ->setattr() method of cryptcompress file plugin.
  37328. + */
  37329. +int setattr_dispatch_hook(struct inode * inode)
  37330. +{
  37331. + if (conversion_enabled(inode))
  37332. + return disable_conversion(inode);
  37333. + return 0;
  37334. +}
  37335. +
  37336. +static inline void init_dispatch_context(struct dispatch_context * cont)
  37337. +{
  37338. + memset(cont, 0, sizeof(*cont));
  37339. +}
  37340. +
  37341. +static inline void done_dispatch_context(struct dispatch_context * cont,
  37342. + struct inode * inode)
  37343. +{
  37344. + if (cont->pages) {
  37345. + __put_page_cluster(0, cont->nr_pages, cont->pages, inode);
  37346. + kfree(cont->pages);
  37347. + }
  37348. +}
  37349. +
  37350. +static inline ssize_t reiser4_write_checks(struct file *file,
  37351. + const char __user *buf,
  37352. + size_t count, loff_t *off)
  37353. +{
  37354. + ssize_t result;
  37355. + struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
  37356. + struct kiocb iocb;
  37357. + struct iov_iter iter;
  37358. +
  37359. + init_sync_kiocb(&iocb, file);
  37360. + iocb.ki_pos = *off;
  37361. + iov_iter_init(&iter, WRITE, &iov, 1, count);
  37362. +
  37363. + result = generic_write_checks(&iocb, &iter);
  37364. + *off = iocb.ki_pos;
  37365. + return result;
  37366. +}
  37367. +
  37368. +/*
  37369. + * ->write() VFS file operation
  37370. + *
  37371. + * performs "intelligent" conversion in the FILE interface.
  37372. + * Write a file in 3 steps (2nd and 3rd steps are optional).
  37373. + */
  37374. +ssize_t reiser4_write_dispatch(struct file *file, const char __user *buf,
  37375. + size_t count, loff_t *off)
  37376. +{
  37377. + ssize_t result;
  37378. + reiser4_context *ctx;
  37379. + ssize_t written_old = 0; /* bytes written with initial plugin */
  37380. + ssize_t written_new = 0; /* bytes written with new plugin */
  37381. + struct dispatch_context cont;
  37382. + struct inode * inode = file_inode(file);
  37383. +
  37384. + ctx = reiser4_init_context(inode->i_sb);
  37385. + if (IS_ERR(ctx))
  37386. + return PTR_ERR(ctx);
  37387. + current->backing_dev_info = inode_to_bdi(inode);
  37388. + init_dispatch_context(&cont);
  37389. + inode_lock(inode);
  37390. +
  37391. + result = reiser4_write_checks(file, buf, count, off);
  37392. + if (unlikely(result <= 0))
  37393. + goto exit;
  37394. + /**
  37395. + * First step.
  37396. + * Start write with initial file plugin.
  37397. + * Keep a plugin schedule status at @cont (if any).
  37398. + */
  37399. + written_old = inode_file_plugin(inode)->write(file,
  37400. + buf,
  37401. + count,
  37402. + off,
  37403. + &cont);
  37404. + if (cont.state != DISPATCH_ASSIGNED_NEW || written_old < 0)
  37405. + goto exit;
  37406. + /**
  37407. + * Second step.
  37408. + * New file plugin has been scheduled.
  37409. + * Perform conversion to the new plugin.
  37410. + */
  37411. + down_read(&reiser4_inode_data(inode)->conv_sem);
  37412. + result = convert_file_plugin(file, inode, &cont);
  37413. + up_read(&reiser4_inode_data(inode)->conv_sem);
  37414. + if (result) {
  37415. + warning("edward-1544",
  37416. + "Inode %llu: file plugin conversion failed (%d)",
  37417. + (unsigned long long)get_inode_oid(inode),
  37418. + (int)result);
  37419. + goto exit;
  37420. + }
  37421. + reiser4_txn_restart(ctx);
  37422. + /**
  37423. + * Third step:
  37424. + * Finish write with the new file plugin.
  37425. + */
  37426. + assert("edward-1536",
  37427. + inode_file_plugin(inode) ==
  37428. + file_plugin_by_id(UNIX_FILE_PLUGIN_ID));
  37429. +
  37430. + written_new = inode_file_plugin(inode)->write(file,
  37431. + buf + written_old,
  37432. + count - written_old,
  37433. + off,
  37434. + NULL);
  37435. + exit:
  37436. + inode_unlock(inode);
  37437. + done_dispatch_context(&cont, inode);
  37438. + current->backing_dev_info = NULL;
  37439. + context_set_commit_async(ctx);
  37440. + reiser4_exit_context(ctx);
  37441. +
  37442. + return written_old + (written_new < 0 ? 0 : written_new);
  37443. +}
  37444. +
  37445. +/*
  37446. + * Dispatchers with "passive" protection for:
  37447. + *
  37448. + * ->open();
  37449. + * ->read();
  37450. + * ->ioctl();
  37451. + * ->mmap();
  37452. + * ->release();
  37453. + * ->bmap().
  37454. + */
  37455. +
  37456. +int reiser4_open_dispatch(struct inode *inode, struct file *file)
  37457. +{
  37458. + return PROT_PASSIVE(int, open, (inode, file));
  37459. +}
  37460. +
  37461. +ssize_t reiser4_read_dispatch(struct file * file, char __user * buf,
  37462. + size_t size, loff_t * off)
  37463. +{
  37464. + struct inode * inode = file_inode(file);
  37465. + return PROT_PASSIVE(ssize_t, read, (file, buf, size, off));
  37466. +}
  37467. +
  37468. +long reiser4_ioctl_dispatch(struct file *filp, unsigned int cmd,
  37469. + unsigned long arg)
  37470. +{
  37471. + struct inode * inode = file_inode(filp);
  37472. + return PROT_PASSIVE(int, ioctl, (filp, cmd, arg));
  37473. +}
  37474. +
  37475. +int reiser4_mmap_dispatch(struct file *file, struct vm_area_struct *vma)
  37476. +{
  37477. + struct inode *inode = file_inode(file);
  37478. + return PROT_PASSIVE(int, mmap, (file, vma));
  37479. +}
  37480. +
  37481. +int reiser4_release_dispatch(struct inode *inode, struct file *file)
  37482. +{
  37483. + return PROT_PASSIVE(int, release, (inode, file));
  37484. +}
  37485. +
  37486. +sector_t reiser4_bmap_dispatch(struct address_space * mapping, sector_t lblock)
  37487. +{
  37488. + struct inode *inode = mapping->host;
  37489. + return PROT_PASSIVE(sector_t, bmap, (mapping, lblock));
  37490. +}
  37491. +
  37492. +/**
  37493. + * NOTE: The following two methods are
  37494. + * used only for loopback functionality.
  37495. + * reiser4_write_end() can not cope with
  37496. + * short writes for now.
  37497. + */
  37498. +int reiser4_write_begin_dispatch(struct file *file,
  37499. + struct address_space *mapping,
  37500. + loff_t pos,
  37501. + unsigned len,
  37502. + unsigned flags,
  37503. + struct page **pagep,
  37504. + void **fsdata)
  37505. +{
  37506. + int ret = 0;
  37507. + struct page *page;
  37508. + pgoff_t index;
  37509. + reiser4_context *ctx;
  37510. + struct inode * inode = file_inode(file);
  37511. +
  37512. + index = pos >> PAGE_SHIFT;
  37513. + page = grab_cache_page_write_begin(mapping, index,
  37514. + flags & AOP_FLAG_NOFS);
  37515. + *pagep = page;
  37516. + if (!page)
  37517. + return -ENOMEM;
  37518. +
  37519. + ctx = reiser4_init_context(file_inode(file)->i_sb);
  37520. + if (IS_ERR(ctx)) {
  37521. + ret = PTR_ERR(ctx);
  37522. + goto err2;
  37523. + }
  37524. + ret = reiser4_grab_space_force(/* for update_sd:
  37525. + * one when updating file size and
  37526. + * one when updating mtime/ctime */
  37527. + 2 * estimate_update_common(inode),
  37528. + BA_CAN_COMMIT);
  37529. + if (ret)
  37530. + goto err1;
  37531. + ret = PROT_PASSIVE(int, write_begin, (file, page, pos, len, fsdata));
  37532. + if (unlikely(ret))
  37533. + goto err1;
  37534. + /* Success. Resources will be released in write_end_dispatch */
  37535. + return 0;
  37536. + err1:
  37537. + reiser4_exit_context(ctx);
  37538. + err2:
  37539. + unlock_page(page);
  37540. + put_page(page);
  37541. + return ret;
  37542. +}
  37543. +
  37544. +int reiser4_write_end_dispatch(struct file *file,
  37545. + struct address_space *mapping,
  37546. + loff_t pos,
  37547. + unsigned len,
  37548. + unsigned copied,
  37549. + struct page *page,
  37550. + void *fsdata)
  37551. +{
  37552. + int ret;
  37553. + reiser4_context *ctx;
  37554. + struct inode *inode = page->mapping->host;
  37555. +
  37556. + assert("umka-3101", file != NULL);
  37557. + assert("umka-3102", page != NULL);
  37558. + assert("umka-3093", PageLocked(page));
  37559. +
  37560. + ctx = get_current_context();
  37561. +
  37562. + SetPageUptodate(page);
  37563. + set_page_dirty_notag(page);
  37564. +
  37565. + ret = PROT_PASSIVE(int, write_end, (file, page, pos, copied, fsdata));
  37566. + put_page(page);
  37567. +
  37568. + /* don't commit transaction under inode semaphore */
  37569. + context_set_commit_async(ctx);
  37570. + reiser4_exit_context(ctx);
  37571. + return ret == 0 ? copied : ret;
  37572. +}
  37573. +
  37574. +/*
  37575. + * Dispatchers without protection
  37576. + */
  37577. +int reiser4_setattr_dispatch(struct dentry *dentry, struct iattr *attr)
  37578. +{
  37579. + return inode_file_plugin(dentry->d_inode)->setattr(dentry, attr);
  37580. +}
  37581. +
  37582. +/*
  37583. + Local variables:
  37584. + c-indentation-style: "K&R"
  37585. + mode-name: "LC"
  37586. + c-basic-offset: 8
  37587. + tab-width: 8
  37588. + fill-column: 80
  37589. + scroll-step: 1
  37590. + End:
  37591. +*/
  37592. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/file/file.h linux-4.14.2/fs/reiser4/plugin/file/file.h
  37593. --- linux-4.14.2.orig/fs/reiser4/plugin/file/file.h 1970-01-01 01:00:00.000000000 +0100
  37594. +++ linux-4.14.2/fs/reiser4/plugin/file/file.h 2017-11-26 22:13:09.000000000 +0100
  37595. @@ -0,0 +1,322 @@
  37596. +/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
  37597. + * reiser4/README */
  37598. +
  37599. +/* this file contains declarations of methods implementing
  37600. + file plugins (UNIX_FILE_PLUGIN_ID, CRYPTCOMPRESS_FILE_PLUGIN_ID
  37601. + and SYMLINK_FILE_PLUGIN_ID) */
  37602. +
  37603. +#if !defined( __REISER4_FILE_H__ )
  37604. +#define __REISER4_FILE_H__
  37605. +
  37606. +/* possible states in dispatching process */
  37607. +typedef enum {
  37608. + DISPATCH_INVAL_STATE, /* invalid state */
  37609. + DISPATCH_POINT, /* dispatching point has been achieved */
  37610. + DISPATCH_REMAINS_OLD, /* made a decision to manage by old plugin */
  37611. + DISPATCH_ASSIGNED_NEW /* a new plugin has been assigned */
  37612. +} dispatch_state;
  37613. +
  37614. +struct dispatch_context {
  37615. + int nr_pages;
  37616. + struct page **pages;
  37617. + dispatch_state state;
  37618. +};
  37619. +
  37620. +/*
  37621. + * Declarations of methods provided for VFS.
  37622. + */
  37623. +
  37624. +/* inode operations */
  37625. +int reiser4_setattr_dispatch(struct dentry *, struct iattr *);
  37626. +
  37627. +/* file operations */
  37628. +ssize_t reiser4_read_dispatch(struct file *, char __user *buf,
  37629. + size_t count, loff_t *off);
  37630. +ssize_t reiser4_write_dispatch(struct file *, const char __user *buf,
  37631. + size_t count, loff_t * off);
  37632. +long reiser4_ioctl_dispatch(struct file *filp, unsigned int cmd,
  37633. + unsigned long arg);
  37634. +int reiser4_mmap_dispatch(struct file *, struct vm_area_struct *);
  37635. +int reiser4_open_dispatch(struct inode *inode, struct file *file);
  37636. +int reiser4_release_dispatch(struct inode *, struct file *);
  37637. +int reiser4_sync_file_common(struct file *, loff_t, loff_t, int datasync);
  37638. +
  37639. +/* address space operations */
  37640. +int reiser4_readpage_dispatch(struct file *, struct page *);
  37641. +int reiser4_readpages_dispatch(struct file *, struct address_space *,
  37642. + struct list_head *, unsigned);
  37643. +int reiser4_writepages_dispatch(struct address_space *,
  37644. + struct writeback_control *);
  37645. +int reiser4_write_begin_dispatch(struct file *file,
  37646. + struct address_space *mapping,
  37647. + loff_t pos, unsigned len, unsigned flags,
  37648. + struct page **pagep, void **fsdata);
  37649. +int reiser4_write_end_dispatch(struct file *file,
  37650. + struct address_space *mapping,
  37651. + loff_t pos, unsigned len, unsigned copied,
  37652. + struct page *page, void *fsdata);
  37653. +sector_t reiser4_bmap_dispatch(struct address_space *, sector_t lblock);
  37654. +
  37655. +/*
  37656. + * Private methods of unix-file plugin
  37657. + * (UNIX_FILE_PLUGIN_ID)
  37658. + */
  37659. +
  37660. +/* private inode operations */
  37661. +int setattr_unix_file(struct dentry *, struct iattr *);
  37662. +
  37663. +/* private file operations */
  37664. +
  37665. +ssize_t read_unix_file(struct file *, char __user *buf, size_t read_amount,
  37666. + loff_t *off);
  37667. +ssize_t write_unix_file(struct file *, const char __user *buf, size_t write_amount,
  37668. + loff_t * off, struct dispatch_context * cont);
  37669. +int ioctl_unix_file(struct file *, unsigned int cmd, unsigned long arg);
  37670. +int mmap_unix_file(struct file *, struct vm_area_struct *);
  37671. +int open_unix_file(struct inode *, struct file *);
  37672. +int release_unix_file(struct inode *, struct file *);
  37673. +
  37674. +/* private address space operations */
  37675. +int readpage_unix_file(struct file *, struct page *);
  37676. +int readpages_unix_file(struct file*, struct address_space*, struct list_head*,
  37677. + unsigned);
  37678. +int writepages_unix_file(struct address_space *, struct writeback_control *);
  37679. +int write_begin_unix_file(struct file *file, struct page *page,
  37680. + loff_t pos, unsigned len, void **fsdata);
  37681. +int write_end_unix_file(struct file *file, struct page *page,
  37682. + loff_t pos, unsigned copied, void *fsdata);
  37683. +sector_t bmap_unix_file(struct address_space *, sector_t lblock);
  37684. +
  37685. +/* other private methods */
  37686. +int delete_object_unix_file(struct inode *);
  37687. +int flow_by_inode_unix_file(struct inode *, const char __user *buf,
  37688. + int user, loff_t, loff_t, rw_op, flow_t *);
  37689. +int owns_item_unix_file(const struct inode *, const coord_t *);
  37690. +void init_inode_data_unix_file(struct inode *, reiser4_object_create_data *,
  37691. + int create);
  37692. +
  37693. +/*
  37694. + * Private methods of cryptcompress file plugin
  37695. + * (CRYPTCOMPRESS_FILE_PLUGIN_ID)
  37696. + */
  37697. +
  37698. +/* private inode operations */
  37699. +int setattr_cryptcompress(struct dentry *, struct iattr *);
  37700. +
  37701. +/* private file operations */
  37702. +ssize_t read_cryptcompress(struct file *, char __user *buf,
  37703. + size_t count, loff_t *off);
  37704. +ssize_t write_cryptcompress(struct file *, const char __user *buf,
  37705. + size_t count, loff_t * off,
  37706. + struct dispatch_context *cont);
  37707. +int ioctl_cryptcompress(struct file *, unsigned int cmd, unsigned long arg);
  37708. +int mmap_cryptcompress(struct file *, struct vm_area_struct *);
  37709. +int open_cryptcompress(struct inode *, struct file *);
  37710. +int release_cryptcompress(struct inode *, struct file *);
  37711. +
  37712. +/* private address space operations */
  37713. +int readpage_cryptcompress(struct file *, struct page *);
  37714. +int readpages_cryptcompress(struct file*, struct address_space*,
  37715. + struct list_head*, unsigned);
  37716. +int writepages_cryptcompress(struct address_space *,
  37717. + struct writeback_control *);
  37718. +int write_begin_cryptcompress(struct file *file, struct page *page,
  37719. + loff_t pos, unsigned len, void **fsdata);
  37720. +int write_end_cryptcompress(struct file *file, struct page *page,
  37721. + loff_t pos, unsigned copied, void *fsdata);
  37722. +sector_t bmap_cryptcompress(struct address_space *, sector_t lblock);
  37723. +
  37724. +/* other private methods */
  37725. +int flow_by_inode_cryptcompress(struct inode *, const char __user *buf,
  37726. + int user, loff_t, loff_t, rw_op, flow_t *);
  37727. +int key_by_inode_cryptcompress(struct inode *, loff_t off, reiser4_key *);
  37728. +int create_object_cryptcompress(struct inode *, struct inode *,
  37729. + reiser4_object_create_data *);
  37730. +int delete_object_cryptcompress(struct inode *);
  37731. +void init_inode_data_cryptcompress(struct inode *, reiser4_object_create_data *,
  37732. + int create);
  37733. +int cut_tree_worker_cryptcompress(tap_t *, const reiser4_key * from_key,
  37734. + const reiser4_key * to_key,
  37735. + reiser4_key * smallest_removed,
  37736. + struct inode *object, int truncate,
  37737. + int *progress);
  37738. +void destroy_inode_cryptcompress(struct inode *);
  37739. +
  37740. +/*
  37741. + * Private methods of symlink file plugin
  37742. + * (SYMLINK_FILE_PLUGIN_ID)
  37743. + */
  37744. +int reiser4_create_symlink(struct inode *symlink, struct inode *dir,
  37745. + reiser4_object_create_data *);
  37746. +void destroy_inode_symlink(struct inode *);
  37747. +
  37748. +/*
  37749. + * all the write into unix file is performed by item write method. Write method
  37750. + * of unix file plugin only decides which item plugin (extent or tail) and in
  37751. + * which mode (one from the enum below) to call
  37752. + */
  37753. +typedef enum {
  37754. + FIRST_ITEM = 1,
  37755. + APPEND_ITEM = 2,
  37756. + OVERWRITE_ITEM = 3
  37757. +} write_mode_t;
  37758. +
  37759. +/* unix file may be in one the following states */
  37760. +typedef enum {
  37761. + UF_CONTAINER_UNKNOWN = 0,
  37762. + UF_CONTAINER_TAILS = 1,
  37763. + UF_CONTAINER_EXTENTS = 2,
  37764. + UF_CONTAINER_EMPTY = 3
  37765. +} file_container_t;
  37766. +
  37767. +struct formatting_plugin;
  37768. +struct inode;
  37769. +
  37770. +/* unix file plugin specific part of reiser4 inode */
  37771. +struct unix_file_info {
  37772. + /*
  37773. + * this read-write lock protects file containerization change. Accesses
  37774. + * which do not change file containerization (see file_container_t)
  37775. + * (read, readpage, writepage, write (until tail conversion is
  37776. + * involved)) take read-lock. Accesses which modify file
  37777. + * containerization (truncate, conversion from tail to extent and back)
  37778. + * take write-lock.
  37779. + */
  37780. + struct rw_semaphore latch;
  37781. + /* this enum specifies which items are used to build the file */
  37782. + file_container_t container;
  37783. + /*
  37784. + * plugin which controls when file is to be converted to extents and
  37785. + * back to tail
  37786. + */
  37787. + struct formatting_plugin *tplug;
  37788. + /* if this is set, file is in exclusive use */
  37789. + int exclusive_use;
  37790. +#if REISER4_DEBUG
  37791. + /* pointer to task struct of thread owning exclusive access to file */
  37792. + void *ea_owner;
  37793. + atomic_t nr_neas;
  37794. + void *last_reader;
  37795. +#endif
  37796. +};
  37797. +
  37798. +struct unix_file_info *unix_file_inode_data(const struct inode *inode);
  37799. +void get_exclusive_access(struct unix_file_info *);
  37800. +void drop_exclusive_access(struct unix_file_info *);
  37801. +void get_nonexclusive_access(struct unix_file_info *);
  37802. +void drop_nonexclusive_access(struct unix_file_info *);
  37803. +int try_to_get_nonexclusive_access(struct unix_file_info *);
  37804. +int find_file_item(hint_t *, const reiser4_key *, znode_lock_mode,
  37805. + struct inode *);
  37806. +int find_file_item_nohint(coord_t *, lock_handle *,
  37807. + const reiser4_key *, znode_lock_mode,
  37808. + struct inode *);
  37809. +
  37810. +int load_file_hint(struct file *, hint_t *);
  37811. +void save_file_hint(struct file *, const hint_t *);
  37812. +
  37813. +#include "../item/extent.h"
  37814. +#include "../item/tail.h"
  37815. +#include "../item/ctail.h"
  37816. +
  37817. +struct uf_coord {
  37818. + coord_t coord;
  37819. + lock_handle *lh;
  37820. + int valid;
  37821. + union {
  37822. + struct extent_coord_extension extent;
  37823. + struct tail_coord_extension tail;
  37824. + struct ctail_coord_extension ctail;
  37825. + } extension;
  37826. +};
  37827. +
  37828. +#include "../../forward.h"
  37829. +#include "../../seal.h"
  37830. +#include "../../lock.h"
  37831. +
  37832. +/*
  37833. + * This structure is used to speed up file operations (reads and writes). A
  37834. + * hint is a suggestion about where a key resolved to last time. A seal
  37835. + * indicates whether a node has been modified since a hint was last recorded.
  37836. + * You check the seal, and if the seal is still valid, you can use the hint
  37837. + * without traversing the tree again.
  37838. + */
  37839. +struct hint {
  37840. + seal_t seal; /* a seal over last file item accessed */
  37841. + uf_coord_t ext_coord;
  37842. + loff_t offset;
  37843. + znode_lock_mode mode;
  37844. + lock_handle lh;
  37845. +};
  37846. +
  37847. +static inline int hint_is_valid(hint_t * hint)
  37848. +{
  37849. + return hint->ext_coord.valid;
  37850. +}
  37851. +
  37852. +static inline void hint_set_valid(hint_t * hint)
  37853. +{
  37854. + hint->ext_coord.valid = 1;
  37855. +}
  37856. +
  37857. +static inline void hint_clr_valid(hint_t * hint)
  37858. +{
  37859. + hint->ext_coord.valid = 0;
  37860. +}
  37861. +
  37862. +int load_file_hint(struct file *, hint_t *);
  37863. +void save_file_hint(struct file *, const hint_t *);
  37864. +void hint_init_zero(hint_t *);
  37865. +void reiser4_set_hint(hint_t *, const reiser4_key *, znode_lock_mode);
  37866. +int hint_is_set(const hint_t *);
  37867. +void reiser4_unset_hint(hint_t *);
  37868. +
  37869. +int reiser4_update_file_size(struct inode *, loff_t, int update_sd);
  37870. +int cut_file_items(struct inode *, loff_t new_size,
  37871. + int update_sd, loff_t cur_size,
  37872. + int (*update_actor) (struct inode *, loff_t, int));
  37873. +#if REISER4_DEBUG
  37874. +
  37875. +/* return 1 if exclusive access is obtained, 0 - otherwise */
  37876. +static inline int ea_obtained(struct unix_file_info * uf_info)
  37877. +{
  37878. + int ret;
  37879. +
  37880. + ret = down_read_trylock(&uf_info->latch);
  37881. + if (ret)
  37882. + up_read(&uf_info->latch);
  37883. + return !ret;
  37884. +}
  37885. +
  37886. +#endif
  37887. +
  37888. +#define WRITE_GRANULARITY 32
  37889. +
  37890. +int tail2extent(struct unix_file_info *);
  37891. +int extent2tail(struct file *, struct unix_file_info *);
  37892. +
  37893. +int goto_right_neighbor(coord_t *, lock_handle *);
  37894. +int find_or_create_extent(struct page *);
  37895. +int equal_to_ldk(znode *, const reiser4_key *);
  37896. +
  37897. +void init_uf_coord(uf_coord_t *uf_coord, lock_handle *lh);
  37898. +
  37899. +static inline int cbk_errored(int cbk_result)
  37900. +{
  37901. + return (cbk_result != CBK_COORD_NOTFOUND
  37902. + && cbk_result != CBK_COORD_FOUND);
  37903. +}
  37904. +
  37905. +/* __REISER4_FILE_H__ */
  37906. +#endif
  37907. +
  37908. +/*
  37909. + * Local variables:
  37910. + * c-indentation-style: "K&R"
  37911. + * mode-name: "LC"
  37912. + * c-basic-offset: 8
  37913. + * tab-width: 8
  37914. + * fill-column: 79
  37915. + * scroll-step: 1
  37916. + * End:
  37917. +*/
  37918. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/file/Makefile linux-4.14.2/fs/reiser4/plugin/file/Makefile
  37919. --- linux-4.14.2.orig/fs/reiser4/plugin/file/Makefile 1970-01-01 01:00:00.000000000 +0100
  37920. +++ linux-4.14.2/fs/reiser4/plugin/file/Makefile 2017-11-26 22:13:09.000000000 +0100
  37921. @@ -0,0 +1,7 @@
  37922. +obj-$(CONFIG_REISER4_FS) += file_plugins.o
  37923. +
  37924. +file_plugins-objs := \
  37925. + file.o \
  37926. + tail_conversion.o \
  37927. + symlink.o \
  37928. + cryptcompress.o
  37929. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/file/symfile.c linux-4.14.2/fs/reiser4/plugin/file/symfile.c
  37930. --- linux-4.14.2.orig/fs/reiser4/plugin/file/symfile.c 1970-01-01 01:00:00.000000000 +0100
  37931. +++ linux-4.14.2/fs/reiser4/plugin/file/symfile.c 2017-11-26 22:13:09.000000000 +0100
  37932. @@ -0,0 +1,87 @@
  37933. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  37934. +
  37935. +/* Symfiles are a generalization of Unix symlinks.
  37936. +
  37937. + A symfile when read behaves as though you took its contents and
  37938. + substituted them into the reiser4 naming system as the right hand side
  37939. + of an assignment, and then read that which you had assigned to it.
  37940. +
  37941. + A key issue for symfiles is how to implement writes through to
  37942. + subfiles. In general, one must have some method of determining what
  37943. + of that which is written to the symfile is written to what subfile.
  37944. + This can be done by use of custom plugin methods written by users, or
  37945. + by using a few general methods we provide for those willing to endure
  37946. + the insertion of delimiters into what is read.
  37947. +
  37948. + Writing to symfiles without delimiters to denote what is written to
  37949. + what subfile is not supported by any plugins we provide in this
  37950. + release. Our most sophisticated support for writes is that embodied
  37951. + by the invert plugin (see invert.c).
  37952. +
  37953. + A read only version of the /etc/passwd file might be
  37954. + constructed as a symfile whose contents are as follows:
  37955. +
  37956. + /etc/passwd/userlines/*
  37957. +
  37958. + or
  37959. +
  37960. + /etc/passwd/userlines/demidov+/etc/passwd/userlines/edward+/etc/passwd/userlines/reiser+/etc/passwd/userlines/root
  37961. +
  37962. + or
  37963. +
  37964. + /etc/passwd/userlines/(demidov+edward+reiser+root)
  37965. +
  37966. + A symfile with contents
  37967. +
  37968. + /filenameA+"(some text stored in the uninvertable symfile)+/filenameB
  37969. +
  37970. + will return when read
  37971. +
  37972. + The contents of filenameAsome text stored in the uninvertable symfileThe contents of filenameB
  37973. +
  37974. + and write of what has been read will not be possible to implement as
  37975. + an identity operation because there are no delimiters denoting the
  37976. + boundaries of what is to be written to what subfile.
  37977. +
  37978. + Note that one could make this a read/write symfile if one specified
  37979. + delimiters, and the write method understood those delimiters delimited
  37980. + what was written to subfiles.
  37981. +
  37982. + So, specifying the symfile in a manner that allows writes:
  37983. +
  37984. + /etc/passwd/userlines/demidov+"(
  37985. + )+/etc/passwd/userlines/edward+"(
  37986. + )+/etc/passwd/userlines/reiser+"(
  37987. + )+/etc/passwd/userlines/root+"(
  37988. + )
  37989. +
  37990. + or
  37991. +
  37992. + /etc/passwd/userlines/(demidov+"(
  37993. + )+edward+"(
  37994. + )+reiser+"(
  37995. + )+root+"(
  37996. + ))
  37997. +
  37998. + and the file demidov might be specified as:
  37999. +
  38000. + /etc/passwd/userlines/demidov/username+"(:)+/etc/passwd/userlines/demidov/password+"(:)+/etc/passwd/userlines/demidov/userid+"(:)+/etc/passwd/userlines/demidov/groupid+"(:)+/etc/passwd/userlines/demidov/gecos+"(:)+/etc/passwd/userlines/demidov/home+"(:)+/etc/passwd/userlines/demidov/shell
  38001. +
  38002. + or
  38003. +
  38004. + /etc/passwd/userlines/demidov/(username+"(:)+password+"(:)+userid+"(:)+groupid+"(:)+gecos+"(:)+home+"(:)+shell)
  38005. +
  38006. + Notice that if the file demidov has a carriage return in it, the
  38007. + parsing fails, but then if you put carriage returns in the wrong place
  38008. + in a normal /etc/passwd file it breaks things also.
  38009. +
  38010. + Note that it is forbidden to have no text between two interpolations
  38011. + if one wants to be able to define what parts of a write go to what
  38012. + subfiles referenced in an interpolation.
  38013. +
  38014. + If one wants to be able to add new lines by writing to the file, one
  38015. + must either write a custom plugin for /etc/passwd that knows how to
  38016. + name an added line, or one must use an invert, or one must use a more
  38017. + sophisticated symfile syntax that we are not planning to write for
  38018. + version 4.0.
  38019. +*/
  38020. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/file/symlink.c linux-4.14.2/fs/reiser4/plugin/file/symlink.c
  38021. --- linux-4.14.2.orig/fs/reiser4/plugin/file/symlink.c 1970-01-01 01:00:00.000000000 +0100
  38022. +++ linux-4.14.2/fs/reiser4/plugin/file/symlink.c 2017-11-26 22:13:09.000000000 +0100
  38023. @@ -0,0 +1,95 @@
  38024. +/* Copyright 2002, 2003, 2005 by Hans Reiser, licensing governed by reiser4/README */
  38025. +
  38026. +#include "../../inode.h"
  38027. +
  38028. +#include <linux/types.h>
  38029. +#include <linux/fs.h>
  38030. +
  38031. +/* file plugin methods specific for symlink files
  38032. + (SYMLINK_FILE_PLUGIN_ID) */
  38033. +
  38034. +/* this is implementation of create_object method of file plugin for
  38035. + SYMLINK_FILE_PLUGIN_ID
  38036. + */
  38037. +
  38038. +/**
  38039. + * reiser4_create_symlink - create_object of file plugin for SYMLINK_FILE_PLUGIN_ID
  38040. + * @symlink: inode of symlink object
  38041. + * @dir: inode of parent directory
  38042. + * @info: parameters of new object
  38043. + *
  38044. + * Inserts stat data with symlink extension into the tree.
  38045. + */
  38046. +int reiser4_create_symlink(struct inode *symlink,
  38047. + struct inode *dir UNUSED_ARG,
  38048. + reiser4_object_create_data *data /* info passed to us
  38049. + * this is filled by
  38050. + * reiser4() syscall
  38051. + * in particular */)
  38052. +{
  38053. + int result;
  38054. +
  38055. + assert("nikita-680", symlink != NULL);
  38056. + assert("nikita-681", S_ISLNK(symlink->i_mode));
  38057. + assert("nikita-685", reiser4_inode_get_flag(symlink, REISER4_NO_SD));
  38058. + assert("nikita-682", dir != NULL);
  38059. + assert("nikita-684", data != NULL);
  38060. + assert("nikita-686", data->id == SYMLINK_FILE_PLUGIN_ID);
  38061. +
  38062. + /*
  38063. + * stat data of symlink has symlink extension in which we store
  38064. + * symlink content, that is, path symlink is pointing to.
  38065. + */
  38066. + reiser4_inode_data(symlink)->extmask |= (1 << SYMLINK_STAT);
  38067. +
  38068. + assert("vs-838", symlink->i_private == NULL);
  38069. + symlink->i_private = (void *)data->name;
  38070. +
  38071. + assert("vs-843", symlink->i_size == 0);
  38072. + INODE_SET_FIELD(symlink, i_size, strlen(data->name));
  38073. +
  38074. + /* insert stat data appended with data->name */
  38075. + result = inode_file_plugin(symlink)->write_sd_by_inode(symlink);
  38076. + if (result) {
  38077. + /* FIXME-VS: Make sure that symlink->i_private is not attached
  38078. + to kmalloced data */
  38079. + INODE_SET_FIELD(symlink, i_size, 0);
  38080. + } else {
  38081. + assert("vs-849", symlink->i_private
  38082. + && reiser4_inode_get_flag(symlink,
  38083. + REISER4_GENERIC_PTR_USED));
  38084. + assert("vs-850",
  38085. + !memcmp((char *)symlink->i_private, data->name,
  38086. + (size_t) symlink->i_size + 1));
  38087. + }
  38088. + return result;
  38089. +}
  38090. +
  38091. +/* this is implementation of destroy_inode method of file plugin for
  38092. + SYMLINK_FILE_PLUGIN_ID
  38093. + */
  38094. +void destroy_inode_symlink(struct inode *inode)
  38095. +{
  38096. + assert("edward-799",
  38097. + inode_file_plugin(inode) ==
  38098. + file_plugin_by_id(SYMLINK_FILE_PLUGIN_ID));
  38099. + assert("edward-800", !is_bad_inode(inode) && is_inode_loaded(inode));
  38100. + assert("edward-801", reiser4_inode_get_flag(inode,
  38101. + REISER4_GENERIC_PTR_USED));
  38102. + assert("vs-839", S_ISLNK(inode->i_mode));
  38103. +
  38104. + kfree(inode->i_private);
  38105. + inode->i_private = NULL;
  38106. + reiser4_inode_clr_flag(inode, REISER4_GENERIC_PTR_USED);
  38107. +}
  38108. +
  38109. +/*
  38110. + Local variables:
  38111. + c-indentation-style: "K&R"
  38112. + mode-name: "LC"
  38113. + c-basic-offset: 8
  38114. + tab-width: 8
  38115. + fill-column: 80
  38116. + scroll-step: 1
  38117. + End:
  38118. +*/
  38119. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/file/tail_conversion.c linux-4.14.2/fs/reiser4/plugin/file/tail_conversion.c
  38120. --- linux-4.14.2.orig/fs/reiser4/plugin/file/tail_conversion.c 1970-01-01 01:00:00.000000000 +0100
  38121. +++ linux-4.14.2/fs/reiser4/plugin/file/tail_conversion.c 2017-11-26 22:13:09.000000000 +0100
  38122. @@ -0,0 +1,763 @@
  38123. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  38124. +
  38125. +#include "../../inode.h"
  38126. +#include "../../super.h"
  38127. +#include "../../page_cache.h"
  38128. +#include "../../carry.h"
  38129. +#include "../../safe_link.h"
  38130. +#include "../../vfs_ops.h"
  38131. +
  38132. +#include <linux/writeback.h>
  38133. +
  38134. +/* this file contains:
  38135. + tail2extent and extent2tail */
  38136. +
  38137. +/* exclusive access to a file is acquired when file state changes: tail2extent, empty2tail, extent2tail, etc */
  38138. +void get_exclusive_access(struct unix_file_info * uf_info)
  38139. +{
  38140. + assert("nikita-3028", reiser4_schedulable());
  38141. + assert("nikita-3047", LOCK_CNT_NIL(inode_sem_w));
  38142. + assert("nikita-3048", LOCK_CNT_NIL(inode_sem_r));
  38143. + /*
  38144. + * "deadlock avoidance": sometimes we commit a transaction under
  38145. + * rw-semaphore on a file. Such commit can deadlock with another
  38146. + * thread that captured some block (hence preventing atom from being
  38147. + * committed) and waits on rw-semaphore.
  38148. + */
  38149. + reiser4_txn_restart_current();
  38150. + LOCK_CNT_INC(inode_sem_w);
  38151. + down_write(&uf_info->latch);
  38152. + uf_info->exclusive_use = 1;
  38153. + assert("vs-1713", uf_info->ea_owner == NULL);
  38154. + assert("vs-1713", atomic_read(&uf_info->nr_neas) == 0);
  38155. + ON_DEBUG(uf_info->ea_owner = current);
  38156. +}
  38157. +
  38158. +void drop_exclusive_access(struct unix_file_info * uf_info)
  38159. +{
  38160. + assert("vs-1714", uf_info->ea_owner == current);
  38161. + assert("vs-1715", atomic_read(&uf_info->nr_neas) == 0);
  38162. + ON_DEBUG(uf_info->ea_owner = NULL);
  38163. + uf_info->exclusive_use = 0;
  38164. + up_write(&uf_info->latch);
  38165. + assert("nikita-3049", LOCK_CNT_NIL(inode_sem_r));
  38166. + assert("nikita-3049", LOCK_CNT_GTZ(inode_sem_w));
  38167. + LOCK_CNT_DEC(inode_sem_w);
  38168. + reiser4_txn_restart_current();
  38169. +}
  38170. +
  38171. +/**
  38172. + * nea_grabbed - do something when file semaphore is down_read-ed
  38173. + * @uf_info:
  38174. + *
  38175. + * This is called when nonexclusive access is obtained on a file. All it does is
  38176. + * for debugging purposes.
  38177. + */
  38178. +static void nea_grabbed(struct unix_file_info *uf_info)
  38179. +{
  38180. +#if REISER4_DEBUG
  38181. + LOCK_CNT_INC(inode_sem_r);
  38182. + assert("vs-1716", uf_info->ea_owner == NULL);
  38183. + atomic_inc(&uf_info->nr_neas);
  38184. + uf_info->last_reader = current;
  38185. +#endif
  38186. +}
  38187. +
  38188. +/**
  38189. + * get_nonexclusive_access - get nonexclusive access to a file
  38190. + * @uf_info: unix file specific part of inode to obtain access to
  38191. + *
  38192. + * Nonexclusive access is obtained on a file before read, write, readpage.
  38193. + */
  38194. +void get_nonexclusive_access(struct unix_file_info *uf_info)
  38195. +{
  38196. + assert("nikita-3029", reiser4_schedulable());
  38197. + assert("nikita-3361", get_current_context()->trans->atom == NULL);
  38198. +
  38199. + down_read(&uf_info->latch);
  38200. + nea_grabbed(uf_info);
  38201. +}
  38202. +
  38203. +/**
  38204. + * try_to_get_nonexclusive_access - try to get nonexclusive access to a file
  38205. + * @uf_info: unix file specific part of inode to obtain access to
  38206. + *
  38207. + * Non-blocking version of nonexclusive access obtaining.
  38208. + */
  38209. +int try_to_get_nonexclusive_access(struct unix_file_info *uf_info)
  38210. +{
  38211. + int result;
  38212. +
  38213. + result = down_read_trylock(&uf_info->latch);
  38214. + if (result)
  38215. + nea_grabbed(uf_info);
  38216. + return result;
  38217. +}
  38218. +
  38219. +void drop_nonexclusive_access(struct unix_file_info * uf_info)
  38220. +{
  38221. + assert("vs-1718", uf_info->ea_owner == NULL);
  38222. + assert("vs-1719", atomic_read(&uf_info->nr_neas) > 0);
  38223. + ON_DEBUG(atomic_dec(&uf_info->nr_neas));
  38224. +
  38225. + up_read(&uf_info->latch);
  38226. +
  38227. + LOCK_CNT_DEC(inode_sem_r);
  38228. + reiser4_txn_restart_current();
  38229. +}
  38230. +
  38231. +/* part of tail2extent. Cut all items covering @count bytes starting from
  38232. + @offset */
  38233. +/* Audited by: green(2002.06.15) */
  38234. +static int cut_formatting_items(struct inode *inode, loff_t offset, int count)
  38235. +{
  38236. + reiser4_key from, to;
  38237. +
  38238. + /* AUDIT: How about putting an assertion here, what would check
  38239. + all provided range is covered by tail items only? */
  38240. + /* key of first byte in the range to be cut */
  38241. + inode_file_plugin(inode)->key_by_inode(inode, offset, &from);
  38242. +
  38243. + /* key of last byte in that range */
  38244. + to = from;
  38245. + set_key_offset(&to, (__u64) (offset + count - 1));
  38246. +
  38247. + /* cut everything between those keys */
  38248. + return reiser4_cut_tree(reiser4_tree_by_inode(inode), &from, &to,
  38249. + inode, 0);
  38250. +}
  38251. +
  38252. +static void release_all_pages(struct page **pages, unsigned nr_pages)
  38253. +{
  38254. + unsigned i;
  38255. +
  38256. + for (i = 0; i < nr_pages; i++) {
  38257. + if (pages[i] == NULL) {
  38258. +#if REISER4_DEBUG
  38259. + unsigned j;
  38260. + for (j = i + 1; j < nr_pages; j++)
  38261. + assert("vs-1620", pages[j] == NULL);
  38262. +#endif
  38263. + break;
  38264. + }
  38265. + put_page(pages[i]);
  38266. + pages[i] = NULL;
  38267. + }
  38268. +}
  38269. +
  38270. +/* part of tail2extent. replace tail items with extent one. Content of tail
  38271. + items (@count bytes) being cut are copied already into
  38272. + pages. extent_writepage method is called to create extents corresponding to
  38273. + those pages */
  38274. +static int replace(struct inode *inode, struct page **pages, unsigned nr_pages, int count)
  38275. +{
  38276. + int result;
  38277. + unsigned i;
  38278. + STORE_COUNTERS;
  38279. +
  38280. + if (nr_pages == 0)
  38281. + return 0;
  38282. +
  38283. + assert("vs-596", pages[0]);
  38284. +
  38285. + /* cut copied items */
  38286. + result = cut_formatting_items(inode, page_offset(pages[0]), count);
  38287. + if (result)
  38288. + return result;
  38289. +
  38290. + CHECK_COUNTERS;
  38291. +
  38292. + /* put into tree replacement for just removed items: extent item, namely */
  38293. + for (i = 0; i < nr_pages; i++) {
  38294. + result = add_to_page_cache_lru(pages[i], inode->i_mapping,
  38295. + pages[i]->index,
  38296. + mapping_gfp_mask(inode->
  38297. + i_mapping));
  38298. + if (result)
  38299. + break;
  38300. + SetPageUptodate(pages[i]);
  38301. + set_page_dirty_notag(pages[i]);
  38302. + unlock_page(pages[i]);
  38303. + result = find_or_create_extent(pages[i]);
  38304. + if (result) {
  38305. + /*
  38306. + * Failure at a critical point:
  38307. + * tail has been removed,
  38308. + * but extent hasn't been created
  38309. + */
  38310. + warning("edward-1572",
  38311. + "Report the error code %i to developers. Run FSCK",
  38312. + result);
  38313. + break;
  38314. + }
  38315. + }
  38316. + return result;
  38317. +}
  38318. +
  38319. +#define TAIL2EXTENT_PAGE_NUM 3 /* number of pages to fill before cutting tail
  38320. + * items */
  38321. +
  38322. +static int reserve_tail2extent_iteration(struct inode *inode)
  38323. +{
  38324. + reiser4_block_nr unformatted_nodes;
  38325. + reiser4_tree *tree;
  38326. +
  38327. + tree = reiser4_tree_by_inode(inode);
  38328. +
  38329. + /* number of unformatted nodes which will be created */
  38330. + unformatted_nodes = TAIL2EXTENT_PAGE_NUM;
  38331. +
  38332. + /*
  38333. + * space required for one iteration of extent->tail conversion:
  38334. + *
  38335. + * 1. kill N tail items
  38336. + *
  38337. + * 2. insert TAIL2EXTENT_PAGE_NUM unformatted nodes
  38338. + *
  38339. + * 3. insert TAIL2EXTENT_PAGE_NUM (worst-case single-block
  38340. + * extents) extent units.
  38341. + *
  38342. + * 4. drilling to the leaf level by coord_by_key()
  38343. + *
  38344. + * 5. possible update of stat-data
  38345. + *
  38346. + */
  38347. + grab_space_enable();
  38348. + return reiser4_grab_space
  38349. + (2 * tree->height +
  38350. + TAIL2EXTENT_PAGE_NUM +
  38351. + TAIL2EXTENT_PAGE_NUM * estimate_one_insert_into_item(tree) +
  38352. + 1 + estimate_one_insert_item(tree) +
  38353. + inode_file_plugin(inode)->estimate.update(inode), BA_CAN_COMMIT);
  38354. +}
  38355. +
  38356. +/* clear stat data's flag indicating that conversion is in progress */
  38357. +static int complete_conversion(struct inode *inode)
  38358. +{
  38359. + int result;
  38360. +
  38361. + grab_space_enable();
  38362. + result =
  38363. + reiser4_grab_space(inode_file_plugin(inode)->estimate.update(inode),
  38364. + BA_CAN_COMMIT);
  38365. + if (result == 0) {
  38366. + reiser4_inode_clr_flag(inode, REISER4_PART_MIXED);
  38367. + result = reiser4_update_sd(inode);
  38368. + }
  38369. + if (result)
  38370. + warning("vs-1696", "Failed to clear converting bit of %llu: %i",
  38371. + (unsigned long long)get_inode_oid(inode), result);
  38372. + return 0;
  38373. +}
  38374. +
  38375. +/**
  38376. + * find_start
  38377. + * @inode:
  38378. + * @id:
  38379. + * @offset:
  38380. + *
  38381. + * this is used by tail2extent and extent2tail to detect where previous
  38382. + * uncompleted conversion stopped
  38383. + */
  38384. +static int find_start(struct inode *inode, reiser4_plugin_id id, __u64 *offset)
  38385. +{
  38386. + int result;
  38387. + lock_handle lh;
  38388. + coord_t coord;
  38389. + struct unix_file_info *ufo;
  38390. + int found;
  38391. + reiser4_key key;
  38392. +
  38393. + ufo = unix_file_inode_data(inode);
  38394. + init_lh(&lh);
  38395. + result = 0;
  38396. + found = 0;
  38397. + inode_file_plugin(inode)->key_by_inode(inode, *offset, &key);
  38398. + do {
  38399. + init_lh(&lh);
  38400. + result = find_file_item_nohint(&coord, &lh, &key,
  38401. + ZNODE_READ_LOCK, inode);
  38402. +
  38403. + if (result == CBK_COORD_FOUND) {
  38404. + if (coord.between == AT_UNIT) {
  38405. + /*coord_clear_iplug(&coord); */
  38406. + result = zload(coord.node);
  38407. + if (result == 0) {
  38408. + if (item_id_by_coord(&coord) == id)
  38409. + found = 1;
  38410. + else
  38411. + item_plugin_by_coord(&coord)->s.
  38412. + file.append_key(&coord,
  38413. + &key);
  38414. + zrelse(coord.node);
  38415. + }
  38416. + } else
  38417. + result = RETERR(-ENOENT);
  38418. + }
  38419. + done_lh(&lh);
  38420. + } while (result == 0 && !found);
  38421. + *offset = get_key_offset(&key);
  38422. + return result;
  38423. +}
  38424. +
  38425. +/**
  38426. + * tail2extent
  38427. + * @uf_info:
  38428. + *
  38429. + *
  38430. + */
  38431. +int tail2extent(struct unix_file_info *uf_info)
  38432. +{
  38433. + int result;
  38434. + reiser4_key key; /* key of next byte to be moved to page */
  38435. + char *p_data; /* data of page */
  38436. + unsigned page_off = 0, /* offset within the page where to copy data */
  38437. + count; /* number of bytes of item which can be
  38438. + * copied to page */
  38439. + struct page *pages[TAIL2EXTENT_PAGE_NUM];
  38440. + struct page *page;
  38441. + int done; /* set to 1 when all file is read */
  38442. + char *item;
  38443. + int i;
  38444. + struct inode *inode;
  38445. + int first_iteration;
  38446. + int bytes;
  38447. + __u64 offset;
  38448. +
  38449. + assert("nikita-3362", ea_obtained(uf_info));
  38450. + inode = unix_file_info_to_inode(uf_info);
  38451. + assert("nikita-3412", !IS_RDONLY(inode));
  38452. + assert("vs-1649", uf_info->container != UF_CONTAINER_EXTENTS);
  38453. + assert("", !reiser4_inode_get_flag(inode, REISER4_PART_IN_CONV));
  38454. +
  38455. + offset = 0;
  38456. + first_iteration = 1;
  38457. + result = 0;
  38458. + if (reiser4_inode_get_flag(inode, REISER4_PART_MIXED)) {
  38459. + /*
  38460. + * file is marked on disk as there was a conversion which did
  38461. + * not complete due to either crash or some error. Find which
  38462. + * offset tail conversion stopped at
  38463. + */
  38464. + result = find_start(inode, FORMATTING_ID, &offset);
  38465. + if (result == -ENOENT) {
  38466. + /* no tail items found, everything is converted */
  38467. + uf_info->container = UF_CONTAINER_EXTENTS;
  38468. + complete_conversion(inode);
  38469. + return 0;
  38470. + } else if (result != 0)
  38471. + /* some other error */
  38472. + return result;
  38473. + first_iteration = 0;
  38474. + }
  38475. +
  38476. + reiser4_inode_set_flag(inode, REISER4_PART_IN_CONV);
  38477. +
  38478. + /* get key of first byte of a file */
  38479. + inode_file_plugin(inode)->key_by_inode(inode, offset, &key);
  38480. +
  38481. + done = 0;
  38482. + while (done == 0) {
  38483. + memset(pages, 0, sizeof(pages));
  38484. + result = reserve_tail2extent_iteration(inode);
  38485. + if (result != 0) {
  38486. + reiser4_inode_clr_flag(inode, REISER4_PART_IN_CONV);
  38487. + goto out;
  38488. + }
  38489. + if (first_iteration) {
  38490. + reiser4_inode_set_flag(inode, REISER4_PART_MIXED);
  38491. + reiser4_update_sd(inode);
  38492. + first_iteration = 0;
  38493. + }
  38494. + bytes = 0;
  38495. + for (i = 0; i < sizeof_array(pages) && done == 0; i++) {
  38496. + assert("vs-598",
  38497. + (get_key_offset(&key) & ~PAGE_MASK) == 0);
  38498. + page = alloc_page(reiser4_ctx_gfp_mask_get());
  38499. + if (!page) {
  38500. + result = RETERR(-ENOMEM);
  38501. + goto error;
  38502. + }
  38503. +
  38504. + page->index =
  38505. + (unsigned long)(get_key_offset(&key) >>
  38506. + PAGE_SHIFT);
  38507. + /*
  38508. + * usually when one is going to longterm lock znode (as
  38509. + * find_file_item does, for instance) he must not hold
  38510. + * locked pages. However, there is an exception for
  38511. + * case tail2extent. Pages appearing here are not
  38512. + * reachable to everyone else, they are clean, they do
  38513. + * not have jnodes attached so keeping them locked do
  38514. + * not risk deadlock appearance
  38515. + */
  38516. + assert("vs-983", !PagePrivate(page));
  38517. + reiser4_invalidate_pages(inode->i_mapping, page->index,
  38518. + 1, 0);
  38519. +
  38520. + for (page_off = 0; page_off < PAGE_SIZE;) {
  38521. + coord_t coord;
  38522. + lock_handle lh;
  38523. +
  38524. + /* get next item */
  38525. + /* FIXME: we might want to readahead here */
  38526. + init_lh(&lh);
  38527. + result =
  38528. + find_file_item_nohint(&coord, &lh, &key,
  38529. + ZNODE_READ_LOCK,
  38530. + inode);
  38531. + if (result != CBK_COORD_FOUND) {
  38532. + /*
  38533. + * an error happened or no items of the file
  38534. + * were found
  38535. + */
  38536. + done_lh(&lh);
  38537. + put_page(page);
  38538. + goto error;
  38539. + }
  38540. +
  38541. + if (coord.between == AFTER_UNIT) {
  38542. + /*
  38543. + * end of file is reached. Pad page
  38544. + * with zeros
  38545. + */
  38546. + done_lh(&lh);
  38547. + done = 1;
  38548. + p_data = kmap_atomic(page);
  38549. + memset(p_data + page_off, 0,
  38550. + PAGE_SIZE - page_off);
  38551. + kunmap_atomic(p_data);
  38552. + break;
  38553. + }
  38554. +
  38555. + result = zload(coord.node);
  38556. + if (result) {
  38557. + put_page(page);
  38558. + done_lh(&lh);
  38559. + goto error;
  38560. + }
  38561. + assert("vs-856", coord.between == AT_UNIT);
  38562. + item = ((char *)item_body_by_coord(&coord)) +
  38563. + coord.unit_pos;
  38564. +
  38565. + /* how many bytes to copy */
  38566. + count =
  38567. + item_length_by_coord(&coord) -
  38568. + coord.unit_pos;
  38569. + /* limit length of copy to end of page */
  38570. + if (count > PAGE_SIZE - page_off)
  38571. + count = PAGE_SIZE - page_off;
  38572. +
  38573. + /*
  38574. + * copy item (as much as will fit starting from
  38575. + * the beginning of the item) into the page
  38576. + */
  38577. + p_data = kmap_atomic(page);
  38578. + memcpy(p_data + page_off, item, count);
  38579. + kunmap_atomic(p_data);
  38580. +
  38581. + page_off += count;
  38582. + bytes += count;
  38583. + set_key_offset(&key,
  38584. + get_key_offset(&key) + count);
  38585. +
  38586. + zrelse(coord.node);
  38587. + done_lh(&lh);
  38588. + } /* end of loop which fills one page by content of
  38589. + * formatting items */
  38590. +
  38591. + if (page_off) {
  38592. + /* something was copied into page */
  38593. + pages[i] = page;
  38594. + } else {
  38595. + put_page(page);
  38596. + assert("vs-1648", done == 1);
  38597. + break;
  38598. + }
  38599. + } /* end of loop through pages of one conversion iteration */
  38600. +
  38601. + if (i > 0) {
  38602. + result = replace(inode, pages, i, bytes);
  38603. + release_all_pages(pages, sizeof_array(pages));
  38604. + if (result)
  38605. + goto error;
  38606. + /*
  38607. + * We have to drop exclusive access to avoid deadlock
  38608. + * which may happen because called by reiser4_writepages
  38609. + * capture_unix_file requires to get non-exclusive
  38610. + * access to a file. It is safe to drop EA in the middle
  38611. + * of tail2extent conversion because write_unix_file,
  38612. + * setattr_unix_file(truncate), mmap_unix_file,
  38613. + * release_unix_file(extent2tail) checks if conversion
  38614. + * is not in progress (see comments before
  38615. + * get_exclusive_access_careful().
  38616. + * Other processes that acquire non-exclusive access
  38617. + * (read_unix_file, reiser4_writepages, etc) should work
  38618. + * on partially converted files.
  38619. + */
  38620. + drop_exclusive_access(uf_info);
  38621. + /* throttle the conversion */
  38622. + reiser4_throttle_write(inode);
  38623. + get_exclusive_access(uf_info);
  38624. +
  38625. + /*
  38626. + * nobody is allowed to complete conversion but a
  38627. + * process which started it
  38628. + */
  38629. + assert("", reiser4_inode_get_flag(inode,
  38630. + REISER4_PART_MIXED));
  38631. + }
  38632. + }
  38633. + if (result == 0) {
  38634. + /* file is converted to extent items */
  38635. + reiser4_inode_clr_flag(inode, REISER4_PART_IN_CONV);
  38636. + assert("vs-1697", reiser4_inode_get_flag(inode,
  38637. + REISER4_PART_MIXED));
  38638. +
  38639. + uf_info->container = UF_CONTAINER_EXTENTS;
  38640. + complete_conversion(inode);
  38641. + } else {
  38642. + /*
  38643. + * conversion is not complete. Inode was already marked as
  38644. + * REISER4_PART_MIXED and stat-data were updated at the first
  38645. + * iteration of the loop above.
  38646. + */
  38647. + error:
  38648. + release_all_pages(pages, sizeof_array(pages));
  38649. + reiser4_inode_clr_flag(inode, REISER4_PART_IN_CONV);
  38650. + warning("edward-1548", "Partial conversion of %llu: %i",
  38651. + (unsigned long long)get_inode_oid(inode), result);
  38652. + }
  38653. +
  38654. + out:
  38655. + /* this flag should be cleared, otherwise get_exclusive_access_careful()
  38656. + will fall into infinite loop */
  38657. + assert("edward-1549", !reiser4_inode_get_flag(inode,
  38658. + REISER4_PART_IN_CONV));
  38659. + return result;
  38660. +}
  38661. +
  38662. +static int reserve_extent2tail_iteration(struct inode *inode)
  38663. +{
  38664. + reiser4_tree *tree;
  38665. +
  38666. + tree = reiser4_tree_by_inode(inode);
  38667. + /*
  38668. + * reserve blocks for (in this order):
  38669. + *
  38670. + * 1. removal of extent item
  38671. + *
  38672. + * 2. insertion of tail by insert_flow()
  38673. + *
  38674. + * 3. drilling to the leaf level by coord_by_key()
  38675. + *
  38676. + * 4. possible update of stat-data
  38677. + */
  38678. + grab_space_enable();
  38679. + return reiser4_grab_space
  38680. + (estimate_one_item_removal(tree) +
  38681. + estimate_insert_flow(tree->height) +
  38682. + 1 + estimate_one_insert_item(tree) +
  38683. + inode_file_plugin(inode)->estimate.update(inode), BA_CAN_COMMIT);
  38684. +}
  38685. +
  38686. +/* for every page of file: read page, cut part of extent pointing to this page,
  38687. + put data of the page into the tree as tail items */
  38688. +int extent2tail(struct file * file, struct unix_file_info *uf_info)
  38689. +{
  38690. + int result;
  38691. + struct inode *inode;
  38692. + struct page *page;
  38693. + unsigned long num_pages, i;
  38694. + unsigned long start_page;
  38695. + reiser4_key from;
  38696. + reiser4_key to;
  38697. + unsigned count;
  38698. + __u64 offset;
  38699. +
  38700. + assert("nikita-3362", ea_obtained(uf_info));
  38701. + inode = unix_file_info_to_inode(uf_info);
  38702. + assert("nikita-3412", !IS_RDONLY(inode));
  38703. + assert("vs-1649", uf_info->container != UF_CONTAINER_TAILS);
  38704. + assert("", !reiser4_inode_get_flag(inode, REISER4_PART_IN_CONV));
  38705. +
  38706. + offset = 0;
  38707. + if (reiser4_inode_get_flag(inode, REISER4_PART_MIXED)) {
  38708. + /*
  38709. + * file is marked on disk as there was a conversion which did
  38710. + * not complete due to either crash or some error. Find which
  38711. + * offset tail conversion stopped at
  38712. + */
  38713. + result = find_start(inode, EXTENT_POINTER_ID, &offset);
  38714. + if (result == -ENOENT) {
  38715. + /* no extent found, everything is converted */
  38716. + uf_info->container = UF_CONTAINER_TAILS;
  38717. + complete_conversion(inode);
  38718. + return 0;
  38719. + } else if (result != 0)
  38720. + /* some other error */
  38721. + return result;
  38722. + }
  38723. + reiser4_inode_set_flag(inode, REISER4_PART_IN_CONV);
  38724. +
  38725. + /* number of pages in the file */
  38726. + num_pages =
  38727. + (inode->i_size + - offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
  38728. + start_page = offset >> PAGE_SHIFT;
  38729. +
  38730. + inode_file_plugin(inode)->key_by_inode(inode, offset, &from);
  38731. + to = from;
  38732. +
  38733. + result = 0;
  38734. + for (i = 0; i < num_pages; i++) {
  38735. + __u64 start_byte;
  38736. +
  38737. + result = reserve_extent2tail_iteration(inode);
  38738. + if (result != 0)
  38739. + break;
  38740. + if (i == 0 && offset == 0) {
  38741. + reiser4_inode_set_flag(inode, REISER4_PART_MIXED);
  38742. + reiser4_update_sd(inode);
  38743. + }
  38744. +
  38745. + page = read_mapping_page(inode->i_mapping,
  38746. + (unsigned)(i + start_page), NULL);
  38747. + if (IS_ERR(page)) {
  38748. + result = PTR_ERR(page);
  38749. + warning("edward-1569",
  38750. + "Can not read page %lu of %lu: %i",
  38751. + i, num_pages, result);
  38752. + break;
  38753. + }
  38754. +
  38755. + wait_on_page_locked(page);
  38756. +
  38757. + if (!PageUptodate(page)) {
  38758. + put_page(page);
  38759. + result = RETERR(-EIO);
  38760. + break;
  38761. + }
  38762. +
  38763. + /* cut part of file we have read */
  38764. + start_byte = (__u64) ((i + start_page) << PAGE_SHIFT);
  38765. + set_key_offset(&from, start_byte);
  38766. + set_key_offset(&to, start_byte + PAGE_SIZE - 1);
  38767. + /*
  38768. + * reiser4_cut_tree_object() returns -E_REPEAT to allow atom
  38769. + * commits during over-long truncates. But
  38770. + * extent->tail conversion should be performed in one
  38771. + * transaction.
  38772. + */
  38773. + result = reiser4_cut_tree(reiser4_tree_by_inode(inode), &from,
  38774. + &to, inode, 0);
  38775. +
  38776. + if (result) {
  38777. + put_page(page);
  38778. + warning("edward-1570",
  38779. + "Can not delete converted chunk: %i",
  38780. + result);
  38781. + break;
  38782. + }
  38783. +
  38784. + /* put page data into tree via tail_write */
  38785. + count = PAGE_SIZE;
  38786. + if ((i == (num_pages - 1)) &&
  38787. + (inode->i_size & ~PAGE_MASK))
  38788. + /* last page may be incomplete */
  38789. + count = (inode->i_size & ~PAGE_MASK);
  38790. + while (count) {
  38791. + loff_t pos = start_byte;
  38792. +
  38793. + assert("edward-1537",
  38794. + file != NULL && file->f_path.dentry != NULL);
  38795. + assert("edward-1538",
  38796. + file_inode(file) == inode);
  38797. +
  38798. + result = reiser4_write_tail_noreserve(file, inode,
  38799. + (char __user *)kmap(page),
  38800. + count, &pos);
  38801. + kunmap(page);
  38802. + /* FIXME:
  38803. + may be put_file_hint() instead ? */
  38804. + reiser4_free_file_fsdata(file);
  38805. + if (result <= 0) {
  38806. + /*
  38807. + * Failure at a critical point:
  38808. + * extent has been removed,
  38809. + * but tail hasn't been created
  38810. + */
  38811. + warning("edward-1571",
  38812. + "Report the error code %i to developers. Run FSCK",
  38813. + result);
  38814. + put_page(page);
  38815. + reiser4_inode_clr_flag(inode,
  38816. + REISER4_PART_IN_CONV);
  38817. + return result;
  38818. + }
  38819. + count -= result;
  38820. + }
  38821. +
  38822. + /* release page */
  38823. + lock_page(page);
  38824. + /* page is already detached from jnode and mapping. */
  38825. + assert("vs-1086", page->mapping == NULL);
  38826. + assert("nikita-2690",
  38827. + (!PagePrivate(page) && jprivate(page) == 0));
  38828. + /* waiting for writeback completion with page lock held is
  38829. + * perfectly valid. */
  38830. + wait_on_page_writeback(page);
  38831. + reiser4_drop_page(page);
  38832. + /* release reference taken by read_cache_page() above */
  38833. + put_page(page);
  38834. +
  38835. + drop_exclusive_access(uf_info);
  38836. + /* throttle the conversion */
  38837. + reiser4_throttle_write(inode);
  38838. + get_exclusive_access(uf_info);
  38839. + /*
  38840. + * nobody is allowed to complete conversion but a process which
  38841. + * started it
  38842. + */
  38843. + assert("", reiser4_inode_get_flag(inode, REISER4_PART_MIXED));
  38844. + }
  38845. +
  38846. + reiser4_inode_clr_flag(inode, REISER4_PART_IN_CONV);
  38847. +
  38848. + if (i == num_pages) {
  38849. + /* file is converted to formatted items */
  38850. + assert("vs-1698", reiser4_inode_get_flag(inode,
  38851. + REISER4_PART_MIXED));
  38852. + assert("vs-1260",
  38853. + inode_has_no_jnodes(reiser4_inode_data(inode)));
  38854. +
  38855. + uf_info->container = UF_CONTAINER_TAILS;
  38856. + complete_conversion(inode);
  38857. + return 0;
  38858. + }
  38859. + /*
  38860. + * conversion is not complete. Inode was already marked as
  38861. + * REISER4_PART_MIXED and stat-data were updated at the first
  38862. + * iteration of the loop above.
  38863. + */
  38864. + warning("nikita-2282",
  38865. + "Partial conversion of %llu: %lu of %lu: %i",
  38866. + (unsigned long long)get_inode_oid(inode), i,
  38867. + num_pages, result);
  38868. +
  38869. + /* this flag should be cleared, otherwise get_exclusive_access_careful()
  38870. + will fall into infinite loop */
  38871. + assert("edward-1550", !reiser4_inode_get_flag(inode,
  38872. + REISER4_PART_IN_CONV));
  38873. + return result;
  38874. +}
  38875. +
  38876. +/*
  38877. + * Local variables:
  38878. + * c-indentation-style: "K&R"
  38879. + * mode-name: "LC"
  38880. + * c-basic-offset: 8
  38881. + * tab-width: 8
  38882. + * fill-column: 79
  38883. + * scroll-step: 1
  38884. + * End:
  38885. + */
  38886. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/file_ops.c linux-4.14.2/fs/reiser4/plugin/file_ops.c
  38887. --- linux-4.14.2.orig/fs/reiser4/plugin/file_ops.c 1970-01-01 01:00:00.000000000 +0100
  38888. +++ linux-4.14.2/fs/reiser4/plugin/file_ops.c 2017-11-26 22:13:09.000000000 +0100
  38889. @@ -0,0 +1,119 @@
  38890. +/* Copyright 2005 by Hans Reiser, licensing governed by
  38891. + reiser4/README */
  38892. +
  38893. +/* this file contains typical implementations for some of methods of
  38894. + struct file_operations and of struct address_space_operations
  38895. +*/
  38896. +
  38897. +#include "../inode.h"
  38898. +#include "object.h"
  38899. +
  38900. +/* file operations */
  38901. +
  38902. +/* implementation of vfs's llseek method of struct file_operations for
  38903. + typical directory can be found in file_ops_readdir.c
  38904. +*/
  38905. +loff_t reiser4_llseek_dir_common(struct file *, loff_t, int origin);
  38906. +
  38907. +/* implementation of vfs's iterate method of struct file_operations for
  38908. + typical directory can be found in file_ops_readdir.c
  38909. +*/
  38910. +int reiser4_iterate_common(struct file *, struct dir_context *);
  38911. +
  38912. +/**
  38913. + * reiser4_release_dir_common - release of struct file_operations
  38914. + * @inode: inode of released file
  38915. + * @file: file to release
  38916. + *
  38917. + * Implementation of release method of struct file_operations for typical
  38918. + * directory. All it does is freeing of reiser4 specific file data.
  38919. +*/
  38920. +int reiser4_release_dir_common(struct inode *inode, struct file *file)
  38921. +{
  38922. + reiser4_context *ctx;
  38923. +
  38924. + ctx = reiser4_init_context(inode->i_sb);
  38925. + if (IS_ERR(ctx))
  38926. + return PTR_ERR(ctx);
  38927. + reiser4_free_file_fsdata(file);
  38928. + reiser4_exit_context(ctx);
  38929. + return 0;
  38930. +}
  38931. +
  38932. +/* this is common implementation of vfs's fsync method of struct
  38933. + file_operations
  38934. +*/
  38935. +int reiser4_sync_common(struct file *file, loff_t start,
  38936. + loff_t end, int datasync)
  38937. +{
  38938. + reiser4_context *ctx;
  38939. + int result;
  38940. + struct dentry *dentry = file->f_path.dentry;
  38941. +
  38942. + ctx = reiser4_init_context(dentry->d_inode->i_sb);
  38943. + if (IS_ERR(ctx))
  38944. + return PTR_ERR(ctx);
  38945. + result = txnmgr_force_commit_all(dentry->d_inode->i_sb, 0);
  38946. +
  38947. + context_set_commit_async(ctx);
  38948. + reiser4_exit_context(ctx);
  38949. + return result;
  38950. +}
  38951. +
  38952. +/*
  38953. + * common sync method for regular files.
  38954. + *
  38955. + * We are trying to be smart here. Instead of committing all atoms (original
  38956. + * solution), we scan dirty pages of this file and commit all atoms they are
  38957. + * part of.
  38958. + *
  38959. + * Situation is complicated by anonymous pages: i.e., extent-less pages
  38960. + * dirtied through mmap. Fortunately sys_fsync() first calls
  38961. + * filemap_fdatawrite() that will ultimately call reiser4_writepages_dispatch,
  38962. + * insert all missing extents and capture anonymous pages.
  38963. + */
  38964. +int reiser4_sync_file_common(struct file *file, loff_t start, loff_t end, int datasync)
  38965. +{
  38966. + reiser4_context *ctx;
  38967. + txn_atom *atom;
  38968. + reiser4_block_nr reserve;
  38969. + struct dentry *dentry = file->f_path.dentry;
  38970. + struct inode *inode = file->f_mapping->host;
  38971. +
  38972. + int err = filemap_write_and_wait_range(file->f_mapping->host->i_mapping, start, end);
  38973. + if (err)
  38974. + return err;
  38975. +
  38976. + ctx = reiser4_init_context(dentry->d_inode->i_sb);
  38977. + if (IS_ERR(ctx))
  38978. + return PTR_ERR(ctx);
  38979. +
  38980. + inode_lock(inode);
  38981. +
  38982. + reserve = estimate_update_common(dentry->d_inode);
  38983. + if (reiser4_grab_space(reserve, BA_CAN_COMMIT)) {
  38984. + reiser4_exit_context(ctx);
  38985. + inode_unlock(inode);
  38986. + return RETERR(-ENOSPC);
  38987. + }
  38988. + write_sd_by_inode_common(dentry->d_inode);
  38989. +
  38990. + atom = get_current_atom_locked();
  38991. + spin_lock_txnh(ctx->trans);
  38992. + force_commit_atom(ctx->trans);
  38993. + reiser4_exit_context(ctx);
  38994. + inode_unlock(inode);
  38995. +
  38996. + return 0;
  38997. +}
  38998. +
  38999. +/*
  39000. + * Local variables:
  39001. + * c-indentation-style: "K&R"
  39002. + * mode-name: "LC"
  39003. + * c-basic-offset: 8
  39004. + * tab-width: 8
  39005. + * fill-column: 79
  39006. + * scroll-step: 1
  39007. + * End:
  39008. + */
  39009. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/file_ops_readdir.c linux-4.14.2/fs/reiser4/plugin/file_ops_readdir.c
  39010. --- linux-4.14.2.orig/fs/reiser4/plugin/file_ops_readdir.c 1970-01-01 01:00:00.000000000 +0100
  39011. +++ linux-4.14.2/fs/reiser4/plugin/file_ops_readdir.c 2017-11-26 22:13:09.000000000 +0100
  39012. @@ -0,0 +1,658 @@
  39013. +/* Copyright 2005 by Hans Reiser, licensing governed by
  39014. + * reiser4/README */
  39015. +
  39016. +#include "../inode.h"
  39017. +
  39018. +/* return true, iff @coord points to the valid directory item that is part of
  39019. + * @inode directory. */
  39020. +static int is_valid_dir_coord(struct inode *inode, coord_t *coord)
  39021. +{
  39022. + return plugin_of_group(item_plugin_by_coord(coord),
  39023. + DIR_ENTRY_ITEM_TYPE) &&
  39024. + inode_file_plugin(inode)->owns_item(inode, coord);
  39025. +}
  39026. +
  39027. +/* compare two logical positions within the same directory */
  39028. +static cmp_t dir_pos_cmp(const struct dir_pos *p1, const struct dir_pos *p2)
  39029. +{
  39030. + cmp_t result;
  39031. +
  39032. + assert("nikita-2534", p1 != NULL);
  39033. + assert("nikita-2535", p2 != NULL);
  39034. +
  39035. + result = de_id_cmp(&p1->dir_entry_key, &p2->dir_entry_key);
  39036. + if (result == EQUAL_TO) {
  39037. + int diff;
  39038. +
  39039. + diff = p1->pos - p2->pos;
  39040. + result =
  39041. + (diff < 0) ? LESS_THAN : (diff ? GREATER_THAN : EQUAL_TO);
  39042. + }
  39043. + return result;
  39044. +}
  39045. +
  39046. +/* see comment before reiser4_readdir_common() for overview of why "adjustment"
  39047. + * is necessary. */
  39048. +static void
  39049. +adjust_dir_pos(struct file *dir, struct readdir_pos *readdir_spot,
  39050. + const struct dir_pos *mod_point, int adj)
  39051. +{
  39052. + struct dir_pos *pos;
  39053. +
  39054. + /*
  39055. + * new directory entry was added (adj == +1) or removed (adj == -1) at
  39056. + * the @mod_point. Directory file descriptor @dir is doing readdir and
  39057. + * is currently positioned at @readdir_spot. Latter has to be updated
  39058. + * to maintain stable readdir.
  39059. + */
  39060. + /* directory is positioned to the beginning. */
  39061. + if (readdir_spot->entry_no == 0)
  39062. + return;
  39063. +
  39064. + pos = &readdir_spot->position;
  39065. + switch (dir_pos_cmp(mod_point, pos)) {
  39066. + case LESS_THAN:
  39067. + /* @mod_pos is _before_ @readdir_spot, that is, entry was
  39068. + * added/removed on the left (in key order) of current
  39069. + * position. */
  39070. + /* logical number of directory entry readdir is "looking" at
  39071. + * changes */
  39072. + readdir_spot->entry_no += adj;
  39073. + assert("nikita-2577",
  39074. + ergo(dir != NULL,
  39075. + reiser4_get_dir_fpos(dir, dir->f_pos) + adj >= 0));
  39076. + if (de_id_cmp(&pos->dir_entry_key,
  39077. + &mod_point->dir_entry_key) == EQUAL_TO) {
  39078. + assert("nikita-2575", mod_point->pos < pos->pos);
  39079. + /*
  39080. + * if entry added/removed has the same key as current
  39081. + * for readdir, update counter of duplicate keys in
  39082. + * @readdir_spot.
  39083. + */
  39084. + pos->pos += adj;
  39085. + }
  39086. + break;
  39087. + case GREATER_THAN:
  39088. + /* directory is modified after @pos: nothing to do. */
  39089. + break;
  39090. + case EQUAL_TO:
  39091. + /* cannot insert an entry readdir is looking at, because it
  39092. + already exists. */
  39093. + assert("nikita-2576", adj < 0);
  39094. + /* directory entry to which @pos points to is being
  39095. + removed.
  39096. +
  39097. + NOTE-NIKITA: Right thing to do is to update @pos to point
  39098. + to the next entry. This is complex (we are under spin-lock
  39099. + for one thing). Just rewind it to the beginning. Next
  39100. + readdir will have to scan the beginning of
  39101. + directory. Proper solution is to use semaphore in
  39102. + spin lock's stead and use rewind_right() here.
  39103. +
  39104. + NOTE-NIKITA: now, semaphore is used, so...
  39105. + */
  39106. + memset(readdir_spot, 0, sizeof *readdir_spot);
  39107. + }
  39108. +}
  39109. +
  39110. +/* scan all file-descriptors for this directory and adjust their
  39111. + positions respectively. Should be used by implementations of
  39112. + add_entry and rem_entry of dir plugin */
  39113. +void reiser4_adjust_dir_file(struct inode *dir, const struct dentry *de,
  39114. + int offset, int adj)
  39115. +{
  39116. + reiser4_file_fsdata *scan;
  39117. + struct dir_pos mod_point;
  39118. +
  39119. + assert("nikita-2536", dir != NULL);
  39120. + assert("nikita-2538", de != NULL);
  39121. + assert("nikita-2539", adj != 0);
  39122. +
  39123. + build_de_id(dir, &de->d_name, &mod_point.dir_entry_key);
  39124. + mod_point.pos = offset;
  39125. +
  39126. + spin_lock_inode(dir);
  39127. +
  39128. + /*
  39129. + * new entry was added/removed in directory @dir. Scan all file
  39130. + * descriptors for @dir that are currently involved into @readdir and
  39131. + * update them.
  39132. + */
  39133. +
  39134. + list_for_each_entry(scan, get_readdir_list(dir), dir.linkage)
  39135. + adjust_dir_pos(scan->back, &scan->dir.readdir, &mod_point, adj);
  39136. +
  39137. + spin_unlock_inode(dir);
  39138. +}
  39139. +
  39140. +/*
  39141. + * traverse tree to start/continue readdir from the readdir position @pos.
  39142. + */
  39143. +static int dir_go_to(struct file *dir, struct readdir_pos *pos, tap_t *tap)
  39144. +{
  39145. + reiser4_key key;
  39146. + int result;
  39147. + struct inode *inode;
  39148. +
  39149. + assert("nikita-2554", pos != NULL);
  39150. +
  39151. + inode = file_inode(dir);
  39152. + result = inode_dir_plugin(inode)->build_readdir_key(dir, &key);
  39153. + if (result != 0)
  39154. + return result;
  39155. + result = reiser4_object_lookup(inode,
  39156. + &key,
  39157. + tap->coord,
  39158. + tap->lh,
  39159. + tap->mode,
  39160. + FIND_EXACT,
  39161. + LEAF_LEVEL, LEAF_LEVEL,
  39162. + 0, &tap->ra_info);
  39163. + if (result == CBK_COORD_FOUND)
  39164. + result = rewind_right(tap, (int)pos->position.pos);
  39165. + else {
  39166. + tap->coord->node = NULL;
  39167. + done_lh(tap->lh);
  39168. + result = RETERR(-EIO);
  39169. + }
  39170. + return result;
  39171. +}
  39172. +
  39173. +/*
  39174. + * handling of non-unique keys: calculate at what ordinal position within
  39175. + * sequence of directory items with identical keys @pos is.
  39176. + */
  39177. +static int set_pos(struct inode *inode, struct readdir_pos *pos, tap_t *tap)
  39178. +{
  39179. + int result;
  39180. + coord_t coord;
  39181. + lock_handle lh;
  39182. + tap_t scan;
  39183. + de_id *did;
  39184. + reiser4_key de_key;
  39185. +
  39186. + coord_init_zero(&coord);
  39187. + init_lh(&lh);
  39188. + reiser4_tap_init(&scan, &coord, &lh, ZNODE_READ_LOCK);
  39189. + reiser4_tap_copy(&scan, tap);
  39190. + reiser4_tap_load(&scan);
  39191. + pos->position.pos = 0;
  39192. +
  39193. + did = &pos->position.dir_entry_key;
  39194. +
  39195. + if (is_valid_dir_coord(inode, scan.coord)) {
  39196. +
  39197. + build_de_id_by_key(unit_key_by_coord(scan.coord, &de_key), did);
  39198. +
  39199. + while (1) {
  39200. +
  39201. + result = go_prev_unit(&scan);
  39202. + if (result != 0)
  39203. + break;
  39204. +
  39205. + if (!is_valid_dir_coord(inode, scan.coord)) {
  39206. + result = -EINVAL;
  39207. + break;
  39208. + }
  39209. +
  39210. + /* get key of directory entry */
  39211. + unit_key_by_coord(scan.coord, &de_key);
  39212. + if (de_id_key_cmp(did, &de_key) != EQUAL_TO) {
  39213. + /* duplicate-sequence is over */
  39214. + break;
  39215. + }
  39216. + pos->position.pos++;
  39217. + }
  39218. + } else
  39219. + result = RETERR(-ENOENT);
  39220. + reiser4_tap_relse(&scan);
  39221. + reiser4_tap_done(&scan);
  39222. + return result;
  39223. +}
  39224. +
  39225. +/*
  39226. + * "rewind" directory to @offset, i.e., set @pos and @tap correspondingly.
  39227. + */
  39228. +static int dir_rewind(struct file *dir, loff_t *fpos, struct readdir_pos *pos, tap_t *tap)
  39229. +{
  39230. + __u64 destination;
  39231. + __s64 shift;
  39232. + int result;
  39233. + struct inode *inode;
  39234. + loff_t dirpos;
  39235. +
  39236. + assert("nikita-2553", dir != NULL);
  39237. + assert("nikita-2548", pos != NULL);
  39238. + assert("nikita-2551", tap->coord != NULL);
  39239. + assert("nikita-2552", tap->lh != NULL);
  39240. +
  39241. + dirpos = reiser4_get_dir_fpos(dir, *fpos);
  39242. + shift = dirpos - pos->fpos;
  39243. + /* this is logical directory entry within @dir which we are rewinding
  39244. + * to */
  39245. + destination = pos->entry_no + shift;
  39246. +
  39247. + inode = file_inode(dir);
  39248. + if (dirpos < 0)
  39249. + return RETERR(-EINVAL);
  39250. + else if (destination == 0ll || dirpos == 0) {
  39251. + /* rewind to the beginning of directory */
  39252. + memset(pos, 0, sizeof *pos);
  39253. + return dir_go_to(dir, pos, tap);
  39254. + } else if (destination >= inode->i_size)
  39255. + return RETERR(-ENOENT);
  39256. +
  39257. + if (shift < 0) {
  39258. + /* I am afraid of negative numbers */
  39259. + shift = -shift;
  39260. + /* rewinding to the left */
  39261. + if (shift <= (int)pos->position.pos) {
  39262. + /* destination is within sequence of entries with
  39263. + duplicate keys. */
  39264. + result = dir_go_to(dir, pos, tap);
  39265. + } else {
  39266. + shift -= pos->position.pos;
  39267. + while (1) {
  39268. + /* repetitions: deadlock is possible when
  39269. + going to the left. */
  39270. + result = dir_go_to(dir, pos, tap);
  39271. + if (result == 0) {
  39272. + result = rewind_left(tap, shift);
  39273. + if (result == -E_DEADLOCK) {
  39274. + reiser4_tap_done(tap);
  39275. + continue;
  39276. + }
  39277. + }
  39278. + break;
  39279. + }
  39280. + }
  39281. + } else {
  39282. + /* rewinding to the right */
  39283. + result = dir_go_to(dir, pos, tap);
  39284. + if (result == 0)
  39285. + result = rewind_right(tap, shift);
  39286. + }
  39287. + if (result == 0) {
  39288. + result = set_pos(inode, pos, tap);
  39289. + if (result == 0) {
  39290. + /* update pos->position.pos */
  39291. + pos->entry_no = destination;
  39292. + pos->fpos = dirpos;
  39293. + }
  39294. + }
  39295. + return result;
  39296. +}
  39297. +
  39298. +/*
  39299. + * Function that is called by common_readdir() on each directory entry while
  39300. + * doing readdir. ->filldir callback may block, so we had to release long term
  39301. + * lock while calling it. To avoid repeating tree traversal, seal is used. If
  39302. + * seal is broken, we return -E_REPEAT. Node is unlocked in this case.
  39303. + *
  39304. + * Whether node is unlocked in case of any other error is undefined. It is
  39305. + * guaranteed to be still locked if success (0) is returned.
  39306. + *
  39307. + * When ->filldir() wants no more, feed_entry() returns 1, and node is
  39308. + * unlocked.
  39309. + */
  39310. +static int
  39311. +feed_entry(tap_t *tap, struct dir_context *context)
  39312. +{
  39313. + item_plugin *iplug;
  39314. + char *name;
  39315. + reiser4_key sd_key;
  39316. + int result;
  39317. + char buf[DE_NAME_BUF_LEN];
  39318. + char name_buf[32];
  39319. + char *local_name;
  39320. + unsigned file_type;
  39321. + seal_t seal;
  39322. + coord_t *coord;
  39323. + reiser4_key entry_key;
  39324. +
  39325. + coord = tap->coord;
  39326. + iplug = item_plugin_by_coord(coord);
  39327. +
  39328. + /* pointer to name within the node */
  39329. + name = iplug->s.dir.extract_name(coord, buf);
  39330. + assert("nikita-1371", name != NULL);
  39331. +
  39332. + /* key of object the entry points to */
  39333. + if (iplug->s.dir.extract_key(coord, &sd_key) != 0)
  39334. + return RETERR(-EIO);
  39335. +
  39336. + /* we must release longterm znode lock before calling filldir to avoid
  39337. + deadlock which may happen if filldir causes page fault. So, copy
  39338. + name to intermediate buffer */
  39339. + if (strlen(name) + 1 > sizeof(name_buf)) {
  39340. + local_name = kmalloc(strlen(name) + 1,
  39341. + reiser4_ctx_gfp_mask_get());
  39342. + if (local_name == NULL)
  39343. + return RETERR(-ENOMEM);
  39344. + } else
  39345. + local_name = name_buf;
  39346. +
  39347. + strcpy(local_name, name);
  39348. + file_type = iplug->s.dir.extract_file_type(coord);
  39349. +
  39350. + unit_key_by_coord(coord, &entry_key);
  39351. + reiser4_seal_init(&seal, coord, &entry_key);
  39352. +
  39353. + longterm_unlock_znode(tap->lh);
  39354. +
  39355. + /*
  39356. + * send information about directory entry to the ->filldir() filler
  39357. + * supplied to us by caller (VFS).
  39358. + *
  39359. + * ->filldir is entitled to do weird things. For example, ->filldir
  39360. + * supplied by knfsd re-enters file system. Make sure no locks are
  39361. + * held.
  39362. + */
  39363. + assert("nikita-3436", lock_stack_isclean(get_current_lock_stack()));
  39364. +
  39365. + reiser4_txn_restart_current();
  39366. + if (!dir_emit(context, name, (int)strlen(name),
  39367. + /* inode number of the object bound by this entry */
  39368. + oid_to_uino(get_key_objectid(&sd_key)), file_type))
  39369. + /* ->filldir() is satisfied. (no space in buffer, IOW) */
  39370. + result = 1;
  39371. + else
  39372. + result = reiser4_seal_validate(&seal, coord, &entry_key,
  39373. + tap->lh, tap->mode,
  39374. + ZNODE_LOCK_HIPRI);
  39375. +
  39376. + if (local_name != name_buf)
  39377. + kfree(local_name);
  39378. +
  39379. + return result;
  39380. +}
  39381. +
  39382. +static void move_entry(struct readdir_pos *pos, coord_t *coord)
  39383. +{
  39384. + reiser4_key de_key;
  39385. + de_id *did;
  39386. +
  39387. + /* update @pos */
  39388. + ++pos->entry_no;
  39389. + did = &pos->position.dir_entry_key;
  39390. +
  39391. + /* get key of directory entry */
  39392. + unit_key_by_coord(coord, &de_key);
  39393. +
  39394. + if (de_id_key_cmp(did, &de_key) == EQUAL_TO)
  39395. + /* we are within sequence of directory entries
  39396. + with duplicate keys. */
  39397. + ++pos->position.pos;
  39398. + else {
  39399. + pos->position.pos = 0;
  39400. + build_de_id_by_key(&de_key, did);
  39401. + }
  39402. + ++pos->fpos;
  39403. +}
  39404. +
  39405. +/*
  39406. + * STATELESS READDIR
  39407. + *
  39408. + * readdir support in reiser4 relies on ability to update readdir_pos embedded
  39409. + * into reiser4_file_fsdata on each directory modification (name insertion and
  39410. + * removal), see reiser4_readdir_common() function below. This obviously doesn't
  39411. + * work when reiser4 is accessed over NFS, because NFS doesn't keep any state
  39412. + * across client READDIR requests for the same directory.
  39413. + *
  39414. + * To address this we maintain a "pool" of detached reiser4_file_fsdata
  39415. + * (d_cursor). Whenever NFS readdir request comes, we detect this, and try to
  39416. + * find detached reiser4_file_fsdata corresponding to previous readdir
  39417. + * request. In other words, additional state is maintained on the
  39418. + * server. (This is somewhat contrary to the design goals of NFS protocol.)
  39419. + *
  39420. + * To efficiently detect when our ->readdir() method is called by NFS server,
  39421. + * dentry is marked as "stateless" in reiser4_decode_fh() (this is checked by
  39422. + * file_is_stateless() function).
  39423. + *
  39424. + * To find out d_cursor in the pool, we encode client id (cid) in the highest
  39425. + * bits of NFS readdir cookie: when first readdir request comes to the given
  39426. + * directory from the given client, cookie is set to 0. This situation is
  39427. + * detected, global cid_counter is incremented, and stored in highest bits of
  39428. + * all direntry offsets returned to the client, including last one. As the
  39429. + * only valid readdir cookie is one obtained as direntry->offset, we are
  39430. + * guaranteed that next readdir request (continuing current one) will have
  39431. + * current cid in the highest bits of starting readdir cookie. All d_cursors
  39432. + * are hashed into per-super-block hash table by (oid, cid) key.
  39433. + *
  39434. + * In addition d_cursors are placed into per-super-block radix tree where they
  39435. + * are keyed by oid alone. This is necessary to efficiently remove them during
  39436. + * rmdir.
  39437. + *
  39438. + * At last, currently unused d_cursors are linked into special list. This list
  39439. + is used by d_cursor_shrink to reclaim d_cursors on memory pressure.
  39440. + *
  39441. + */
  39442. +
  39443. +/*
  39444. + * prepare for readdir.
  39445. + *
  39446. + * NOTE: @f->f_pos may be out-of-date (iterate() vs readdir()).
  39447. + * @fpos is effective position.
  39448. + */
  39449. +static int dir_readdir_init(struct file *f, loff_t* fpos, tap_t *tap,
  39450. + struct readdir_pos **pos)
  39451. +{
  39452. + struct inode *inode;
  39453. + reiser4_file_fsdata *fsdata;
  39454. + int result;
  39455. +
  39456. + assert("nikita-1359", f != NULL);
  39457. + inode = file_inode(f);
  39458. + assert("nikita-1360", inode != NULL);
  39459. +
  39460. + if (!S_ISDIR(inode->i_mode))
  39461. + return RETERR(-ENOTDIR);
  39462. +
  39463. + /* try to find detached readdir state */
  39464. + result = reiser4_attach_fsdata(f, fpos, inode);
  39465. + if (result != 0)
  39466. + return result;
  39467. +
  39468. + fsdata = reiser4_get_file_fsdata(f);
  39469. + assert("nikita-2571", fsdata != NULL);
  39470. + if (IS_ERR(fsdata))
  39471. + return PTR_ERR(fsdata);
  39472. +
  39473. + /* add file descriptor to the readdir list hanging of directory
  39474. + * inode. This list is used to scan "readdirs-in-progress" while
  39475. + * inserting or removing names in the directory. */
  39476. + spin_lock_inode(inode);
  39477. + if (list_empty_careful(&fsdata->dir.linkage))
  39478. + list_add(&fsdata->dir.linkage, get_readdir_list(inode));
  39479. + *pos = &fsdata->dir.readdir;
  39480. + spin_unlock_inode(inode);
  39481. +
  39482. + /* move @tap to the current position */
  39483. + return dir_rewind(f, fpos, *pos, tap);
  39484. +}
  39485. +
  39486. +/* this is implementation of vfs's llseek method of struct file_operations for
  39487. + typical directory
  39488. + See comment before reiser4_iterate_common() for explanation.
  39489. +*/
  39490. +loff_t reiser4_llseek_dir_common(struct file *file, loff_t off, int origin)
  39491. +{
  39492. + reiser4_context *ctx;
  39493. + loff_t result;
  39494. + struct inode *inode;
  39495. +
  39496. + inode = file_inode(file);
  39497. +
  39498. + ctx = reiser4_init_context(inode->i_sb);
  39499. + if (IS_ERR(ctx))
  39500. + return PTR_ERR(ctx);
  39501. +
  39502. + inode_lock(inode);
  39503. +
  39504. + /* update ->f_pos */
  39505. + result = default_llseek_unlocked(file, off, origin);
  39506. + if (result >= 0) {
  39507. + int ff;
  39508. + coord_t coord;
  39509. + lock_handle lh;
  39510. + tap_t tap;
  39511. + struct readdir_pos *pos;
  39512. +
  39513. + coord_init_zero(&coord);
  39514. + init_lh(&lh);
  39515. + reiser4_tap_init(&tap, &coord, &lh, ZNODE_READ_LOCK);
  39516. +
  39517. + ff = dir_readdir_init(file, &file->f_pos, &tap, &pos);
  39518. + reiser4_detach_fsdata(file);
  39519. + if (ff != 0)
  39520. + result = (loff_t) ff;
  39521. + reiser4_tap_done(&tap);
  39522. + }
  39523. + reiser4_detach_fsdata(file);
  39524. + inode_unlock(inode);
  39525. +
  39526. + reiser4_exit_context(ctx);
  39527. + return result;
  39528. +}
  39529. +
  39530. +/* this is common implementation of vfs's readdir method of struct
  39531. + file_operations
  39532. +
  39533. + readdir problems:
  39534. +
  39535. + readdir(2)/getdents(2) interface is based on implicit assumption that
  39536. + readdir can be restarted from any particular point by supplying file system
  39537. + with off_t-full of data. That is, file system fills ->d_off field in struct
  39538. + dirent and later user passes ->d_off to the seekdir(3), which is, actually,
  39539. + implemented by glibc as lseek(2) on directory.
  39540. +
  39541. + Reiser4 cannot restart readdir from 64 bits of data, because two last
  39542. + components of the key of directory entry are unknown, which given 128 bits:
  39543. + locality and type fields in the key of directory entry are always known, to
  39544. + start readdir() from given point objectid and offset fields have to be
  39545. + filled.
  39546. +
  39547. + Traditional UNIX API for scanning through directory
  39548. + (readdir/seekdir/telldir/opendir/closedir/rewinddir/getdents) is based on the
  39549. + assumption that directory is structured very much like regular file, in
  39550. + particular, it is implied that each name within given directory (directory
  39551. + entry) can be uniquely identified by scalar offset and that such offset is
  39552. + stable across the life-time of the name it identifies.
  39553. +
  39554. + This is manifestly not so for reiser4. In reiser4 the only stable unique
  39555. + identifier for the directory entry is its key that doesn't fit into
  39556. + seekdir/telldir API.
  39557. +
  39558. + solution:
  39559. +
  39560. + Within each file descriptor participating in readdir-ing of directory
  39561. + plugin/dir/dir.h:readdir_pos is maintained. This structure keeps track of
  39562. + the "current" directory entry that file descriptor looks at. It contains a
  39563. + key of directory entry (plus some additional info to deal with non-unique
  39564. + keys that we wouldn't dwell on here) and a logical position of this
  39565. + directory entry starting from the beginning of the directory, that is
  39566. + ordinal number of this entry in the readdir order.
  39567. +
  39568. + Obviously this logical position is not stable in the face of directory
  39569. + modifications. To work around this, on each addition or removal of directory
  39570. + entry all file descriptors for directory inode are scanned and their
  39571. + readdir_pos are updated accordingly (adjust_dir_pos()).
  39572. +*/
  39573. +int reiser4_iterate_common(struct file *f /* directory file being read */,
  39574. + struct dir_context *context /* callback data passed to us by VFS */)
  39575. +{
  39576. + reiser4_context *ctx;
  39577. + int result;
  39578. + struct inode *inode;
  39579. + coord_t coord;
  39580. + lock_handle lh;
  39581. + tap_t tap;
  39582. + struct readdir_pos *pos;
  39583. +
  39584. + assert("nikita-1359", f != NULL);
  39585. + inode = file_inode(f);
  39586. + assert("nikita-1360", inode != NULL);
  39587. +
  39588. + if (!S_ISDIR(inode->i_mode))
  39589. + return RETERR(-ENOTDIR);
  39590. +
  39591. + ctx = reiser4_init_context(inode->i_sb);
  39592. + if (IS_ERR(ctx))
  39593. + return PTR_ERR(ctx);
  39594. +
  39595. + coord_init_zero(&coord);
  39596. + init_lh(&lh);
  39597. + reiser4_tap_init(&tap, &coord, &lh, ZNODE_READ_LOCK);
  39598. +
  39599. + reiser4_readdir_readahead_init(inode, &tap);
  39600. +
  39601. +repeat:
  39602. + result = dir_readdir_init(f, &context->pos, &tap, &pos);
  39603. + if (result == 0) {
  39604. + result = reiser4_tap_load(&tap);
  39605. + /* scan entries one by one feeding them to @filld */
  39606. + while (result == 0) {
  39607. + coord_t *coord;
  39608. +
  39609. + coord = tap.coord;
  39610. + assert("nikita-2572", coord_is_existing_unit(coord));
  39611. + assert("nikita-3227", is_valid_dir_coord(inode, coord));
  39612. +
  39613. + result = feed_entry(&tap, context);
  39614. + if (result > 0) {
  39615. + break;
  39616. + } else if (result == 0) {
  39617. + ++context->pos;
  39618. + result = go_next_unit(&tap);
  39619. + if (result == -E_NO_NEIGHBOR ||
  39620. + result == -ENOENT) {
  39621. + result = 0;
  39622. + break;
  39623. + } else if (result == 0) {
  39624. + if (is_valid_dir_coord(inode, coord))
  39625. + move_entry(pos, coord);
  39626. + else
  39627. + break;
  39628. + }
  39629. + } else if (result == -E_REPEAT) {
  39630. + /* feed_entry() had to restart. */
  39631. + ++context->pos;
  39632. + reiser4_tap_relse(&tap);
  39633. + goto repeat;
  39634. + } else
  39635. + warning("vs-1617",
  39636. + "reiser4_readdir_common: unexpected error %d",
  39637. + result);
  39638. + }
  39639. + reiser4_tap_relse(&tap);
  39640. +
  39641. + if (result >= 0)
  39642. + f->f_version = inode->i_version;
  39643. + } else if (result == -E_NO_NEIGHBOR || result == -ENOENT)
  39644. + result = 0;
  39645. + reiser4_tap_done(&tap);
  39646. + reiser4_detach_fsdata(f);
  39647. +
  39648. + /* try to update directory's atime */
  39649. + if (reiser4_grab_space_force(inode_file_plugin(inode)->estimate.update(inode),
  39650. + BA_CAN_COMMIT) != 0)
  39651. + warning("", "failed to update atime on readdir: %llu",
  39652. + get_inode_oid(inode));
  39653. + else
  39654. + file_accessed(f);
  39655. +
  39656. + context_set_commit_async(ctx);
  39657. + reiser4_exit_context(ctx);
  39658. +
  39659. + return (result <= 0) ? result : 0;
  39660. +}
  39661. +
  39662. +/*
  39663. + * Local variables:
  39664. + * c-indentation-style: "K&R"
  39665. + * mode-name: "LC"
  39666. + * c-basic-offset: 8
  39667. + * tab-width: 8
  39668. + * fill-column: 79
  39669. + * End:
  39670. + */
  39671. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/file_plugin_common.c linux-4.14.2/fs/reiser4/plugin/file_plugin_common.c
  39672. --- linux-4.14.2.orig/fs/reiser4/plugin/file_plugin_common.c 1970-01-01 01:00:00.000000000 +0100
  39673. +++ linux-4.14.2/fs/reiser4/plugin/file_plugin_common.c 2017-11-26 22:13:09.000000000 +0100
  39674. @@ -0,0 +1,1004 @@
  39675. +/* Copyright 2005 by Hans Reiser, licensing governed by
  39676. + reiser4/README */
  39677. +
  39678. +/* this file contains typical implementations for most of methods of
  39679. + file plugin
  39680. +*/
  39681. +
  39682. +#include "../inode.h"
  39683. +#include "object.h"
  39684. +#include "../safe_link.h"
  39685. +
  39686. +static int insert_new_sd(struct inode *inode);
  39687. +static int update_sd(struct inode *inode);
  39688. +
  39689. +/* this is common implementation of write_sd_by_inode method of file plugin
  39690. + either insert stat data or update it
  39691. + */
  39692. +int write_sd_by_inode_common(struct inode *inode/* object to save */)
  39693. +{
  39694. + int result;
  39695. +
  39696. + assert("nikita-730", inode != NULL);
  39697. +
  39698. + if (reiser4_inode_get_flag(inode, REISER4_NO_SD))
  39699. + /* object doesn't have stat-data yet */
  39700. + result = insert_new_sd(inode);
  39701. + else
  39702. + result = update_sd(inode);
  39703. + if (result != 0 && result != -ENAMETOOLONG && result != -ENOMEM)
  39704. + /* Don't issue warnings about "name is too long" */
  39705. + warning("nikita-2221", "Failed to save sd for %llu: %i",
  39706. + (unsigned long long)get_inode_oid(inode), result);
  39707. + return result;
  39708. +}
  39709. +
  39710. +/* this is common implementation of key_by_inode method of file plugin
  39711. + */
  39712. +int
  39713. +key_by_inode_and_offset_common(struct inode *inode, loff_t off,
  39714. + reiser4_key * key)
  39715. +{
  39716. + reiser4_key_init(key);
  39717. + set_key_locality(key, reiser4_inode_data(inode)->locality_id);
  39718. + set_key_ordering(key, get_inode_ordering(inode));
  39719. + set_key_objectid(key, get_inode_oid(inode)); /*FIXME: inode->i_ino */
  39720. + set_key_type(key, KEY_BODY_MINOR);
  39721. + set_key_offset(key, (__u64) off);
  39722. + return 0;
  39723. +}
  39724. +
  39725. +/* this is common implementation of set_plug_in_inode method of file plugin
  39726. + */
  39727. +int set_plug_in_inode_common(struct inode *object /* inode to set plugin on */ ,
  39728. + struct inode *parent /* parent object */ ,
  39729. + reiser4_object_create_data * data /* creational
  39730. + * data */ )
  39731. +{
  39732. + __u64 mask;
  39733. +
  39734. + object->i_mode = data->mode;
  39735. + /* this should be plugin decision */
  39736. + object->i_uid = current_fsuid();
  39737. + object->i_mtime = object->i_atime = object->i_ctime = current_time(object);
  39738. +
  39739. + /* support for BSD style group-id assignment. See mount's manual page
  39740. + description of bsdgroups ext2 mount options for more details */
  39741. + if (reiser4_is_set(object->i_sb, REISER4_BSD_GID))
  39742. + object->i_gid = parent->i_gid;
  39743. + else if (parent->i_mode & S_ISGID) {
  39744. + /* parent directory has sguid bit */
  39745. + object->i_gid = parent->i_gid;
  39746. + if (S_ISDIR(object->i_mode))
  39747. + /* sguid is inherited by sub-directories */
  39748. + object->i_mode |= S_ISGID;
  39749. + } else
  39750. + object->i_gid = current_fsgid();
  39751. +
  39752. + /* this object doesn't have stat-data yet */
  39753. + reiser4_inode_set_flag(object, REISER4_NO_SD);
  39754. +#if 0
  39755. + /* this is now called after all inode plugins are initialized:
  39756. + do_create_vfs_child after adjust_to_parent */
  39757. + /* setup inode and file-operations for this inode */
  39758. + setup_inode_ops(object, data);
  39759. +#endif
  39760. + reiser4_seal_init(&reiser4_inode_data(object)->sd_seal, NULL, NULL);
  39761. + mask = (1 << UNIX_STAT) | (1 << LIGHT_WEIGHT_STAT);
  39762. + if (!reiser4_is_set(object->i_sb, REISER4_32_BIT_TIMES))
  39763. + mask |= (1 << LARGE_TIMES_STAT);
  39764. +
  39765. + reiser4_inode_data(object)->extmask = mask;
  39766. + return 0;
  39767. +}
  39768. +
  39769. +/* this is common implementation of adjust_to_parent method of file plugin for
  39770. + regular files
  39771. + */
  39772. +int adjust_to_parent_common(struct inode *object /* new object */ ,
  39773. + struct inode *parent /* parent directory */ ,
  39774. + struct inode *root/* root directory */)
  39775. +{
  39776. + assert("nikita-2165", object != NULL);
  39777. + if (parent == NULL)
  39778. + parent = root;
  39779. + assert("nikita-2069", parent != NULL);
  39780. +
  39781. + /*
  39782. + * inherit missing plugins from parent
  39783. + */
  39784. +
  39785. + grab_plugin_pset(object, parent, PSET_FILE);
  39786. + grab_plugin_pset(object, parent, PSET_SD);
  39787. + grab_plugin_pset(object, parent, PSET_FORMATTING);
  39788. + grab_plugin_pset(object, parent, PSET_PERM);
  39789. + return 0;
  39790. +}
  39791. +
  39792. +/* this is common implementation of adjust_to_parent method of file plugin for
  39793. + typical directories
  39794. + */
  39795. +int adjust_to_parent_common_dir(struct inode *object /* new object */ ,
  39796. + struct inode *parent /* parent directory */ ,
  39797. + struct inode *root/* root directory */)
  39798. +{
  39799. + int result = 0;
  39800. + pset_member memb;
  39801. +
  39802. + assert("nikita-2166", object != NULL);
  39803. + if (parent == NULL)
  39804. + parent = root;
  39805. + assert("nikita-2167", parent != NULL);
  39806. +
  39807. + /*
  39808. + * inherit missing plugins from parent
  39809. + */
  39810. + for (memb = 0; memb < PSET_LAST; ++memb) {
  39811. + result = grab_plugin_pset(object, parent, memb);
  39812. + if (result != 0)
  39813. + break;
  39814. + }
  39815. + return result;
  39816. +}
  39817. +
  39818. +int adjust_to_parent_cryptcompress(struct inode *object /* new object */ ,
  39819. + struct inode *parent /* parent directory */,
  39820. + struct inode *root/* root directory */)
  39821. +{
  39822. + int result;
  39823. + result = adjust_to_parent_common(object, parent, root);
  39824. + if (result)
  39825. + return result;
  39826. + assert("edward-1416", parent != NULL);
  39827. +
  39828. + grab_plugin_pset(object, parent, PSET_CLUSTER);
  39829. + grab_plugin_pset(object, parent, PSET_CIPHER);
  39830. + grab_plugin_pset(object, parent, PSET_DIGEST);
  39831. + grab_plugin_pset(object, parent, PSET_COMPRESSION);
  39832. + grab_plugin_pset(object, parent, PSET_COMPRESSION_MODE);
  39833. +
  39834. + return 0;
  39835. +}
  39836. +
  39837. +/* this is common implementation of create_object method of file plugin
  39838. + */
  39839. +int reiser4_create_object_common(struct inode *object, struct inode *parent,
  39840. + reiser4_object_create_data * data)
  39841. +{
  39842. + reiser4_block_nr reserve;
  39843. + assert("nikita-744", object != NULL);
  39844. + assert("nikita-745", parent != NULL);
  39845. + assert("nikita-747", data != NULL);
  39846. + assert("nikita-748", reiser4_inode_get_flag(object, REISER4_NO_SD));
  39847. +
  39848. + reserve = estimate_create_common(object);
  39849. + if (reiser4_grab_space(reserve, BA_CAN_COMMIT))
  39850. + return RETERR(-ENOSPC);
  39851. + return write_sd_by_inode_common(object);
  39852. +}
  39853. +
  39854. +static int common_object_delete_no_reserve(struct inode *inode);
  39855. +
  39856. +/**
  39857. + * reiser4_delete_object_common - delete_object of file_plugin
  39858. + * @inode: inode to be deleted
  39859. + *
  39860. + * This is common implementation of delete_object method of file_plugin. It
  39861. + * applies to object its deletion consists of removing two items - stat data
  39862. + * and safe-link.
  39863. + */
  39864. +int reiser4_delete_object_common(struct inode *inode)
  39865. +{
  39866. + int result;
  39867. +
  39868. + assert("nikita-1477", inode != NULL);
  39869. + /* FIXME: if file body deletion failed (i/o error, for instance),
  39870. + inode->i_size can be != 0 here */
  39871. + assert("nikita-3420", inode->i_size == 0 || S_ISLNK(inode->i_mode));
  39872. + assert("nikita-3421", inode->i_nlink == 0);
  39873. +
  39874. + if (!reiser4_inode_get_flag(inode, REISER4_NO_SD)) {
  39875. + reiser4_block_nr reserve;
  39876. +
  39877. + /* grab space which is needed to remove 2 items from the tree:
  39878. + stat data and safe-link */
  39879. + reserve = 2 *
  39880. + estimate_one_item_removal(reiser4_tree_by_inode(inode));
  39881. + if (reiser4_grab_space_force(reserve,
  39882. + BA_RESERVED | BA_CAN_COMMIT))
  39883. + return RETERR(-ENOSPC);
  39884. + result = common_object_delete_no_reserve(inode);
  39885. + } else
  39886. + result = 0;
  39887. + return result;
  39888. +}
  39889. +
  39890. +/**
  39891. + * reiser4_delete_dir_common - delete_object of file_plugin
  39892. + * @inode: inode to be deleted
  39893. + *
  39894. + * This is common implementation of delete_object method of file_plugin for
  39895. + * typical directory. It calls done method of dir_plugin to remove "." and
  39896. + * removes stat data and safe-link.
  39897. + */
  39898. +int reiser4_delete_dir_common(struct inode *inode)
  39899. +{
  39900. + int result;
  39901. + dir_plugin *dplug;
  39902. +
  39903. + assert("", (get_current_context() &&
  39904. + get_current_context()->trans->atom == NULL));
  39905. +
  39906. + dplug = inode_dir_plugin(inode);
  39907. + assert("vs-1101", dplug && dplug->done);
  39908. +
  39909. + /* kill cursors which might be attached to inode */
  39910. + reiser4_kill_cursors(inode);
  39911. +
  39912. + /* grab space enough for removing two items */
  39913. + if (reiser4_grab_space
  39914. + (2 * estimate_one_item_removal(reiser4_tree_by_inode(inode)),
  39915. + BA_RESERVED | BA_CAN_COMMIT))
  39916. + return RETERR(-ENOSPC);
  39917. +
  39918. + result = dplug->done(inode);
  39919. + if (!result)
  39920. + result = common_object_delete_no_reserve(inode);
  39921. + return result;
  39922. +}
  39923. +
  39924. +/* this is common implementation of add_link method of file plugin
  39925. + */
  39926. +int reiser4_add_link_common(struct inode *object, struct inode *parent)
  39927. +{
  39928. + /*
  39929. + * increment ->i_nlink and update ->i_ctime
  39930. + */
  39931. +
  39932. + INODE_INC_NLINK(object);
  39933. + object->i_ctime = current_time(object);
  39934. + return 0;
  39935. +}
  39936. +
  39937. +/* this is common implementation of rem_link method of file plugin
  39938. + */
  39939. +int reiser4_rem_link_common(struct inode *object, struct inode *parent)
  39940. +{
  39941. + assert("nikita-2021", object != NULL);
  39942. + assert("nikita-2163", object->i_nlink > 0);
  39943. +
  39944. + /*
  39945. + * decrement ->i_nlink and update ->i_ctime
  39946. + */
  39947. +
  39948. + INODE_DROP_NLINK(object);
  39949. + object->i_ctime = current_time(object);
  39950. + return 0;
  39951. +}
  39952. +
  39953. +/* this is common implementation of rem_link method of file plugin for typical
  39954. + directory
  39955. +*/
  39956. +int rem_link_common_dir(struct inode *object, struct inode *parent UNUSED_ARG)
  39957. +{
  39958. + assert("nikita-20211", object != NULL);
  39959. + assert("nikita-21631", object->i_nlink > 0);
  39960. +
  39961. + /*
  39962. + * decrement ->i_nlink and update ->i_ctime
  39963. + */
  39964. + if(object->i_nlink == 2)
  39965. + INODE_SET_NLINK(object, 0);
  39966. +
  39967. + else
  39968. + INODE_DROP_NLINK(object);
  39969. + object->i_ctime = current_time(object);
  39970. + return 0;
  39971. +}
  39972. +
  39973. +/* this is common implementation of owns_item method of file plugin
  39974. + compare objectids of keys in inode and coord */
  39975. +int owns_item_common(const struct inode *inode, /* object to check
  39976. + * against */
  39977. + const coord_t *coord/* coord to check */)
  39978. +{
  39979. + reiser4_key item_key;
  39980. + reiser4_key file_key;
  39981. +
  39982. + assert("nikita-760", inode != NULL);
  39983. + assert("nikita-761", coord != NULL);
  39984. +
  39985. + return coord_is_existing_item(coord) &&
  39986. + (get_key_objectid(build_sd_key(inode, &file_key)) ==
  39987. + get_key_objectid(item_key_by_coord(coord, &item_key)));
  39988. +}
  39989. +
  39990. +/* this is common implementation of owns_item method of file plugin
  39991. + for typical directory
  39992. +*/
  39993. +int owns_item_common_dir(const struct inode *inode,/* object to check against */
  39994. + const coord_t *coord/* coord of item to check */)
  39995. +{
  39996. + reiser4_key item_key;
  39997. +
  39998. + assert("nikita-1335", inode != NULL);
  39999. + assert("nikita-1334", coord != NULL);
  40000. +
  40001. + if (plugin_of_group(item_plugin_by_coord(coord), DIR_ENTRY_ITEM_TYPE))
  40002. + return get_key_locality(item_key_by_coord(coord, &item_key)) ==
  40003. + get_inode_oid(inode);
  40004. + else
  40005. + return owns_item_common(inode, coord);
  40006. +}
  40007. +
  40008. +/* this is common implementation of can_add_link method of file plugin
  40009. + checks whether yet another hard links to this object can be added
  40010. +*/
  40011. +int can_add_link_common(const struct inode *object/* object to check */)
  40012. +{
  40013. + assert("nikita-732", object != NULL);
  40014. +
  40015. + /* inode->i_nlink is unsigned int, so just check for integer
  40016. + overflow */
  40017. + return object->i_nlink + 1 != 0;
  40018. +}
  40019. +
  40020. +/* this is common implementation of can_rem_link method of file plugin for
  40021. + typical directory
  40022. +*/
  40023. +int can_rem_link_common_dir(const struct inode *inode)
  40024. +{
  40025. + /* is_dir_empty() returns 0 if dir is empty */
  40026. + return !is_dir_empty(inode);
  40027. +}
  40028. +
  40029. +/* this is common implementation of detach method of file plugin for typical
  40030. + directory
  40031. +*/
  40032. +int reiser4_detach_common_dir(struct inode *child, struct inode *parent)
  40033. +{
  40034. + dir_plugin *dplug;
  40035. +
  40036. + dplug = inode_dir_plugin(child);
  40037. + assert("nikita-2883", dplug != NULL);
  40038. + assert("nikita-2884", dplug->detach != NULL);
  40039. + return dplug->detach(child, parent);
  40040. +}
  40041. +
  40042. +/* this is common implementation of bind method of file plugin for typical
  40043. + directory
  40044. +*/
  40045. +int reiser4_bind_common_dir(struct inode *child, struct inode *parent)
  40046. +{
  40047. + dir_plugin *dplug;
  40048. +
  40049. + dplug = inode_dir_plugin(child);
  40050. + assert("nikita-2646", dplug != NULL);
  40051. + return dplug->attach(child, parent);
  40052. +}
  40053. +
  40054. +static int process_truncate(struct inode *, __u64 size);
  40055. +
  40056. +/* this is common implementation of safelink method of file plugin
  40057. + */
  40058. +int safelink_common(struct inode *object, reiser4_safe_link_t link, __u64 value)
  40059. +{
  40060. + int result;
  40061. +
  40062. + assert("vs-1705", get_current_context()->trans->atom == NULL);
  40063. + if (link == SAFE_UNLINK)
  40064. + /* nothing to do. iput() in the caller (process_safelink) will
  40065. + * finish with file */
  40066. + result = 0;
  40067. + else if (link == SAFE_TRUNCATE)
  40068. + result = process_truncate(object, value);
  40069. + else {
  40070. + warning("nikita-3438", "Unrecognized safe-link type: %i", link);
  40071. + result = RETERR(-EIO);
  40072. + }
  40073. + return result;
  40074. +}
  40075. +
  40076. +/* this is common implementation of estimate.create method of file plugin
  40077. + can be used when object creation involves insertion of one item (usually stat
  40078. + data) into tree
  40079. +*/
  40080. +reiser4_block_nr estimate_create_common(const struct inode *object)
  40081. +{
  40082. + return estimate_one_insert_item(reiser4_tree_by_inode(object));
  40083. +}
  40084. +
  40085. +/* this is common implementation of estimate.create method of file plugin for
  40086. + typical directory
  40087. + can be used when directory creation involves insertion of two items (usually
  40088. + stat data and item containing "." and "..") into tree
  40089. +*/
  40090. +reiser4_block_nr estimate_create_common_dir(const struct inode *object)
  40091. +{
  40092. + return 2 * estimate_one_insert_item(reiser4_tree_by_inode(object));
  40093. +}
  40094. +
  40095. +/* this is common implementation of estimate.update method of file plugin
  40096. + can be used when stat data update does not do more than inserting a unit
  40097. + into a stat data item which is probably true for most cases
  40098. +*/
  40099. +reiser4_block_nr estimate_update_common(const struct inode *inode)
  40100. +{
  40101. + return estimate_one_insert_into_item(reiser4_tree_by_inode(inode));
  40102. +}
  40103. +
  40104. +/* this is common implementation of estimate.unlink method of file plugin
  40105. + */
  40106. +reiser4_block_nr
  40107. +estimate_unlink_common(const struct inode *object UNUSED_ARG,
  40108. + const struct inode *parent UNUSED_ARG)
  40109. +{
  40110. + return 0;
  40111. +}
  40112. +
  40113. +/* this is common implementation of estimate.unlink method of file plugin for
  40114. + typical directory
  40115. +*/
  40116. +reiser4_block_nr
  40117. +estimate_unlink_common_dir(const struct inode *object,
  40118. + const struct inode *parent)
  40119. +{
  40120. + dir_plugin *dplug;
  40121. +
  40122. + dplug = inode_dir_plugin(object);
  40123. + assert("nikita-2888", dplug != NULL);
  40124. + assert("nikita-2887", dplug->estimate.unlink != NULL);
  40125. + return dplug->estimate.unlink(object, parent);
  40126. +}
  40127. +
  40128. +char *wire_write_common(struct inode *inode, char *start)
  40129. +{
  40130. + return build_inode_onwire(inode, start);
  40131. +}
  40132. +
  40133. +char *wire_read_common(char *addr, reiser4_object_on_wire * obj)
  40134. +{
  40135. + if (!obj)
  40136. + return locate_obj_key_id_onwire(addr);
  40137. + return extract_obj_key_id_from_onwire(addr, &obj->u.std.key_id);
  40138. +}
  40139. +
  40140. +struct dentry *wire_get_common(struct super_block *sb,
  40141. + reiser4_object_on_wire * obj)
  40142. +{
  40143. + struct inode *inode;
  40144. + struct dentry *dentry;
  40145. + reiser4_key key;
  40146. +
  40147. + extract_key_from_id(&obj->u.std.key_id, &key);
  40148. + inode = reiser4_iget(sb, &key, 1);
  40149. + if (!IS_ERR(inode)) {
  40150. + reiser4_iget_complete(inode);
  40151. + dentry = d_obtain_alias(inode);
  40152. + if (!IS_ERR(dentry))
  40153. + dentry->d_op = &get_super_private(sb)->ops.dentry;
  40154. + } else if (PTR_ERR(inode) == -ENOENT)
  40155. + /*
  40156. + * inode wasn't found at the key encoded in the file
  40157. + * handle. Hence, file handle is stale.
  40158. + */
  40159. + dentry = ERR_PTR(RETERR(-ESTALE));
  40160. + else
  40161. + dentry = (void *)inode;
  40162. + return dentry;
  40163. +}
  40164. +
  40165. +int wire_size_common(struct inode *inode)
  40166. +{
  40167. + return inode_onwire_size(inode);
  40168. +}
  40169. +
  40170. +void wire_done_common(reiser4_object_on_wire * obj)
  40171. +{
  40172. + /* nothing to do */
  40173. +}
  40174. +
  40175. +/* helper function to print errors */
  40176. +static void key_warning(const reiser4_key * key /* key to print */ ,
  40177. + const struct inode *inode,
  40178. + int code/* error code to print */)
  40179. +{
  40180. + assert("nikita-716", key != NULL);
  40181. +
  40182. + if (code != -ENOMEM) {
  40183. + warning("nikita-717", "Error for inode %llu (%i)",
  40184. + (unsigned long long)get_key_objectid(key), code);
  40185. + reiser4_print_key("for key", key);
  40186. + }
  40187. +}
  40188. +
  40189. +/* NIKITA-FIXME-HANS: perhaps this function belongs in another file? */
  40190. +#if REISER4_DEBUG
  40191. +static void
  40192. +check_inode_seal(const struct inode *inode,
  40193. + const coord_t *coord, const reiser4_key * key)
  40194. +{
  40195. + reiser4_key unit_key;
  40196. +
  40197. + unit_key_by_coord(coord, &unit_key);
  40198. + assert("nikita-2752",
  40199. + WITH_DATA_RET(coord->node, 1, keyeq(key, &unit_key)));
  40200. + assert("nikita-2753", get_inode_oid(inode) == get_key_objectid(key));
  40201. +}
  40202. +
  40203. +static void check_sd_coord(coord_t *coord, const reiser4_key * key)
  40204. +{
  40205. + reiser4_key ukey;
  40206. +
  40207. + coord_clear_iplug(coord);
  40208. + if (zload(coord->node))
  40209. + return;
  40210. +
  40211. + if (!coord_is_existing_unit(coord) ||
  40212. + !item_plugin_by_coord(coord) ||
  40213. + !keyeq(unit_key_by_coord(coord, &ukey), key) ||
  40214. + (znode_get_level(coord->node) != LEAF_LEVEL) ||
  40215. + !item_is_statdata(coord)) {
  40216. + warning("nikita-1901", "Conspicuous seal");
  40217. + reiser4_print_key("key", key);
  40218. + print_coord("coord", coord, 1);
  40219. + impossible("nikita-2877", "no way");
  40220. + }
  40221. + zrelse(coord->node);
  40222. +}
  40223. +
  40224. +#else
  40225. +#define check_inode_seal(inode, coord, key) noop
  40226. +#define check_sd_coord(coord, key) noop
  40227. +#endif
  40228. +
  40229. +/* insert new stat-data into tree. Called with inode state
  40230. + locked. Return inode state locked. */
  40231. +static int insert_new_sd(struct inode *inode/* inode to create sd for */)
  40232. +{
  40233. + int result;
  40234. + reiser4_key key;
  40235. + coord_t coord;
  40236. + reiser4_item_data data;
  40237. + char *area;
  40238. + reiser4_inode *ref;
  40239. + lock_handle lh;
  40240. + oid_t oid;
  40241. +
  40242. + assert("nikita-723", inode != NULL);
  40243. + assert("nikita-3406", reiser4_inode_get_flag(inode, REISER4_NO_SD));
  40244. +
  40245. + ref = reiser4_inode_data(inode);
  40246. + spin_lock_inode(inode);
  40247. +
  40248. + if (ref->plugin_mask != 0)
  40249. + /* inode has non-standard plugins */
  40250. + inode_set_extension(inode, PLUGIN_STAT);
  40251. + /*
  40252. + * prepare specification of new item to be inserted
  40253. + */
  40254. +
  40255. + data.iplug = inode_sd_plugin(inode);
  40256. + data.length = data.iplug->s.sd.save_len(inode);
  40257. + spin_unlock_inode(inode);
  40258. +
  40259. + data.data = NULL;
  40260. + data.user = 0;
  40261. +/* could be optimized for case where there is only one node format in
  40262. + * use in the filesystem, probably there are lots of such
  40263. + * places we could optimize for only one node layout.... -Hans */
  40264. + if (data.length > reiser4_tree_by_inode(inode)->nplug->max_item_size()) {
  40265. + /* This is silly check, but we don't know actual node where
  40266. + insertion will go into. */
  40267. + return RETERR(-ENAMETOOLONG);
  40268. + }
  40269. + oid = oid_allocate(inode->i_sb);
  40270. +/* NIKITA-FIXME-HANS: what is your opinion on whether this error check should be
  40271. + * encapsulated into oid_allocate? */
  40272. + if (oid == ABSOLUTE_MAX_OID)
  40273. + return RETERR(-EOVERFLOW);
  40274. +
  40275. + set_inode_oid(inode, oid);
  40276. +
  40277. + coord_init_zero(&coord);
  40278. + init_lh(&lh);
  40279. +
  40280. + result = insert_by_key(reiser4_tree_by_inode(inode),
  40281. + build_sd_key(inode, &key), &data, &coord, &lh,
  40282. + /* stat data lives on a leaf level */
  40283. + LEAF_LEVEL, CBK_UNIQUE);
  40284. +
  40285. + /* we don't want to re-check that somebody didn't insert
  40286. + stat-data while we were doing io, because if it did,
  40287. + insert_by_key() returned error. */
  40288. + /* but what _is_ possible is that plugin for inode's stat-data,
  40289. + list of non-standard plugins or their state would change
  40290. + during io, so that stat-data wouldn't fit into sd. To avoid
  40291. + this race we keep inode_state lock. This lock has to be
  40292. + taken each time you access inode in a way that would cause
  40293. + changes in sd size: changing plugins etc.
  40294. + */
  40295. +
  40296. + if (result == IBK_INSERT_OK) {
  40297. + coord_clear_iplug(&coord);
  40298. + result = zload(coord.node);
  40299. + if (result == 0) {
  40300. + /* have we really inserted stat data? */
  40301. + assert("nikita-725", item_is_statdata(&coord));
  40302. +
  40303. + /* inode was just created. It is inserted into hash
  40304. + table, but no directory entry was yet inserted into
  40305. + parent. So, inode is inaccessible through
  40306. + ->lookup(). All places that directly grab inode
  40307. + from hash-table (like old knfsd), should check
  40308. + IMMUTABLE flag that is set by common_create_child.
  40309. + */
  40310. + assert("nikita-3240", data.iplug != NULL);
  40311. + assert("nikita-3241", data.iplug->s.sd.save != NULL);
  40312. + area = item_body_by_coord(&coord);
  40313. + result = data.iplug->s.sd.save(inode, &area);
  40314. + znode_make_dirty(coord.node);
  40315. + if (result == 0) {
  40316. + /* object has stat-data now */
  40317. + reiser4_inode_clr_flag(inode, REISER4_NO_SD);
  40318. + reiser4_inode_set_flag(inode,
  40319. + REISER4_SDLEN_KNOWN);
  40320. + /* initialise stat-data seal */
  40321. + reiser4_seal_init(&ref->sd_seal, &coord, &key);
  40322. + ref->sd_coord = coord;
  40323. + check_inode_seal(inode, &coord, &key);
  40324. + } else if (result != -ENOMEM)
  40325. + /*
  40326. + * convert any other error code to -EIO to
  40327. + * avoid confusing user level with unexpected
  40328. + * errors.
  40329. + */
  40330. + result = RETERR(-EIO);
  40331. + zrelse(coord.node);
  40332. + }
  40333. + }
  40334. + done_lh(&lh);
  40335. +
  40336. + if (result != 0)
  40337. + key_warning(&key, inode, result);
  40338. + else
  40339. + oid_count_allocated();
  40340. +
  40341. + return result;
  40342. +}
  40343. +
  40344. +/* find sd of inode in a tree, deal with errors */
  40345. +int lookup_sd(struct inode *inode /* inode to look sd for */ ,
  40346. + znode_lock_mode lock_mode /* lock mode */ ,
  40347. + coord_t *coord /* resulting coord */ ,
  40348. + lock_handle * lh /* resulting lock handle */ ,
  40349. + const reiser4_key * key /* resulting key */ ,
  40350. + int silent)
  40351. +{
  40352. + int result;
  40353. + __u32 flags;
  40354. +
  40355. + assert("nikita-1692", inode != NULL);
  40356. + assert("nikita-1693", coord != NULL);
  40357. + assert("nikita-1694", key != NULL);
  40358. +
  40359. + /* look for the object's stat data in a tree.
  40360. + This returns in "node" pointer to a locked znode and in "pos"
  40361. + position of an item found in node. Both are only valid if
  40362. + coord_found is returned. */
  40363. + flags = (lock_mode == ZNODE_WRITE_LOCK) ? CBK_FOR_INSERT : 0;
  40364. + flags |= CBK_UNIQUE;
  40365. + /*
  40366. + * traverse tree to find stat data. We cannot use vroot here, because
  40367. + * it only covers _body_ of the file, and stat data don't belong
  40368. + * there.
  40369. + */
  40370. + result = coord_by_key(reiser4_tree_by_inode(inode),
  40371. + key,
  40372. + coord,
  40373. + lh,
  40374. + lock_mode,
  40375. + FIND_EXACT, LEAF_LEVEL, LEAF_LEVEL, flags, NULL);
  40376. + if (REISER4_DEBUG && result == 0)
  40377. + check_sd_coord(coord, key);
  40378. +
  40379. + if (result != 0 && !silent)
  40380. + key_warning(key, inode, result);
  40381. + return result;
  40382. +}
  40383. +
  40384. +static int
  40385. +locate_inode_sd(struct inode *inode,
  40386. + reiser4_key * key, coord_t *coord, lock_handle * lh)
  40387. +{
  40388. + reiser4_inode *state;
  40389. + seal_t seal;
  40390. + int result;
  40391. +
  40392. + assert("nikita-3483", inode != NULL);
  40393. +
  40394. + state = reiser4_inode_data(inode);
  40395. + spin_lock_inode(inode);
  40396. + *coord = state->sd_coord;
  40397. + coord_clear_iplug(coord);
  40398. + seal = state->sd_seal;
  40399. + spin_unlock_inode(inode);
  40400. +
  40401. + build_sd_key(inode, key);
  40402. + /* first, try to use seal */
  40403. + if (reiser4_seal_is_set(&seal)) {
  40404. + result = reiser4_seal_validate(&seal,
  40405. + coord,
  40406. + key,
  40407. + lh, ZNODE_WRITE_LOCK,
  40408. + ZNODE_LOCK_LOPRI);
  40409. + if (result == 0) {
  40410. + check_sd_coord(coord, key);
  40411. + return 0;
  40412. + }
  40413. + }
  40414. + /* hint is invalid,
  40415. + * so traverse tree
  40416. + */
  40417. + coord_init_zero(coord);
  40418. + return lookup_sd(inode, ZNODE_WRITE_LOCK, coord, lh, key, 0);
  40419. +}
  40420. +
  40421. +#if REISER4_DEBUG
  40422. +static int all_but_offset_key_eq(const reiser4_key * k1, const reiser4_key * k2)
  40423. +{
  40424. + return (get_key_locality(k1) == get_key_locality(k2) &&
  40425. + get_key_type(k1) == get_key_type(k2) &&
  40426. + get_key_band(k1) == get_key_band(k2) &&
  40427. + get_key_ordering(k1) == get_key_ordering(k2) &&
  40428. + get_key_objectid(k1) == get_key_objectid(k2));
  40429. +}
  40430. +
  40431. +#include "../tree_walk.h"
  40432. +
  40433. +/* make some checks before and after stat-data resize operation */
  40434. +static int check_sd_resize(struct inode *inode, coord_t *coord,
  40435. + int length, int progress/* 1 means after resize */)
  40436. +{
  40437. + int ret = 0;
  40438. + lock_handle left_lock;
  40439. + coord_t left_coord;
  40440. + reiser4_key left_key;
  40441. + reiser4_key key;
  40442. +
  40443. + if (inode_file_plugin(inode) !=
  40444. + file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID))
  40445. + return 0;
  40446. + if (!length)
  40447. + return 0;
  40448. + if (coord->item_pos != 0)
  40449. + return 0;
  40450. +
  40451. + init_lh(&left_lock);
  40452. + ret = reiser4_get_left_neighbor(&left_lock,
  40453. + coord->node,
  40454. + ZNODE_WRITE_LOCK,
  40455. + GN_CAN_USE_UPPER_LEVELS);
  40456. + if (ret == -E_REPEAT || ret == -E_NO_NEIGHBOR ||
  40457. + ret == -ENOENT || ret == -EINVAL
  40458. + || ret == -E_DEADLOCK) {
  40459. + ret = 0;
  40460. + goto exit;
  40461. + }
  40462. + ret = zload(left_lock.node);
  40463. + if (ret)
  40464. + goto exit;
  40465. + coord_init_last_unit(&left_coord, left_lock.node);
  40466. + item_key_by_coord(&left_coord, &left_key);
  40467. + item_key_by_coord(coord, &key);
  40468. +
  40469. + if (all_but_offset_key_eq(&key, &left_key))
  40470. + /* corruption occurred */
  40471. + ret = 1;
  40472. + zrelse(left_lock.node);
  40473. + exit:
  40474. + done_lh(&left_lock);
  40475. + return ret;
  40476. +}
  40477. +#endif
  40478. +
  40479. +/* update stat-data at @coord */
  40480. +static int
  40481. +update_sd_at(struct inode *inode, coord_t *coord, reiser4_key * key,
  40482. + lock_handle * lh)
  40483. +{
  40484. + int result;
  40485. + reiser4_item_data data;
  40486. + char *area;
  40487. + reiser4_inode *state;
  40488. + znode *loaded;
  40489. +
  40490. + state = reiser4_inode_data(inode);
  40491. +
  40492. + coord_clear_iplug(coord);
  40493. + result = zload(coord->node);
  40494. + if (result != 0)
  40495. + return result;
  40496. + loaded = coord->node;
  40497. +
  40498. + spin_lock_inode(inode);
  40499. + assert("nikita-728", inode_sd_plugin(inode) != NULL);
  40500. + data.iplug = inode_sd_plugin(inode);
  40501. +
  40502. + /* if inode has non-standard plugins, add appropriate stat data
  40503. + * extension */
  40504. + if (state->extmask & (1 << PLUGIN_STAT)) {
  40505. + if (state->plugin_mask == 0)
  40506. + inode_clr_extension(inode, PLUGIN_STAT);
  40507. + } else if (state->plugin_mask != 0)
  40508. + inode_set_extension(inode, PLUGIN_STAT);
  40509. +
  40510. + if (state->extmask & (1 << HEIR_STAT)) {
  40511. + if (state->heir_mask == 0)
  40512. + inode_clr_extension(inode, HEIR_STAT);
  40513. + } else if (state->heir_mask != 0)
  40514. + inode_set_extension(inode, HEIR_STAT);
  40515. +
  40516. + /* data.length is how much space to add to (or remove
  40517. + from if negative) sd */
  40518. + if (!reiser4_inode_get_flag(inode, REISER4_SDLEN_KNOWN)) {
  40519. + /* recalculate stat-data length */
  40520. + data.length =
  40521. + data.iplug->s.sd.save_len(inode) -
  40522. + item_length_by_coord(coord);
  40523. + reiser4_inode_set_flag(inode, REISER4_SDLEN_KNOWN);
  40524. + } else
  40525. + data.length = 0;
  40526. + spin_unlock_inode(inode);
  40527. +
  40528. + /* if on-disk stat data is of different length than required
  40529. + for this inode, resize it */
  40530. +
  40531. + if (data.length != 0) {
  40532. + data.data = NULL;
  40533. + data.user = 0;
  40534. +
  40535. + assert("edward-1441",
  40536. + !check_sd_resize(inode, coord,
  40537. + data.length, 0/* before resize */));
  40538. +
  40539. + /* insertion code requires that insertion point (coord) was
  40540. + * between units. */
  40541. + coord->between = AFTER_UNIT;
  40542. + result = reiser4_resize_item(coord, &data, key, lh,
  40543. + COPI_DONT_SHIFT_LEFT);
  40544. + if (result != 0) {
  40545. + key_warning(key, inode, result);
  40546. + zrelse(loaded);
  40547. + return result;
  40548. + }
  40549. + if (loaded != coord->node) {
  40550. + /* reiser4_resize_item moved coord to another node.
  40551. + Zload it */
  40552. + zrelse(loaded);
  40553. + coord_clear_iplug(coord);
  40554. + result = zload(coord->node);
  40555. + if (result != 0)
  40556. + return result;
  40557. + loaded = coord->node;
  40558. + }
  40559. + assert("edward-1442",
  40560. + !check_sd_resize(inode, coord,
  40561. + data.length, 1/* after resize */));
  40562. + }
  40563. + area = item_body_by_coord(coord);
  40564. + spin_lock_inode(inode);
  40565. + result = data.iplug->s.sd.save(inode, &area);
  40566. + znode_make_dirty(coord->node);
  40567. +
  40568. + /* re-initialise stat-data seal */
  40569. +
  40570. + /*
  40571. + * coord.between was possibly skewed from AT_UNIT when stat-data size
  40572. + * was changed and new extensions were pasted into item.
  40573. + */
  40574. + coord->between = AT_UNIT;
  40575. + reiser4_seal_init(&state->sd_seal, coord, key);
  40576. + state->sd_coord = *coord;
  40577. + spin_unlock_inode(inode);
  40578. + check_inode_seal(inode, coord, key);
  40579. + zrelse(loaded);
  40580. + return result;
  40581. +}
  40582. +
  40583. +/* Update existing stat-data in a tree. Called with inode state locked. Return
  40584. + inode state locked. */
  40585. +static int update_sd(struct inode *inode/* inode to update sd for */)
  40586. +{
  40587. + int result;
  40588. + reiser4_key key;
  40589. + coord_t coord;
  40590. + lock_handle lh;
  40591. +
  40592. + assert("nikita-726", inode != NULL);
  40593. +
  40594. + /* no stat-data, nothing to update?! */
  40595. + assert("nikita-3482", !reiser4_inode_get_flag(inode, REISER4_NO_SD));
  40596. +
  40597. + init_lh(&lh);
  40598. +
  40599. + result = locate_inode_sd(inode, &key, &coord, &lh);
  40600. + if (result == 0)
  40601. + result = update_sd_at(inode, &coord, &key, &lh);
  40602. + done_lh(&lh);
  40603. +
  40604. + return result;
  40605. +}
  40606. +
  40607. +/* helper for reiser4_delete_object_common and reiser4_delete_dir_common.
  40608. + Remove object stat data. Space for that must be reserved by caller before
  40609. +*/
  40610. +static int
  40611. +common_object_delete_no_reserve(struct inode *inode/* object to remove */)
  40612. +{
  40613. + int result;
  40614. +
  40615. + assert("nikita-1477", inode != NULL);
  40616. +
  40617. + if (!reiser4_inode_get_flag(inode, REISER4_NO_SD)) {
  40618. + reiser4_key sd_key;
  40619. +
  40620. + build_sd_key(inode, &sd_key);
  40621. + result =
  40622. + reiser4_cut_tree(reiser4_tree_by_inode(inode),
  40623. + &sd_key, &sd_key, NULL, 0);
  40624. + if (result == 0) {
  40625. + reiser4_inode_set_flag(inode, REISER4_NO_SD);
  40626. + result = oid_release(inode->i_sb, get_inode_oid(inode));
  40627. + if (result == 0) {
  40628. + oid_count_released();
  40629. +
  40630. + result = safe_link_del(reiser4_tree_by_inode(inode),
  40631. + get_inode_oid(inode),
  40632. + SAFE_UNLINK);
  40633. + }
  40634. + }
  40635. + } else
  40636. + result = 0;
  40637. + return result;
  40638. +}
  40639. +
  40640. +/* helper for safelink_common */
  40641. +static int process_truncate(struct inode *inode, __u64 size)
  40642. +{
  40643. + int result;
  40644. + struct iattr attr;
  40645. + file_plugin *fplug;
  40646. + reiser4_context *ctx;
  40647. + struct dentry dentry;
  40648. +
  40649. + assert("vs-21", is_in_reiser4_context());
  40650. + ctx = reiser4_init_context(inode->i_sb);
  40651. + assert("vs-22", !IS_ERR(ctx));
  40652. +
  40653. + attr.ia_size = size;
  40654. + attr.ia_valid = ATTR_SIZE | ATTR_CTIME;
  40655. + fplug = inode_file_plugin(inode);
  40656. +
  40657. + inode_lock(inode);
  40658. + assert("vs-1704", get_current_context()->trans->atom == NULL);
  40659. + dentry.d_inode = inode;
  40660. + result = inode->i_op->setattr(&dentry, &attr);
  40661. + inode_unlock(inode);
  40662. +
  40663. + context_set_commit_async(ctx);
  40664. + reiser4_exit_context(ctx);
  40665. +
  40666. + return result;
  40667. +}
  40668. +
  40669. +/*
  40670. + Local variables:
  40671. + c-indentation-style: "K&R"
  40672. + mode-name: "LC"
  40673. + c-basic-offset: 8
  40674. + tab-width: 8
  40675. + fill-column: 80
  40676. + scroll-step: 1
  40677. + End:
  40678. +*/
  40679. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/hash.c linux-4.14.2/fs/reiser4/plugin/hash.c
  40680. --- linux-4.14.2.orig/fs/reiser4/plugin/hash.c 1970-01-01 01:00:00.000000000 +0100
  40681. +++ linux-4.14.2/fs/reiser4/plugin/hash.c 2017-11-26 22:13:09.000000000 +0100
  40682. @@ -0,0 +1,352 @@
  40683. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  40684. + * reiser4/README */
  40685. +
  40686. +/* Hash functions */
  40687. +
  40688. +#include "../debug.h"
  40689. +#include "plugin_header.h"
  40690. +#include "plugin.h"
  40691. +#include "../super.h"
  40692. +#include "../inode.h"
  40693. +
  40694. +#include <linux/types.h>
  40695. +
  40696. +/* old rupasov (yura) hash */
  40697. +static __u64 hash_rupasov(const unsigned char *name /* name to hash */ ,
  40698. + int len/* @name's length */)
  40699. +{
  40700. + int i;
  40701. + int j;
  40702. + int pow;
  40703. + __u64 a;
  40704. + __u64 c;
  40705. +
  40706. + assert("nikita-672", name != NULL);
  40707. + assert("nikita-673", len >= 0);
  40708. +
  40709. + for (pow = 1, i = 1; i < len; ++i)
  40710. + pow = pow * 10;
  40711. +
  40712. + if (len == 1)
  40713. + a = name[0] - 48;
  40714. + else
  40715. + a = (name[0] - 48) * pow;
  40716. +
  40717. + for (i = 1; i < len; ++i) {
  40718. + c = name[i] - 48;
  40719. + for (pow = 1, j = i; j < len - 1; ++j)
  40720. + pow = pow * 10;
  40721. + a = a + c * pow;
  40722. + }
  40723. + for (; i < 40; ++i) {
  40724. + c = '0' - 48;
  40725. + for (pow = 1, j = i; j < len - 1; ++j)
  40726. + pow = pow * 10;
  40727. + a = a + c * pow;
  40728. + }
  40729. +
  40730. + for (; i < 256; ++i) {
  40731. + c = i;
  40732. + for (pow = 1, j = i; j < len - 1; ++j)
  40733. + pow = pow * 10;
  40734. + a = a + c * pow;
  40735. + }
  40736. +
  40737. + a = a << 7;
  40738. + return a;
  40739. +}
  40740. +
  40741. +/* r5 hash */
  40742. +static __u64 hash_r5(const unsigned char *name /* name to hash */ ,
  40743. + int len UNUSED_ARG/* @name's length */)
  40744. +{
  40745. + __u64 a = 0;
  40746. +
  40747. + assert("nikita-674", name != NULL);
  40748. + assert("nikita-675", len >= 0);
  40749. +
  40750. + while (*name) {
  40751. + a += *name << 4;
  40752. + a += *name >> 4;
  40753. + a *= 11;
  40754. + name++;
  40755. + }
  40756. + return a;
  40757. +}
  40758. +
  40759. +/* Keyed 32-bit hash function using TEA in a Davis-Meyer function
  40760. + H0 = Key
  40761. + Hi = E Mi(Hi-1) + Hi-1
  40762. +
  40763. + (see Applied Cryptography, 2nd edition, p448).
  40764. +
  40765. + Jeremy Fitzhardinge <jeremy@zip.com.au> 1998
  40766. +
  40767. + Jeremy has agreed to the contents of reiserfs/README. -Hans
  40768. +
  40769. + This code was blindly upgraded to __u64 by s/__u32/__u64/g.
  40770. +*/
  40771. +static __u64 hash_tea(const unsigned char *name /* name to hash */ ,
  40772. + int len/* @name's length */)
  40773. +{
  40774. + __u64 k[] = { 0x9464a485u, 0x542e1a94u, 0x3e846bffu, 0xb75bcfc3u };
  40775. +
  40776. + __u64 h0 = k[0], h1 = k[1];
  40777. + __u64 a, b, c, d;
  40778. + __u64 pad;
  40779. + int i;
  40780. +
  40781. + assert("nikita-676", name != NULL);
  40782. + assert("nikita-677", len >= 0);
  40783. +
  40784. +#define DELTA 0x9E3779B9u
  40785. +#define FULLROUNDS 10 /* 32 is overkill, 16 is strong crypto */
  40786. +#define PARTROUNDS 6 /* 6 gets complete mixing */
  40787. +
  40788. +/* a, b, c, d - data; h0, h1 - accumulated hash */
  40789. +#define TEACORE(rounds) \
  40790. + do { \
  40791. + __u64 sum = 0; \
  40792. + int n = rounds; \
  40793. + __u64 b0, b1; \
  40794. + \
  40795. + b0 = h0; \
  40796. + b1 = h1; \
  40797. + \
  40798. + do { \
  40799. + sum += DELTA; \
  40800. + b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); \
  40801. + b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); \
  40802. + } while (--n); \
  40803. + \
  40804. + h0 += b0; \
  40805. + h1 += b1; \
  40806. + } while (0)
  40807. +
  40808. + pad = (__u64) len | ((__u64) len << 8);
  40809. + pad |= pad << 16;
  40810. +
  40811. + while (len >= 16) {
  40812. + a = (__u64) name[0] | (__u64) name[1] << 8 | (__u64) name[2] <<
  40813. + 16 | (__u64) name[3] << 24;
  40814. + b = (__u64) name[4] | (__u64) name[5] << 8 | (__u64) name[6] <<
  40815. + 16 | (__u64) name[7] << 24;
  40816. + c = (__u64) name[8] | (__u64) name[9] << 8 | (__u64) name[10] <<
  40817. + 16 | (__u64) name[11] << 24;
  40818. + d = (__u64) name[12] | (__u64) name[13] << 8 | (__u64) name[14]
  40819. + << 16 | (__u64) name[15] << 24;
  40820. +
  40821. + TEACORE(PARTROUNDS);
  40822. +
  40823. + len -= 16;
  40824. + name += 16;
  40825. + }
  40826. +
  40827. + if (len >= 12) {
  40828. + /* assert(len < 16); */
  40829. + if (len >= 16)
  40830. + *(int *)0 = 0;
  40831. +
  40832. + a = (__u64) name[0] | (__u64) name[1] << 8 | (__u64) name[2] <<
  40833. + 16 | (__u64) name[3] << 24;
  40834. + b = (__u64) name[4] | (__u64) name[5] << 8 | (__u64) name[6] <<
  40835. + 16 | (__u64) name[7] << 24;
  40836. + c = (__u64) name[8] | (__u64) name[9] << 8 | (__u64) name[10] <<
  40837. + 16 | (__u64) name[11] << 24;
  40838. +
  40839. + d = pad;
  40840. + for (i = 12; i < len; i++) {
  40841. + d <<= 8;
  40842. + d |= name[i];
  40843. + }
  40844. + } else if (len >= 8) {
  40845. + /* assert(len < 12); */
  40846. + if (len >= 12)
  40847. + *(int *)0 = 0;
  40848. + a = (__u64) name[0] | (__u64) name[1] << 8 | (__u64) name[2] <<
  40849. + 16 | (__u64) name[3] << 24;
  40850. + b = (__u64) name[4] | (__u64) name[5] << 8 | (__u64) name[6] <<
  40851. + 16 | (__u64) name[7] << 24;
  40852. +
  40853. + c = d = pad;
  40854. + for (i = 8; i < len; i++) {
  40855. + c <<= 8;
  40856. + c |= name[i];
  40857. + }
  40858. + } else if (len >= 4) {
  40859. + /* assert(len < 8); */
  40860. + if (len >= 8)
  40861. + *(int *)0 = 0;
  40862. + a = (__u64) name[0] | (__u64) name[1] << 8 | (__u64) name[2] <<
  40863. + 16 | (__u64) name[3] << 24;
  40864. +
  40865. + b = c = d = pad;
  40866. + for (i = 4; i < len; i++) {
  40867. + b <<= 8;
  40868. + b |= name[i];
  40869. + }
  40870. + } else {
  40871. + /* assert(len < 4); */
  40872. + if (len >= 4)
  40873. + *(int *)0 = 0;
  40874. + a = b = c = d = pad;
  40875. + for (i = 0; i < len; i++) {
  40876. + a <<= 8;
  40877. + a |= name[i];
  40878. + }
  40879. + }
  40880. +
  40881. + TEACORE(FULLROUNDS);
  40882. +
  40883. +/* return 0;*/
  40884. + return h0 ^ h1;
  40885. +
  40886. +}
  40887. +
  40888. +/* classical 64 bit Fowler/Noll/Vo-1 (FNV-1) hash.
  40889. +
  40890. + See http://www.isthe.com/chongo/tech/comp/fnv/ for details.
  40891. +
  40892. + Excerpts:
  40893. +
  40894. + FNV hashes are designed to be fast while maintaining a low collision
  40895. + rate.
  40896. +
  40897. + [This version also seems to preserve lexicographical order locally.]
  40898. +
  40899. + FNV hash algorithms and source code have been released into the public
  40900. + domain.
  40901. +
  40902. +*/
  40903. +static __u64 hash_fnv1(const unsigned char *name /* name to hash */ ,
  40904. + int len UNUSED_ARG/* @name's length */)
  40905. +{
  40906. + unsigned long long a = 0xcbf29ce484222325ull;
  40907. + const unsigned long long fnv_64_prime = 0x100000001b3ull;
  40908. +
  40909. + assert("nikita-678", name != NULL);
  40910. + assert("nikita-679", len >= 0);
  40911. +
  40912. + /* FNV-1 hash each octet in the buffer */
  40913. + for (; *name; ++name) {
  40914. + /* multiply by the 32 bit FNV magic prime mod 2^64 */
  40915. + a *= fnv_64_prime;
  40916. + /* xor the bottom with the current octet */
  40917. + a ^= (unsigned long long)(*name);
  40918. + }
  40919. + /* return our new hash value */
  40920. + return a;
  40921. +}
  40922. +
  40923. +/* degenerate hash function used to simplify testing of non-unique key
  40924. + handling */
  40925. +static __u64 hash_deg(const unsigned char *name UNUSED_ARG /* name to hash */ ,
  40926. + int len UNUSED_ARG/* @name's length */)
  40927. +{
  40928. + return 0xc0c0c0c010101010ull;
  40929. +}
  40930. +
  40931. +static int change_hash(struct inode *inode,
  40932. + reiser4_plugin * plugin,
  40933. + pset_member memb)
  40934. +{
  40935. + int result;
  40936. +
  40937. + assert("nikita-3503", inode != NULL);
  40938. + assert("nikita-3504", plugin != NULL);
  40939. +
  40940. + assert("nikita-3505", is_reiser4_inode(inode));
  40941. + assert("nikita-3507", plugin->h.type_id == REISER4_HASH_PLUGIN_TYPE);
  40942. +
  40943. + if (!plugin_of_group(inode_file_plugin(inode), REISER4_DIRECTORY_FILE))
  40944. + return RETERR(-EINVAL);
  40945. +
  40946. + result = 0;
  40947. + if (inode_hash_plugin(inode) == NULL ||
  40948. + inode_hash_plugin(inode)->h.id != plugin->h.id) {
  40949. + if (is_dir_empty(inode) == 0)
  40950. + result = aset_set_unsafe(&reiser4_inode_data(inode)->pset,
  40951. + PSET_HASH, plugin);
  40952. + else
  40953. + result = RETERR(-ENOTEMPTY);
  40954. +
  40955. + }
  40956. + return result;
  40957. +}
  40958. +
  40959. +static reiser4_plugin_ops hash_plugin_ops = {
  40960. + .init = NULL,
  40961. + .load = NULL,
  40962. + .save_len = NULL,
  40963. + .save = NULL,
  40964. + .change = change_hash
  40965. +};
  40966. +
  40967. +/* hash plugins */
  40968. +hash_plugin hash_plugins[LAST_HASH_ID] = {
  40969. + [RUPASOV_HASH_ID] = {
  40970. + .h = {
  40971. + .type_id = REISER4_HASH_PLUGIN_TYPE,
  40972. + .id = RUPASOV_HASH_ID,
  40973. + .pops = &hash_plugin_ops,
  40974. + .label = "rupasov",
  40975. + .desc = "Original Yura's hash",
  40976. + .linkage = {NULL, NULL}
  40977. + },
  40978. + .hash = hash_rupasov
  40979. + },
  40980. + [R5_HASH_ID] = {
  40981. + .h = {
  40982. + .type_id = REISER4_HASH_PLUGIN_TYPE,
  40983. + .id = R5_HASH_ID,
  40984. + .pops = &hash_plugin_ops,
  40985. + .label = "r5",
  40986. + .desc = "r5 hash",
  40987. + .linkage = {NULL, NULL}
  40988. + },
  40989. + .hash = hash_r5
  40990. + },
  40991. + [TEA_HASH_ID] = {
  40992. + .h = {
  40993. + .type_id = REISER4_HASH_PLUGIN_TYPE,
  40994. + .id = TEA_HASH_ID,
  40995. + .pops = &hash_plugin_ops,
  40996. + .label = "tea",
  40997. + .desc = "tea hash",
  40998. + .linkage = {NULL, NULL}
  40999. + },
  41000. + .hash = hash_tea
  41001. + },
  41002. + [FNV1_HASH_ID] = {
  41003. + .h = {
  41004. + .type_id = REISER4_HASH_PLUGIN_TYPE,
  41005. + .id = FNV1_HASH_ID,
  41006. + .pops = &hash_plugin_ops,
  41007. + .label = "fnv1",
  41008. + .desc = "fnv1 hash",
  41009. + .linkage = {NULL, NULL}
  41010. + },
  41011. + .hash = hash_fnv1
  41012. + },
  41013. + [DEGENERATE_HASH_ID] = {
  41014. + .h = {
  41015. + .type_id = REISER4_HASH_PLUGIN_TYPE,
  41016. + .id = DEGENERATE_HASH_ID,
  41017. + .pops = &hash_plugin_ops,
  41018. + .label = "degenerate hash",
  41019. + .desc = "Degenerate hash: only for testing",
  41020. + .linkage = {NULL, NULL}
  41021. + },
  41022. + .hash = hash_deg
  41023. + }
  41024. +};
  41025. +
  41026. +/* Make Linus happy.
  41027. + Local variables:
  41028. + c-indentation-style: "K&R"
  41029. + mode-name: "LC"
  41030. + c-basic-offset: 8
  41031. + tab-width: 8
  41032. + fill-column: 120
  41033. + End:
  41034. +*/
  41035. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/inode_ops.c linux-4.14.2/fs/reiser4/plugin/inode_ops.c
  41036. --- linux-4.14.2.orig/fs/reiser4/plugin/inode_ops.c 1970-01-01 01:00:00.000000000 +0100
  41037. +++ linux-4.14.2/fs/reiser4/plugin/inode_ops.c 2017-11-26 22:13:09.000000000 +0100
  41038. @@ -0,0 +1,891 @@
  41039. +/*
  41040. + * Copyright 2005 by Hans Reiser, licensing governed by reiser4/README
  41041. + */
  41042. +
  41043. +/*
  41044. + * this file contains typical implementations for most of methods of struct
  41045. + * inode_operations
  41046. + */
  41047. +
  41048. +#include "../inode.h"
  41049. +#include "../safe_link.h"
  41050. +
  41051. +#include <linux/namei.h>
  41052. +
  41053. +static int create_vfs_object(struct inode *parent, struct dentry *dentry,
  41054. + reiser4_object_create_data *data);
  41055. +
  41056. +/**
  41057. + * reiser4_create_common - create of inode operations
  41058. + * @parent: inode of parent directory
  41059. + * @dentry: dentry of new object to create
  41060. + * @mode: the permissions to use
  41061. + * @exclusive:
  41062. + *
  41063. + * This is common implementation of vfs's create method of struct
  41064. + * inode_operations.
  41065. + * Creates regular file using file plugin from parent directory plugin set.
  41066. + */
  41067. +int reiser4_create_common(struct inode *parent, struct dentry *dentry,
  41068. + umode_t mode, bool exclusive)
  41069. +{
  41070. + reiser4_object_create_data data;
  41071. + file_plugin *fplug;
  41072. +
  41073. + memset(&data, 0, sizeof data);
  41074. + data.mode = S_IFREG | mode;
  41075. + fplug = child_create_plugin(parent) ? : inode_create_plugin(parent);
  41076. + if (!plugin_of_group(fplug, REISER4_REGULAR_FILE)) {
  41077. + warning("vpf-1900", "'%s' is not a regular file plugin.",
  41078. + fplug->h.label);
  41079. + return RETERR(-EIO);
  41080. + }
  41081. + data.id = fplug->h.id;
  41082. + return create_vfs_object(parent, dentry, &data);
  41083. +}
  41084. +
  41085. +int reiser4_lookup_name(struct inode *dir, struct dentry *, reiser4_key *);
  41086. +void check_light_weight(struct inode *inode, struct inode *parent);
  41087. +
  41088. +/**
  41089. + * reiser4_lookup_common - lookup of inode operations
  41090. + * @parent: inode of directory to lookup into
  41091. + * @dentry: name to look for
  41092. + * @flags:
  41093. + *
  41094. + * This is common implementation of vfs's lookup method of struct
  41095. + * inode_operations.
  41096. + */
  41097. +struct dentry *reiser4_lookup_common(struct inode *parent,
  41098. + struct dentry *dentry,
  41099. + unsigned int flags)
  41100. +{
  41101. + reiser4_context *ctx;
  41102. + int result;
  41103. + struct dentry *new;
  41104. + struct inode *inode;
  41105. + reiser4_dir_entry_desc entry;
  41106. +
  41107. + ctx = reiser4_init_context(parent->i_sb);
  41108. + if (IS_ERR(ctx))
  41109. + return (struct dentry *)ctx;
  41110. +
  41111. + /* set up operations on dentry. */
  41112. + dentry->d_op = &get_super_private(parent->i_sb)->ops.dentry;
  41113. +
  41114. + result = reiser4_lookup_name(parent, dentry, &entry.key);
  41115. + if (result) {
  41116. + context_set_commit_async(ctx);
  41117. + reiser4_exit_context(ctx);
  41118. + if (result == -ENOENT) {
  41119. + /* object not found */
  41120. + if (!IS_DEADDIR(parent))
  41121. + d_add(dentry, NULL);
  41122. + return NULL;
  41123. + }
  41124. + return ERR_PTR(result);
  41125. + }
  41126. +
  41127. + inode = reiser4_iget(parent->i_sb, &entry.key, 0);
  41128. + if (IS_ERR(inode)) {
  41129. + context_set_commit_async(ctx);
  41130. + reiser4_exit_context(ctx);
  41131. + return ERR_PTR(PTR_ERR(inode));
  41132. + }
  41133. +
  41134. + /* success */
  41135. + check_light_weight(inode, parent);
  41136. + new = d_splice_alias(inode, dentry);
  41137. + reiser4_iget_complete(inode);
  41138. +
  41139. + /* prevent balance_dirty_pages() from being called: we don't want to
  41140. + * do this under directory i_mutex. */
  41141. + context_set_commit_async(ctx);
  41142. + reiser4_exit_context(ctx);
  41143. + return new;
  41144. +}
  41145. +
  41146. +static reiser4_block_nr common_estimate_link(struct inode *parent,
  41147. + struct inode *object);
  41148. +int reiser4_update_dir(struct inode *);
  41149. +
  41150. +static inline void reiser4_check_immutable(struct inode *inode)
  41151. +{
  41152. + do {
  41153. + if (!reiser4_inode_get_flag(inode, REISER4_IMMUTABLE))
  41154. + break;
  41155. + yield();
  41156. + } while (1);
  41157. +}
  41158. +
  41159. +/**
  41160. + * reiser4_link_common - link of inode operations
  41161. + * @existing: dentry of object which is to get new name
  41162. + * @parent: directory where new name is to be created
  41163. + * @newname: new name
  41164. + *
  41165. + * This is common implementation of vfs's link method of struct
  41166. + * inode_operations.
  41167. + */
  41168. +int reiser4_link_common(struct dentry *existing, struct inode *parent,
  41169. + struct dentry *newname)
  41170. +{
  41171. + reiser4_context *ctx;
  41172. + int result;
  41173. + struct inode *object;
  41174. + dir_plugin *parent_dplug;
  41175. + reiser4_dir_entry_desc entry;
  41176. + reiser4_object_create_data data;
  41177. + reiser4_block_nr reserve;
  41178. +
  41179. + ctx = reiser4_init_context(parent->i_sb);
  41180. + if (IS_ERR(ctx))
  41181. + return PTR_ERR(ctx);
  41182. +
  41183. + assert("nikita-1431", existing != NULL);
  41184. + assert("nikita-1432", parent != NULL);
  41185. + assert("nikita-1433", newname != NULL);
  41186. +
  41187. + object = existing->d_inode;
  41188. + assert("nikita-1434", object != NULL);
  41189. +
  41190. + /* check for race with create_object() */
  41191. + reiser4_check_immutable(object);
  41192. +
  41193. + parent_dplug = inode_dir_plugin(parent);
  41194. +
  41195. + memset(&entry, 0, sizeof entry);
  41196. + entry.obj = object;
  41197. +
  41198. + data.mode = object->i_mode;
  41199. + data.id = inode_file_plugin(object)->h.id;
  41200. +
  41201. + reserve = common_estimate_link(parent, existing->d_inode);
  41202. + if ((__s64) reserve < 0) {
  41203. + context_set_commit_async(ctx);
  41204. + reiser4_exit_context(ctx);
  41205. + return reserve;
  41206. + }
  41207. +
  41208. + if (reiser4_grab_space(reserve, BA_CAN_COMMIT)) {
  41209. + context_set_commit_async(ctx);
  41210. + reiser4_exit_context(ctx);
  41211. + return RETERR(-ENOSPC);
  41212. + }
  41213. +
  41214. + /*
  41215. + * Subtle race handling: sys_link() doesn't take i_mutex on @parent. It
  41216. + * means that link(2) can race against unlink(2) or rename(2), and
  41217. + * inode is dead (->i_nlink == 0) when reiser4_link() is entered.
  41218. + *
  41219. + * For such inode we have to undo special processing done in
  41220. + * reiser4_unlink() viz. creation of safe-link.
  41221. + */
  41222. + if (unlikely(object->i_nlink == 0)) {
  41223. + result = safe_link_del(reiser4_tree_by_inode(object),
  41224. + get_inode_oid(object), SAFE_UNLINK);
  41225. + if (result != 0) {
  41226. + context_set_commit_async(ctx);
  41227. + reiser4_exit_context(ctx);
  41228. + return result;
  41229. + }
  41230. + }
  41231. +
  41232. + /* increment nlink of @existing and update its stat data */
  41233. + result = reiser4_add_nlink(object, parent, 1);
  41234. + if (result == 0) {
  41235. + /* add entry to the parent */
  41236. + result =
  41237. + parent_dplug->add_entry(parent, newname, &data, &entry);
  41238. + if (result != 0) {
  41239. + /* failed to add entry to the parent, decrement nlink
  41240. + of @existing */
  41241. + reiser4_del_nlink(object, parent, 1);
  41242. + /*
  41243. + * now, if that failed, we have a file with too big
  41244. + * nlink---space leak, much better than directory
  41245. + * entry pointing to nowhere
  41246. + */
  41247. + }
  41248. + }
  41249. + if (result == 0) {
  41250. + atomic_inc(&object->i_count);
  41251. + /*
  41252. + * Upon successful completion, link() shall mark for update
  41253. + * the st_ctime field of the file. Also, the st_ctime and
  41254. + * st_mtime fields of the directory that contains the new
  41255. + * entry shall be marked for update. --SUS
  41256. + */
  41257. + result = reiser4_update_dir(parent);
  41258. + }
  41259. + if (result == 0)
  41260. + d_instantiate(newname, existing->d_inode);
  41261. +
  41262. + context_set_commit_async(ctx);
  41263. + reiser4_exit_context(ctx);
  41264. + return result;
  41265. +}
  41266. +
  41267. +static int unlink_check_and_grab(struct inode *parent, struct dentry *victim);
  41268. +
/**
 * reiser4_unlink_common - unlink of inode operations
 * @parent: inode of directory to remove name from
 * @victim: name to be removed
 *
 * This is common implementation of vfs's unlink method of struct
 * inode_operations.
 *
 * Returns 0 on success (and MUST return 0 once the directory entry has
 * been removed, even if later bookkeeping fails) or a negative errno.
 */
int reiser4_unlink_common(struct inode *parent, struct dentry *victim)
{
	reiser4_context *ctx;
	int result;
	struct inode *object;
	file_plugin *fplug;

	ctx = reiser4_init_context(parent->i_sb);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	object = victim->d_inode;
	fplug = inode_file_plugin(object);
	assert("nikita-2882", fplug->detach != NULL);

	/* estimate and reserve the disk space the removal will need */
	result = unlink_check_and_grab(parent, victim);
	if (result != 0) {
		context_set_commit_async(ctx);
		reiser4_exit_context(ctx);
		return result;
	}

	/* plugin-specific detach of @object from @parent */
	result = fplug->detach(object, parent);
	if (result == 0) {
		dir_plugin *parent_dplug;
		reiser4_dir_entry_desc entry;

		parent_dplug = inode_dir_plugin(parent);
		memset(&entry, 0, sizeof entry);

		/* first, delete directory entry */
		result = parent_dplug->rem_entry(parent, victim, &entry);
		if (result == 0) {
			/*
			 * if name was removed successfully, we _have_ to
			 * return 0 from this function, because upper level
			 * caller (vfs_{rmdir,unlink}) expect this.
			 *
			 * now that directory entry is removed, update
			 * stat-data
			 */
			reiser4_del_nlink(object, parent, 1);
			/*
			 * Upon successful completion, unlink() shall mark for
			 * update the st_ctime and st_mtime fields of the
			 * parent directory. Also, if the file's link count is
			 * not 0, the st_ctime field of the file shall be
			 * marked for update. --SUS
			 */
			reiser4_update_dir(parent);
			/* add safe-link for this file */
			if (object->i_nlink == 0)
				safe_link_add(object, SAFE_UNLINK);
		}
	}

	if (unlikely(result != 0)) {
		if (result != -ENOMEM)
			warning("nikita-3398", "Cannot unlink %llu (%i)",
				(unsigned long long)get_inode_oid(object),
				result);
		/* if operation failed commit pending inode modifications to
		 * the stat-data */
		reiser4_update_sd(object);
		reiser4_update_sd(parent);
	}

	/* return the space reserved by unlink_check_and_grab() */
	reiser4_release_reserved(object->i_sb);

	/* @object's i_ctime was updated by ->rem_link() method(). */

	/* @victim can be already removed from the disk by this time. Inode is
	   then marked so that iput() wouldn't try to remove stat data. But
	   inode itself is still there.
	 */

	/*
	 * we cannot release directory semaphore here, because name has
	 * already been deleted, but dentry (@victim) still exists. Prevent
	 * balance_dirty_pages() from being called on exiting this context: we
	 * don't want to do this under directory i_mutex.
	 */
	context_set_commit_async(ctx);
	reiser4_exit_context(ctx);
	return result;
}
  41363. +
  41364. +/**
  41365. + * reiser4_symlink_common - symlink of inode operations
  41366. + * @parent: inode of parent directory
  41367. + * @dentry: dentry of object to be created
  41368. + * @linkname: string symlink is to contain
  41369. + *
  41370. + * This is common implementation of vfs's symlink method of struct
  41371. + * inode_operations.
  41372. + * Creates object using file plugin SYMLINK_FILE_PLUGIN_ID.
  41373. + */
  41374. +int reiser4_symlink_common(struct inode *parent, struct dentry *dentry,
  41375. + const char *linkname)
  41376. +{
  41377. + reiser4_object_create_data data;
  41378. +
  41379. + memset(&data, 0, sizeof data);
  41380. + data.name = linkname;
  41381. + data.id = SYMLINK_FILE_PLUGIN_ID;
  41382. + data.mode = S_IFLNK | S_IRWXUGO;
  41383. + return create_vfs_object(parent, dentry, &data);
  41384. +}
  41385. +
  41386. +/**
  41387. + * reiser4_mkdir_common - mkdir of inode operations
  41388. + * @parent: inode of parent directory
  41389. + * @dentry: dentry of object to be created
  41390. + * @mode: the permissions to use
  41391. + *
  41392. + * This is common implementation of vfs's mkdir method of struct
  41393. + * inode_operations.
  41394. + * Creates object using file plugin DIRECTORY_FILE_PLUGIN_ID.
  41395. + */
  41396. +int reiser4_mkdir_common(struct inode *parent, struct dentry *dentry, umode_t mode)
  41397. +{
  41398. + reiser4_object_create_data data;
  41399. +
  41400. + memset(&data, 0, sizeof data);
  41401. + data.mode = S_IFDIR | mode;
  41402. + data.id = DIRECTORY_FILE_PLUGIN_ID;
  41403. + return create_vfs_object(parent, dentry, &data);
  41404. +}
  41405. +
  41406. +/**
  41407. + * reiser4_mknod_common - mknod of inode operations
  41408. + * @parent: inode of parent directory
  41409. + * @dentry: dentry of object to be created
  41410. + * @mode: the permissions to use and file type
  41411. + * @rdev: minor and major of new device file
  41412. + *
  41413. + * This is common implementation of vfs's mknod method of struct
  41414. + * inode_operations.
  41415. + * Creates object using file plugin SPECIAL_FILE_PLUGIN_ID.
  41416. + */
  41417. +int reiser4_mknod_common(struct inode *parent, struct dentry *dentry,
  41418. + umode_t mode, dev_t rdev)
  41419. +{
  41420. + reiser4_object_create_data data;
  41421. +
  41422. + memset(&data, 0, sizeof data);
  41423. + data.mode = mode;
  41424. + data.rdev = rdev;
  41425. + data.id = SPECIAL_FILE_PLUGIN_ID;
  41426. + return create_vfs_object(parent, dentry, &data);
  41427. +}
  41428. +
  41429. +/*
  41430. + * implementation of vfs's rename method of struct inode_operations for typical
  41431. + * directory is in inode_ops_rename.c
  41432. + */
  41433. +
  41434. +/**
  41435. + * reiser4_get_link_common: ->get_link() of inode_operations
  41436. + * @dentry: dentry of symlink
  41437. + *
  41438. + * Assumes that inode's i_private points to the content of symbolic link.
  41439. + */
  41440. +const char *reiser4_get_link_common(struct dentry *dentry,
  41441. + struct inode *inode,
  41442. + struct delayed_call *done)
  41443. +{
  41444. + if (!dentry)
  41445. + return ERR_PTR(-ECHILD);
  41446. +
  41447. + assert("vs-851", S_ISLNK(dentry->d_inode->i_mode));
  41448. +
  41449. + if (!dentry->d_inode->i_private ||
  41450. + !reiser4_inode_get_flag(dentry->d_inode, REISER4_GENERIC_PTR_USED))
  41451. + return ERR_PTR(RETERR(-EINVAL));
  41452. +
  41453. + return dentry->d_inode->i_private;
  41454. +}
  41455. +
  41456. +/**
  41457. + * reiser4_permission_common - permission of inode operations
  41458. + * @inode: inode to check permissions for
  41459. + * @mask: mode bits to check permissions for
  41460. + * @flags:
  41461. + *
  41462. + * Uses generic function to check for rwx permissions.
  41463. + */
  41464. +int reiser4_permission_common(struct inode *inode, int mask)
  41465. +{
  41466. + // generic_permission() says that it's rcu-aware...
  41467. +#if 0
  41468. + if (mask & MAY_NOT_BLOCK)
  41469. + return -ECHILD;
  41470. +#endif
  41471. + return generic_permission(inode, mask);
  41472. +}
  41473. +
  41474. +static int setattr_reserve(reiser4_tree *);
  41475. +
  41476. +/* this is common implementation of vfs's setattr method of struct
  41477. + inode_operations
  41478. +*/
  41479. +int reiser4_setattr_common(struct dentry *dentry, struct iattr *attr)
  41480. +{
  41481. + reiser4_context *ctx;
  41482. + struct inode *inode;
  41483. + int result;
  41484. +
  41485. + inode = dentry->d_inode;
  41486. + result = setattr_prepare(dentry, attr);
  41487. + if (result)
  41488. + return result;
  41489. +
  41490. + ctx = reiser4_init_context(inode->i_sb);
  41491. + if (IS_ERR(ctx))
  41492. + return PTR_ERR(ctx);
  41493. +
  41494. + assert("nikita-3119", !(attr->ia_valid & ATTR_SIZE));
  41495. +
  41496. + /*
  41497. + * grab disk space and call standard
  41498. + * setattr_copy();
  41499. + * mark_inode_dirty().
  41500. + */
  41501. + result = setattr_reserve(reiser4_tree_by_inode(inode));
  41502. + if (!result) {
  41503. + setattr_copy(inode, attr);
  41504. + mark_inode_dirty(inode);
  41505. + result = reiser4_update_sd(inode);
  41506. + }
  41507. + context_set_commit_async(ctx);
  41508. + reiser4_exit_context(ctx);
  41509. + return result;
  41510. +}
  41511. +
  41512. +/* this is common implementation of vfs's getattr method of struct
  41513. + inode_operations
  41514. +*/
  41515. +int reiser4_getattr_common(const struct path *path, struct kstat *stat,
  41516. + u32 request_mask, unsigned int flags)
  41517. +{
  41518. + struct inode *obj;
  41519. +
  41520. + assert("nikita-2298", path != NULL);
  41521. + assert("nikita-2299", stat != NULL);
  41522. +
  41523. + obj = d_inode(path->dentry);
  41524. +
  41525. + stat->dev = obj->i_sb->s_dev;
  41526. + stat->ino = oid_to_uino(get_inode_oid(obj));
  41527. + stat->mode = obj->i_mode;
  41528. + /* don't confuse userland with huge nlink. This is not entirely
  41529. + * correct, because nlink_t is not necessary 16 bit signed. */
  41530. + stat->nlink = min(obj->i_nlink, (typeof(obj->i_nlink)) 0x7fff);
  41531. + stat->uid = obj->i_uid;
  41532. + stat->gid = obj->i_gid;
  41533. + stat->rdev = obj->i_rdev;
  41534. + stat->atime = obj->i_atime;
  41535. + stat->mtime = obj->i_mtime;
  41536. + stat->ctime = obj->i_ctime;
  41537. + stat->size = obj->i_size;
  41538. + stat->blocks =
  41539. + (inode_get_bytes(obj) + VFS_BLKSIZE - 1) >> VFS_BLKSIZE_BITS;
  41540. + /* "preferred" blocksize for efficient file system I/O */
  41541. + stat->blksize = get_super_private(obj->i_sb)->optimal_io_size;
  41542. +
  41543. + return 0;
  41544. +}
  41545. +
  41546. +/* Estimate the maximum amount of nodes which might be allocated or changed on
  41547. + typical new object creation. Typical creation consists of calling create
  41548. + method of file plugin, adding directory entry to parent and update parent
  41549. + directory's stat data.
  41550. +*/
  41551. +static reiser4_block_nr estimate_create_vfs_object(struct inode *parent,
  41552. + /* parent object */
  41553. + struct inode *object
  41554. + /* object */)
  41555. +{
  41556. + assert("vpf-309", parent != NULL);
  41557. + assert("vpf-307", object != NULL);
  41558. +
  41559. + return
  41560. + /* object creation estimation */
  41561. + inode_file_plugin(object)->estimate.create(object) +
  41562. + /* stat data of parent directory estimation */
  41563. + inode_file_plugin(parent)->estimate.update(parent) +
  41564. + /* adding entry estimation */
  41565. + inode_dir_plugin(parent)->estimate.add_entry(parent) +
  41566. + /* to undo in the case of failure */
  41567. + inode_dir_plugin(parent)->estimate.rem_entry(parent);
  41568. +}
  41569. +
/* Create child in directory.

   . get object's plugin
   . get fresh inode
   . initialize inode
   . add object's stat-data
   . initialize object's directory
   . add entry to the parent
   . instantiate dentry

   On failure *retobj may still point to the partially constructed inode;
   the caller is responsible for disposing of it via iput().
*/
static int do_create_vfs_child(reiser4_object_create_data * data,/* parameters
								    of new
								    object */
			       struct inode **retobj)
{
	int result;

	struct dentry *dentry;	/* new name */
	struct inode *parent;	/* parent directory */

	dir_plugin *par_dir;	/* directory plugin on the parent */
	dir_plugin *obj_dir;	/* directory plugin on the new object */
	file_plugin *obj_plug;	/* object plugin on the new object */
	struct inode *object;	/* new object */
	reiser4_block_nr reserve;

	reiser4_dir_entry_desc entry;	/* new directory entry */

	assert("nikita-1420", data != NULL);
	parent = data->parent;
	dentry = data->dentry;

	assert("nikita-1418", parent != NULL);
	assert("nikita-1419", dentry != NULL);

	/* check, that name is acceptable for parent */
	par_dir = inode_dir_plugin(parent);
	if (par_dir->is_name_acceptable &&
	    !par_dir->is_name_acceptable(parent,
					 dentry->d_name.name,
					 (int)dentry->d_name.len))
		return RETERR(-ENAMETOOLONG);

	result = 0;
	obj_plug = file_plugin_by_id((int)data->id);
	if (obj_plug == NULL) {
		warning("nikita-430", "Cannot find plugin %i", data->id);
		return RETERR(-ENOENT);
	}
	object = new_inode(parent->i_sb);
	if (object == NULL)
		return RETERR(-ENOMEM);
	/* new_inode() initializes i_ino to "arbitrary" value. Reset it to 0,
	 * to simplify error handling: if some error occurs before i_ino is
	 * initialized with oid, i_ino should already be set to some
	 * distinguished value. */
	object->i_ino = 0;

	/* So that on error iput will be called. */
	*retobj = object;

	memset(&entry, 0, sizeof entry);
	entry.obj = object;

	/* record the chosen file plugin in the inode's plugin set */
	set_plugin(&reiser4_inode_data(object)->pset, PSET_FILE,
		   file_plugin_to_plugin(obj_plug));
	result = obj_plug->set_plug_in_inode(object, parent, data);
	if (result) {
		warning("nikita-431", "Cannot install plugin %i on %llx",
			data->id, (unsigned long long)get_inode_oid(object));
		return result;
	}

	/* reget plugin after installation */
	obj_plug = inode_file_plugin(object);

	if (obj_plug->create_object == NULL) {
		return RETERR(-EPERM);
	}

	/* if any of hash, tail, sd or permission plugins for newly created
	   object are not set yet set them here inheriting them from parent
	   directory
	 */
	assert("nikita-2070", obj_plug->adjust_to_parent != NULL);
	result = obj_plug->adjust_to_parent(object,
					    parent,
					    object->i_sb->s_root->d_inode);
	if (result == 0)
		result = finish_pset(object);
	if (result != 0) {
		warning("nikita-432", "Cannot inherit from %llx to %llx",
			(unsigned long long)get_inode_oid(parent),
			(unsigned long long)get_inode_oid(object));
		return result;
	}

	/* setup inode and file-operations for this inode */
	setup_inode_ops(object, data);

	/* call file plugin's method to initialize plugin specific part of
	 * inode */
	if (obj_plug->init_inode_data)
		obj_plug->init_inode_data(object, data, 1/*create */);

	/* obtain directory plugin (if any) for new object. */
	obj_dir = inode_dir_plugin(object);
	if (obj_dir != NULL && obj_dir->init == NULL) {
		return RETERR(-EPERM);
	}

	/* the new object lives in the parent's locality (oid neighborhood) */
	reiser4_inode_data(object)->locality_id = get_inode_oid(parent);

	/* reserve disk space for the whole creation, including possible undo */
	reserve = estimate_create_vfs_object(parent, object);
	if (reiser4_grab_space(reserve, BA_CAN_COMMIT)) {
		return RETERR(-ENOSPC);
	}

	/* mark inode `immutable'. We disable changes to the file being
	   created until valid directory entry for it is inserted. Otherwise,
	   if file were expanded and insertion of directory entry fails, we
	   have to remove file, but we only alloted enough space in
	   transaction to remove _empty_ file. 3.x code used to remove stat
	   data in different transaction thus possibly leaking disk space on
	   crash. This all only matters if it's possible to access file
	   without name, for example, by inode number
	 */
	reiser4_inode_set_flag(object, REISER4_IMMUTABLE);

	/* create empty object, this includes allocation of new objectid. For
	   directories this implies creation of dot and dotdot */
	assert("nikita-2265", reiser4_inode_get_flag(object, REISER4_NO_SD));

	/* mark inode as `loaded'. From this point onward
	   reiser4_delete_inode() will try to remove its stat-data. */
	reiser4_inode_set_flag(object, REISER4_LOADED);

	result = obj_plug->create_object(object, parent, data);
	if (result != 0) {
		reiser4_inode_clr_flag(object, REISER4_IMMUTABLE);
		/* -ENAMETOOLONG and -ENOMEM are expected failures, do not
		   spam the log for them */
		if (result != -ENAMETOOLONG && result != -ENOMEM)
			warning("nikita-2219",
				"Failed to create sd for %llu",
				(unsigned long long)get_inode_oid(object));
		return result;
	}

	if (obj_dir != NULL)
		result = obj_dir->init(object, parent, data);
	if (result == 0) {
		assert("nikita-434", !reiser4_inode_get_flag(object,
							     REISER4_NO_SD));
		/* insert inode into VFS hash table */
		insert_inode_hash(object);
		/* create entry */
		result = par_dir->add_entry(parent, dentry, data, &entry);
		if (result == 0) {
			/* If O_CREAT is set and the file did not previously
			   exist, upon successful completion, open() shall
			   mark for update the st_atime, st_ctime, and
			   st_mtime fields of the file and the st_ctime and
			   st_mtime fields of the parent directory. --SUS
			 */
			object->i_ctime = current_time(object);
			reiser4_update_dir(parent);
		}
		if (result != 0)
			/* cleanup failure to add entry */
			obj_plug->detach(object, parent);
	} else if (result != -ENOMEM)
		warning("nikita-2219", "Failed to initialize dir for %llu: %i",
			(unsigned long long)get_inode_oid(object), result);

	/*
	 * update stat-data, committing all pending modifications to the inode
	 * fields.
	 */
	reiser4_update_sd(object);
	if (result != 0) {
		/* if everything was ok (result == 0), parent stat-data is
		 * already updated above (update_parent_dir()) */
		reiser4_update_sd(parent);
		/* failure to create entry, remove object */
		obj_plug->delete_object(object);
	}

	/* file has name now, clear immutable flag */
	reiser4_inode_clr_flag(object, REISER4_IMMUTABLE);

	/* on error, iput() will call ->delete_inode(). We should keep track
	   of the existence of stat-data for this inode and avoid attempt to
	   remove it in reiser4_delete_inode(). This is accomplished through
	   REISER4_NO_SD bit in inode.u.reiser4_i.plugin.flags
	 */
	return result;
}
  41767. +
  41768. +/* this is helper for common implementations of reiser4_mkdir, reiser4_create,
  41769. + reiser4_mknod and reiser4_symlink
  41770. +*/
  41771. +static int
  41772. +create_vfs_object(struct inode *parent,
  41773. + struct dentry *dentry, reiser4_object_create_data * data)
  41774. +{
  41775. + reiser4_context *ctx;
  41776. + int result;
  41777. + struct inode *child;
  41778. +
  41779. + ctx = reiser4_init_context(parent->i_sb);
  41780. + if (IS_ERR(ctx))
  41781. + return PTR_ERR(ctx);
  41782. + context_set_commit_async(ctx);
  41783. +
  41784. + data->parent = parent;
  41785. + data->dentry = dentry;
  41786. + child = NULL;
  41787. + result = do_create_vfs_child(data, &child);
  41788. + if (unlikely(result != 0)) {
  41789. + if (child != NULL) {
  41790. + /* for unlinked inode accounting in iput() */
  41791. + clear_nlink(child);
  41792. + reiser4_make_bad_inode(child);
  41793. + iput(child);
  41794. + }
  41795. + } else
  41796. + d_instantiate(dentry, child);
  41797. +
  41798. + reiser4_exit_context(ctx);
  41799. + return result;
  41800. +}
  41801. +
  41802. +/**
  41803. + * helper for link_common. Estimate disk space necessary to add a link
  41804. + * from @parent to @object
  41805. + */
  41806. +static reiser4_block_nr common_estimate_link(struct inode *parent /* parent
  41807. + * directory
  41808. + */,
  41809. + struct inode *object /* object to
  41810. + * which new
  41811. + * link is
  41812. + * being
  41813. + * created */)
  41814. +{
  41815. + reiser4_block_nr res = 0;
  41816. + file_plugin *fplug;
  41817. + dir_plugin *dplug;
  41818. +
  41819. + assert("vpf-317", object != NULL);
  41820. + assert("vpf-318", parent != NULL);
  41821. +
  41822. + fplug = inode_file_plugin(object);
  41823. + dplug = inode_dir_plugin(parent);
  41824. + /* VS-FIXME-HANS: why do we do fplug->estimate.update(object) twice
  41825. + * instead of multiplying by 2? */
  41826. + /* reiser4_add_nlink(object) */
  41827. + res += fplug->estimate.update(object);
  41828. + /* add_entry(parent) */
  41829. + res += dplug->estimate.add_entry(parent);
  41830. + /* reiser4_del_nlink(object) */
  41831. + res += fplug->estimate.update(object);
  41832. + /* update_dir(parent) */
  41833. + res += inode_file_plugin(parent)->estimate.update(parent);
  41834. + /* safe-link */
  41835. + res += estimate_one_item_removal(reiser4_tree_by_inode(object));
  41836. +
  41837. + return res;
  41838. +}
  41839. +
  41840. +/* Estimate disk space necessary to remove a link between @parent and
  41841. + @object.
  41842. +*/
  41843. +static reiser4_block_nr estimate_unlink(struct inode *parent /* parent
  41844. + * directory */,
  41845. + struct inode *object /* object to which
  41846. + * new link is
  41847. + * being created
  41848. + */)
  41849. +{
  41850. + reiser4_block_nr res = 0;
  41851. + file_plugin *fplug;
  41852. + dir_plugin *dplug;
  41853. +
  41854. + assert("vpf-317", object != NULL);
  41855. + assert("vpf-318", parent != NULL);
  41856. +
  41857. + fplug = inode_file_plugin(object);
  41858. + dplug = inode_dir_plugin(parent);
  41859. +
  41860. + /* rem_entry(parent) */
  41861. + res += dplug->estimate.rem_entry(parent);
  41862. + /* reiser4_del_nlink(object) */
  41863. + res += fplug->estimate.update(object);
  41864. + /* update_dir(parent) */
  41865. + res += inode_file_plugin(parent)->estimate.update(parent);
  41866. + /* fplug->unlink */
  41867. + res += fplug->estimate.unlink(object, parent);
  41868. + /* safe-link */
  41869. + res += estimate_one_insert_item(reiser4_tree_by_inode(object));
  41870. +
  41871. + return res;
  41872. +}
  41873. +
  41874. +/* helper for reiser4_unlink_common. Estimate and grab space for unlink. */
  41875. +static int unlink_check_and_grab(struct inode *parent, struct dentry *victim)
  41876. +{
  41877. + file_plugin *fplug;
  41878. + struct inode *child;
  41879. + int result;
  41880. +
  41881. + result = 0;
  41882. + child = victim->d_inode;
  41883. + fplug = inode_file_plugin(child);
  41884. +
  41885. + /* check for race with create_object() */
  41886. + reiser4_check_immutable(child);
  41887. +
  41888. + /* object being deleted should have stat data */
  41889. + assert("vs-949", !reiser4_inode_get_flag(child, REISER4_NO_SD));
  41890. +
  41891. + /* ask object plugin */
  41892. + if (fplug->can_rem_link != NULL && !fplug->can_rem_link(child))
  41893. + return RETERR(-ENOTEMPTY);
  41894. +
  41895. + result = (int)estimate_unlink(parent, child);
  41896. + if (result < 0)
  41897. + return result;
  41898. +
  41899. + return reiser4_grab_reserved(child->i_sb, result, BA_CAN_COMMIT);
  41900. +}
  41901. +
  41902. +/* helper for reiser4_setattr_common */
  41903. +static int setattr_reserve(reiser4_tree * tree)
  41904. +{
  41905. + assert("vs-1096", is_grab_enabled(get_current_context()));
  41906. + return reiser4_grab_space(estimate_one_insert_into_item(tree),
  41907. + BA_CAN_COMMIT);
  41908. +}
  41909. +
  41910. +/* helper function. Standards require that for many file-system operations
  41911. + on success ctime and mtime of parent directory is to be updated. */
  41912. +int reiser4_update_dir(struct inode *dir)
  41913. +{
  41914. + assert("nikita-2525", dir != NULL);
  41915. +
  41916. + dir->i_ctime = dir->i_mtime = current_time(dir);
  41917. + return reiser4_update_sd(dir);
  41918. +}
  41919. +
  41920. +/*
  41921. + Local variables:
  41922. + c-indentation-style: "K&R"
  41923. + mode-name: "LC"
  41924. + c-basic-offset: 8
  41925. + tab-width: 8
  41926. + fill-column: 80
  41927. + scroll-step: 1
  41928. + End:
  41929. +*/
  41930. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/inode_ops_rename.c linux-4.14.2/fs/reiser4/plugin/inode_ops_rename.c
  41931. --- linux-4.14.2.orig/fs/reiser4/plugin/inode_ops_rename.c 1970-01-01 01:00:00.000000000 +0100
  41932. +++ linux-4.14.2/fs/reiser4/plugin/inode_ops_rename.c 2017-11-26 22:13:09.000000000 +0100
  41933. @@ -0,0 +1,958 @@
  41934. +/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
  41935. + * reiser4/README */
  41936. +
  41937. +#include "../inode.h"
  41938. +#include "../safe_link.h"
  41939. +
  41940. +static const char *possible_leak = "Possible disk space leak.";
  41941. +
/* re-bind existing name at @from_coord in @from_dir to point to @to_inode.

   Helper function called from hashed_rename().

   Returns 0 once the directory entry has been re-targeted (even if the
   subsequent nlink drop on @from_inode fails), or a negative errno if the
   entry could not be modified.
*/
static int replace_name(struct inode *to_inode,	/* inode where @from_coord is
						 * to be re-targeted at */
			struct inode *from_dir,	/* directory where @from_coord
						 * lives */
			struct inode *from_inode,	/* inode @from_coord
							 * originally point to */
			coord_t *from_coord,	/* where directory entry is in
						 * the tree */
			lock_handle * from_lh/* lock handle on @from_coord */)
{
	item_plugin *from_item;
	int result;
	znode *node;

	coord_clear_iplug(from_coord);
	node = from_coord->node;
	/* pin node data in memory while the entry is being modified */
	result = zload(node);
	if (result != 0)
		return result;
	from_item = item_plugin_by_coord(from_coord);
	if (plugin_of_group(item_plugin_by_coord(from_coord),
			    DIR_ENTRY_ITEM_TYPE)) {
		reiser4_key to_key;

		build_sd_key(to_inode, &to_key);

		/* everything is found and prepared to change directory entry
		   at @from_coord to point to @to_inode.

		   @to_inode is just about to get new name, so bump its link
		   counter.

		 */
		result = reiser4_add_nlink(to_inode, from_dir, 0);
		if (result != 0) {
			/* Don't issue warning: this may be plain -EMLINK */
			zrelse(node);
			return result;
		}

		/* re-target the entry: replace the stored key with the
		   stat-data key of @to_inode */
		result =
		    from_item->s.dir.update_key(from_coord, &to_key, from_lh);
		if (result != 0) {
			/* undo the nlink bump performed above */
			reiser4_del_nlink(to_inode, from_dir, 0);
			zrelse(node);
			return result;
		}

		/* @from_inode just lost its name, he-he.

		   If @from_inode was directory, it contained dotdot pointing
		   to @from_dir. @from_dir i_nlink will be decreased when
		   iput() will be called on @from_inode.

		   If file-system is not ADG (hard-links are
		   supported on directories), iput(from_inode) will not remove
		   @from_inode, and thus above is incorrect, but hard-links on
		   directories are problematic in many other respects.
		 */
		result = reiser4_del_nlink(from_inode, from_dir, 0);
		if (result != 0) {
			warning("nikita-2330",
				"Cannot remove link from source: %i. %s",
				result, possible_leak);
		}
		/* Has to return success, because entry is already
		 * modified. */
		result = 0;

		/* NOTE-NIKITA consider calling plugin method in stead of
		   accessing inode fields directly. */
		from_dir->i_mtime = current_time(from_dir);
	} else {
		warning("nikita-2326", "Unexpected item type");
		result = RETERR(-EIO);
	}
	zrelse(node);
	return result;
}
  42024. +
/* add new entry pointing to @inode into @dir at @coord, locked by @lh

   Helper function used by hashed_rename().

   Returns 0 on success; on failure the nlink bump is rolled back and the
   add_entry() error is returned.
*/
static int add_name(struct inode *inode,	/* inode where @coord is to be
						 * re-targeted at */
		    struct inode *dir,	/* directory where @coord lives */
		    struct dentry *name,	/* new name */
		    coord_t *coord,	/* where directory entry is in the tree
					 */
		    lock_handle * lh,	/* lock handle on @coord */
		    int is_dir/* true, if @inode is directory */)
{
	int result;
	reiser4_dir_entry_desc entry;

	assert("nikita-2333", lh->node == coord->node);
	assert("nikita-2334", is_dir == S_ISDIR(inode->i_mode));

	memset(&entry, 0, sizeof entry);
	entry.obj = inode;
	/* build key of directory entry description */
	inode_dir_plugin(dir)->build_entry_key(dir, &name->d_name, &entry.key);

	/* ext2 does this in different order: first inserts new entry,
	   then increases directory nlink. We don't want do this,
	   because reiser4_add_nlink() calls ->add_link() plugin
	   method that can fail for whatever reason, leaving as with
	   cleanup problems.
	 */
	/* @inode is getting new name */
	/* NOTE(review): the return value of reiser4_add_nlink() is ignored
	   here even though the comment above says ->add_link() can fail --
	   confirm whether that is intentional */
	reiser4_add_nlink(inode, dir, 0);
	/* create @new_name in @new_dir pointing to
	   @old_inode */
	result = WITH_COORD(coord,
			    inode_dir_item_plugin(dir)->s.dir.add_entry(dir,
									coord,
									lh,
									name,
									&entry));
	if (result != 0) {
		/* insertion failed: roll back the nlink bump */
		int result2;
		result2 = reiser4_del_nlink(inode, dir, 0);
		if (result2 != 0) {
			warning("nikita-2327",
				"Cannot drop link on %lli %i. %s",
				(unsigned long long)get_inode_oid(inode),
				result2, possible_leak);
		}
	} else
		INODE_INC_FIELD(dir, i_size);
	return result;
}
  42077. +
/* Estimate the number of blocks a rename of @old_name in @old_dir to
   @new_name in @new_dir may dirty or allocate. Bills for the worse of the
   two mutually exclusive paths (replace_name vs add_name) plus the common
   tail work (entry removal, nlink drops, stat-data writes). */
static reiser4_block_nr estimate_rename(struct inode *old_dir,	/* directory
								 * where @old is
								 * located */
					struct dentry *old_name,/* old name */
					struct inode *new_dir,	/* directory
								 * where @new is
								 * located */
					struct dentry *new_name /* new name */)
{
	reiser4_block_nr res1, res2;
	dir_plugin * p_parent_old, *p_parent_new;
	file_plugin * p_child_old, *p_child_new;

	assert("vpf-311", old_dir != NULL);
	assert("vpf-312", new_dir != NULL);
	assert("vpf-313", old_name != NULL);
	assert("vpf-314", new_name != NULL);

	p_parent_old = inode_dir_plugin(old_dir);
	p_parent_new = inode_dir_plugin(new_dir);
	p_child_old = inode_file_plugin(old_name->d_inode);
	/* the target name may not exist yet */
	if (new_name->d_inode)
		p_child_new = inode_file_plugin(new_name->d_inode);
	else
		p_child_new = NULL;

	/* find_entry - can insert one leaf. */
	res1 = res2 = 1;

	/* replace_name */
	{
		/* reiser4_add_nlink(p_child_old) and
		 * reiser4_del_nlink(p_child_old) */
		res1 += 2 * p_child_old->estimate.update(old_name->d_inode);
		/* update key */
		res1 += 1;
		/* reiser4_del_nlink(p_child_new) */
		if (p_child_new)
			res1 += p_child_new->estimate.update(new_name->d_inode);
	}

	/* else add_name */
	{
		/* reiser4_add_nlink(p_parent_new) and
		 * reiser4_del_nlink(p_parent_new) */
		res2 +=
		    2 * inode_file_plugin(new_dir)->estimate.update(new_dir);
		/* reiser4_add_nlink(p_parent_old) */
		res2 += p_child_old->estimate.update(old_name->d_inode);
		/* add_entry(p_parent_new) */
		res2 += p_parent_new->estimate.add_entry(new_dir);
		/* reiser4_del_nlink(p_parent_old) */
		res2 += p_child_old->estimate.update(old_name->d_inode);
	}

	/* the two paths above are mutually exclusive: bill for the larger */
	res1 = res1 < res2 ? res2 : res1;

	/* reiser4_write_sd(p_parent_new) */
	res1 += inode_file_plugin(new_dir)->estimate.update(new_dir);

	/* reiser4_write_sd(p_child_new) */
	if (p_child_new)
		res1 += p_child_new->estimate.update(new_name->d_inode);

	/* hashed_rem_entry(p_parent_old) */
	res1 += p_parent_old->estimate.rem_entry(old_dir);

	/* reiser4_del_nlink(p_child_old) */
	res1 += p_child_old->estimate.update(old_name->d_inode);

	/* replace_name */
	{
		/* reiser4_add_nlink(p_parent_dir_new) */
		res1 += inode_file_plugin(new_dir)->estimate.update(new_dir);
		/* update_key */
		res1 += 1;
		/* reiser4_del_nlink(p_parent_new) */
		res1 += inode_file_plugin(new_dir)->estimate.update(new_dir);
		/* reiser4_del_nlink(p_parent_old) */
		res1 += inode_file_plugin(old_dir)->estimate.update(old_dir);
	}

	/* reiser4_write_sd(p_parent_old) */
	res1 += inode_file_plugin(old_dir)->estimate.update(old_dir);

	/* reiser4_write_sd(p_child_old) */
	res1 += p_child_old->estimate.update(old_name->d_inode);

	return res1;
}
  42168. +
  42169. +static int hashed_rename_estimate_and_grab(struct inode *old_dir, /* directory
  42170. + * where @old
  42171. + * is located
  42172. + */
  42173. + struct dentry *old_name,/* old name
  42174. + */
  42175. + struct inode *new_dir, /* directory
  42176. + * where @new
  42177. + * is located
  42178. + */
  42179. + struct dentry *new_name /* new name
  42180. + */)
  42181. +{
  42182. + reiser4_block_nr reserve;
  42183. +
  42184. + reserve = estimate_rename(old_dir, old_name, new_dir, new_name);
  42185. +
  42186. + if (reiser4_grab_space(reserve, BA_CAN_COMMIT))
  42187. + return RETERR(-ENOSPC);
  42188. +
  42189. + return 0;
  42190. +}
  42191. +
  42192. +/* check whether @old_inode and @new_inode can be moved within file system
  42193. + * tree. This singles out attempts to rename pseudo-files, for example. */
  42194. +static int can_rename(struct inode *old_dir, struct inode *old_inode,
  42195. + struct inode *new_dir, struct inode *new_inode)
  42196. +{
  42197. + file_plugin *fplug;
  42198. + dir_plugin *dplug;
  42199. +
  42200. + assert("nikita-3370", old_inode != NULL);
  42201. +
  42202. + dplug = inode_dir_plugin(new_dir);
  42203. + fplug = inode_file_plugin(old_inode);
  42204. +
  42205. + if (dplug == NULL)
  42206. + return RETERR(-ENOTDIR);
  42207. + else if (new_dir->i_op->create == NULL)
  42208. + return RETERR(-EPERM);
  42209. + else if (!fplug->can_add_link(old_inode))
  42210. + return RETERR(-EMLINK);
  42211. + else if (new_inode != NULL) {
  42212. + fplug = inode_file_plugin(new_inode);
  42213. + if (fplug->can_rem_link != NULL &&
  42214. + !fplug->can_rem_link(new_inode))
  42215. + return RETERR(-EBUSY);
  42216. + }
  42217. + return 0;
  42218. +}
  42219. +
  42220. +int reiser4_find_entry(struct inode *, struct dentry *, lock_handle * ,
  42221. + znode_lock_mode, reiser4_dir_entry_desc *);
  42222. +int reiser4_update_dir(struct inode *);
  42223. +
  42224. +/* this is common implementation of vfs's rename2 method of struct
  42225. + inode_operations
  42226. + See comments in the body.
  42227. +
  42228. + It is arguable that this function can be made generic so, that it
  42229. + will be applicable to any kind of directory plugin that deals with
  42230. + directories composed out of directory entries. The only obstacle
  42231. + here is that we don't have any data-type to represent directory
  42232. + entry. This should be re-considered when more than one different
  42233. + directory plugin will be implemented.
  42234. +*/
  42235. +int reiser4_rename2_common(struct inode *old_dir /* directory where @old
  42236. + * is located */ ,
  42237. + struct dentry *old_name /* old name */ ,
  42238. + struct inode *new_dir /* directory where @new
  42239. + * is located */ ,
  42240. + struct dentry *new_name /* new name */ ,
  42241. + unsigned flags /* specific flags */)
  42242. +{
  42243. + /* From `The Open Group Base Specifications Issue 6'
  42244. +
  42245. + If either the old or new argument names a symbolic link, rename()
  42246. + shall operate on the symbolic link itself, and shall not resolve
  42247. + the last component of the argument. If the old argument and the new
  42248. + argument resolve to the same existing file, rename() shall return
  42249. + successfully and perform no other action.
  42250. +
  42251. + [this is done by VFS: vfs_rename()]
  42252. +
  42253. + If the old argument points to the pathname of a file that is not a
  42254. + directory, the new argument shall not point to the pathname of a
  42255. + directory.
  42256. +
  42257. + [checked by VFS: vfs_rename->may_delete()]
  42258. +
  42259. + If the link named by the new argument exists, it shall
  42260. + be removed and old renamed to new. In this case, a link named new
  42261. + shall remain visible to other processes throughout the renaming
  42262. + operation and refer either to the file referred to by new or old
  42263. + before the operation began.
  42264. +
  42265. + [we should assure this]
  42266. +
  42267. + Write access permission is required for
  42268. + both the directory containing old and the directory containing new.
  42269. +
  42270. + [checked by VFS: vfs_rename->may_delete(), may_create()]
  42271. +
  42272. + If the old argument points to the pathname of a directory, the new
  42273. + argument shall not point to the pathname of a file that is not a
  42274. + directory.
  42275. +
  42276. + [checked by VFS: vfs_rename->may_delete()]
  42277. +
  42278. + If the directory named by the new argument exists, it
  42279. + shall be removed and old renamed to new. In this case, a link named
  42280. + new shall exist throughout the renaming operation and shall refer
  42281. + either to the directory referred to by new or old before the
  42282. + operation began.
  42283. +
  42284. + [we should assure this]
  42285. +
  42286. + If new names an existing directory, it shall be
  42287. + required to be an empty directory.
  42288. +
  42289. + [we should check this]
  42290. +
  42291. + If the old argument points to a pathname of a symbolic link, the
  42292. + symbolic link shall be renamed. If the new argument points to a
  42293. + pathname of a symbolic link, the symbolic link shall be removed.
  42294. +
  42295. + The new pathname shall not contain a path prefix that names
  42296. + old. Write access permission is required for the directory
  42297. + containing old and the directory containing new. If the old
  42298. + argument points to the pathname of a directory, write access
  42299. + permission may be required for the directory named by old, and, if
  42300. + it exists, the directory named by new.
  42301. +
  42302. + [checked by VFS: vfs_rename(), vfs_rename_dir()]
  42303. +
  42304. + If the link named by the new argument exists and the file's link
  42305. + count becomes 0 when it is removed and no process has the file
  42306. + open, the space occupied by the file shall be freed and the file
  42307. + shall no longer be accessible. If one or more processes have the
  42308. + file open when the last link is removed, the link shall be removed
  42309. + before rename() returns, but the removal of the file contents shall
  42310. + be postponed until all references to the file are closed.
  42311. +
  42312. + [iput() handles this, but we can do this manually, a la
  42313. + reiser4_unlink()]
  42314. +
  42315. + Upon successful completion, rename() shall mark for update the
  42316. + st_ctime and st_mtime fields of the parent directory of each file.
  42317. +
  42318. + [N/A]
  42319. +
  42320. + */
  42321. +
  42322. + /* From Documentation/filesystems/vfs.txt:
  42323. +
  42324. + rename2: this has an additional flags argument compared to rename.
  42325. + f no flags are supported by the filesystem then this method
  42326. + need not be implemented. If some flags are supported then the
  42327. + filesystem must return -EINVAL for any unsupported or unknown
  42328. + flags. Currently the following flags are implemented:
  42329. + (1) RENAME_NOREPLACE: this flag indicates that if the target
  42330. + of the rename exists the rename should fail with -EEXIST
  42331. + instead of replacing the target. The VFS already checks for
  42332. + existence, so for local filesystems the RENAME_NOREPLACE
  42333. + implementation is equivalent to plain rename.
  42334. + (2) RENAME_EXCHANGE: exchange source and target. Both must
  42335. + exist; this is checked by the VFS. Unlike plain rename,
  42336. + source and target may be of different type.
  42337. + */
  42338. +
  42339. + static const unsigned supported_flags = RENAME_NOREPLACE;
  42340. +
  42341. + reiser4_context *ctx;
  42342. + int result;
  42343. + int is_dir; /* is @old_name directory */
  42344. +
  42345. + struct inode *old_inode;
  42346. + struct inode *new_inode;
  42347. + coord_t *new_coord;
  42348. +
  42349. + struct reiser4_dentry_fsdata *new_fsdata;
  42350. + dir_plugin *dplug;
  42351. + file_plugin *fplug;
  42352. +
  42353. + reiser4_dir_entry_desc *old_entry, *new_entry, *dotdot_entry;
  42354. + lock_handle * new_lh, *dotdot_lh;
  42355. + struct dentry *dotdot_name;
  42356. + struct reiser4_dentry_fsdata *dataonstack;
  42357. +
  42358. + ctx = reiser4_init_context(old_dir->i_sb);
  42359. + if (IS_ERR(ctx))
  42360. + return PTR_ERR(ctx);
  42361. +
  42362. + /*
  42363. + * Check rename2() flags.
  42364. + *
  42365. + * "If some flags are supported then the filesystem must return
  42366. + * -EINVAL for any unsupported or unknown flags."
  42367. + *
  42368. + * We support:
  42369. + * - RENAME_NOREPLACE (no-op)
  42370. + */
  42371. + if ((flags & supported_flags) != flags)
  42372. + return RETERR(-EINVAL);
  42373. +
  42374. + old_entry = kzalloc(3 * sizeof(*old_entry) + 2 * sizeof(*new_lh) +
  42375. + sizeof(*dotdot_name) + sizeof(*dataonstack),
  42376. + reiser4_ctx_gfp_mask_get());
  42377. + if (!old_entry) {
  42378. + context_set_commit_async(ctx);
  42379. + reiser4_exit_context(ctx);
  42380. + return RETERR(-ENOMEM);
  42381. + }
  42382. +
  42383. + new_entry = old_entry + 1;
  42384. + dotdot_entry = old_entry + 2;
  42385. + new_lh = (lock_handle *)(old_entry + 3);
  42386. + dotdot_lh = new_lh + 1;
  42387. + dotdot_name = (struct dentry *)(new_lh + 2);
  42388. + dataonstack = (struct reiser4_dentry_fsdata *)(dotdot_name + 1);
  42389. +
  42390. + assert("nikita-2318", old_dir != NULL);
  42391. + assert("nikita-2319", new_dir != NULL);
  42392. + assert("nikita-2320", old_name != NULL);
  42393. + assert("nikita-2321", new_name != NULL);
  42394. +
  42395. + old_inode = old_name->d_inode;
  42396. + new_inode = new_name->d_inode;
  42397. +
  42398. + dplug = inode_dir_plugin(old_dir);
  42399. + fplug = NULL;
  42400. +
  42401. + new_fsdata = reiser4_get_dentry_fsdata(new_name);
  42402. + if (IS_ERR(new_fsdata)) {
  42403. + kfree(old_entry);
  42404. + context_set_commit_async(ctx);
  42405. + reiser4_exit_context(ctx);
  42406. + return PTR_ERR(new_fsdata);
  42407. + }
  42408. +
  42409. + new_coord = &new_fsdata->dec.entry_coord;
  42410. + coord_clear_iplug(new_coord);
  42411. +
  42412. + is_dir = S_ISDIR(old_inode->i_mode);
  42413. +
  42414. + assert("nikita-3461", old_inode->i_nlink >= 1 + !!is_dir);
  42415. +
  42416. + /* if target is existing directory and it's not empty---return error.
  42417. +
  42418. + This check is done specifically, because is_dir_empty() requires
  42419. + tree traversal and have to be done before locks are taken.
  42420. + */
  42421. + if (is_dir && new_inode != NULL && is_dir_empty(new_inode) != 0) {
  42422. + kfree(old_entry);
  42423. + context_set_commit_async(ctx);
  42424. + reiser4_exit_context(ctx);
  42425. + return RETERR(-ENOTEMPTY);
  42426. + }
  42427. +
  42428. + result = can_rename(old_dir, old_inode, new_dir, new_inode);
  42429. + if (result != 0) {
  42430. + kfree(old_entry);
  42431. + context_set_commit_async(ctx);
  42432. + reiser4_exit_context(ctx);
  42433. + return result;
  42434. + }
  42435. +
  42436. + result = hashed_rename_estimate_and_grab(old_dir, old_name,
  42437. + new_dir, new_name);
  42438. + if (result != 0) {
  42439. + kfree(old_entry);
  42440. + context_set_commit_async(ctx);
  42441. + reiser4_exit_context(ctx);
  42442. + return result;
  42443. + }
  42444. +
  42445. + init_lh(new_lh);
  42446. +
  42447. + /* find entry for @new_name */
  42448. + result = reiser4_find_entry(new_dir, new_name, new_lh, ZNODE_WRITE_LOCK,
  42449. + new_entry);
  42450. +
  42451. + if (IS_CBKERR(result)) {
  42452. + done_lh(new_lh);
  42453. + kfree(old_entry);
  42454. + context_set_commit_async(ctx);
  42455. + reiser4_exit_context(ctx);
  42456. + return result;
  42457. + }
  42458. +
  42459. + reiser4_seal_done(&new_fsdata->dec.entry_seal);
  42460. +
  42461. + /* add or replace name for @old_inode as @new_name */
  42462. + if (new_inode != NULL) {
  42463. + /* target (@new_name) exists. */
  42464. + /* Not clear what to do with objects that are
  42465. + both directories and files at the same time. */
  42466. + if (result == CBK_COORD_FOUND) {
  42467. + result = replace_name(old_inode,
  42468. + new_dir,
  42469. + new_inode, new_coord, new_lh);
  42470. + if (result == 0)
  42471. + fplug = inode_file_plugin(new_inode);
  42472. + } else if (result == CBK_COORD_NOTFOUND) {
  42473. + /* VFS told us that @new_name is bound to existing
  42474. + inode, but we failed to find directory entry. */
  42475. + warning("nikita-2324", "Target not found");
  42476. + result = RETERR(-ENOENT);
  42477. + }
  42478. + } else {
  42479. + /* target (@new_name) doesn't exists. */
  42480. + if (result == CBK_COORD_NOTFOUND)
  42481. + result = add_name(old_inode,
  42482. + new_dir,
  42483. + new_name, new_coord, new_lh, is_dir);
  42484. + else if (result == CBK_COORD_FOUND) {
  42485. + /* VFS told us that @new_name is "negative" dentry,
  42486. + but we found directory entry. */
  42487. + warning("nikita-2331", "Target found unexpectedly");
  42488. + result = RETERR(-EIO);
  42489. + }
  42490. + }
  42491. +
  42492. + assert("nikita-3462", ergo(result == 0,
  42493. + old_inode->i_nlink >= 2 + !!is_dir));
  42494. +
  42495. + /* We are done with all modifications to the @new_dir, release lock on
  42496. + node. */
  42497. + done_lh(new_lh);
  42498. +
  42499. + if (fplug != NULL) {
  42500. + /* detach @new_inode from name-space */
  42501. + result = fplug->detach(new_inode, new_dir);
  42502. + if (result != 0)
  42503. + warning("nikita-2330", "Cannot detach %lli: %i. %s",
  42504. + (unsigned long long)get_inode_oid(new_inode),
  42505. + result, possible_leak);
  42506. + }
  42507. +
  42508. + if (new_inode != NULL)
  42509. + reiser4_update_sd(new_inode);
  42510. +
  42511. + if (result == 0) {
  42512. + old_entry->obj = old_inode;
  42513. +
  42514. + dplug->build_entry_key(old_dir,
  42515. + &old_name->d_name, &old_entry->key);
  42516. +
  42517. + /* At this stage new name was introduced for
  42518. + @old_inode. @old_inode, @new_dir, and @new_inode i_nlink
  42519. + counters were updated.
  42520. +
  42521. + We want to remove @old_name now. If @old_inode wasn't
  42522. + directory this is simple.
  42523. + */
  42524. + result = dplug->rem_entry(old_dir, old_name, old_entry);
  42525. + if (result != 0 && result != -ENOMEM) {
  42526. + warning("nikita-2335",
  42527. + "Cannot remove old name: %i", result);
  42528. + } else {
  42529. + result = reiser4_del_nlink(old_inode, old_dir, 0);
  42530. + if (result != 0 && result != -ENOMEM) {
  42531. + warning("nikita-2337",
  42532. + "Cannot drop link on old: %i", result);
  42533. + }
  42534. + }
  42535. +
  42536. + if (result == 0 && is_dir) {
  42537. + /* @old_inode is directory. We also have to update
  42538. + dotdot entry. */
  42539. + coord_t *dotdot_coord;
  42540. +
  42541. + memset(dataonstack, 0, sizeof(*dataonstack));
  42542. + memset(dotdot_entry, 0, sizeof(*dotdot_entry));
  42543. + dotdot_entry->obj = old_dir;
  42544. + memset(dotdot_name, 0, sizeof(*dotdot_name));
  42545. + dotdot_name->d_name.name = "..";
  42546. + dotdot_name->d_name.len = 2;
  42547. + /*
  42548. + * allocate ->d_fsdata on the stack to avoid using
  42549. + * reiser4_get_dentry_fsdata(). Locking is not needed,
  42550. + * because dentry is private to the current thread.
  42551. + */
  42552. + dotdot_name->d_fsdata = dataonstack;
  42553. + init_lh(dotdot_lh);
  42554. +
  42555. + dotdot_coord = &dataonstack->dec.entry_coord;
  42556. + coord_clear_iplug(dotdot_coord);
  42557. +
  42558. + result = reiser4_find_entry(old_inode, dotdot_name,
  42559. + dotdot_lh, ZNODE_WRITE_LOCK,
  42560. + dotdot_entry);
  42561. + if (result == 0) {
  42562. + /* replace_name() decreases i_nlink on
  42563. + * @old_dir */
  42564. + result = replace_name(new_dir,
  42565. + old_inode,
  42566. + old_dir,
  42567. + dotdot_coord, dotdot_lh);
  42568. + } else
  42569. + result = RETERR(-EIO);
  42570. + done_lh(dotdot_lh);
  42571. + }
  42572. + }
  42573. + reiser4_update_dir(new_dir);
  42574. + reiser4_update_dir(old_dir);
  42575. + reiser4_update_sd(old_inode);
  42576. + if (result == 0) {
  42577. + file_plugin *fplug;
  42578. +
  42579. + if (new_inode != NULL) {
  42580. + /* add safe-link for target file (in case we removed
  42581. + * last reference to the poor fellow */
  42582. + fplug = inode_file_plugin(new_inode);
  42583. + if (new_inode->i_nlink == 0)
  42584. + result = safe_link_add(new_inode, SAFE_UNLINK);
  42585. + }
  42586. + }
  42587. + kfree(old_entry);
  42588. + context_set_commit_async(ctx);
  42589. + reiser4_exit_context(ctx);
  42590. + return result;
  42591. +}
  42592. +
  42593. +#if 0
  42594. +int reiser4_rename_common(struct inode *old_dir /* directory where @old
  42595. + * is located */ ,
  42596. + struct dentry *old_name /* old name */ ,
  42597. + struct inode *new_dir /* directory where @new
  42598. + * is located */ ,
  42599. + struct dentry *new_name/* new name */)
  42600. +{
  42601. + /* From `The Open Group Base Specifications Issue 6'
  42602. +
  42603. + If either the old or new argument names a symbolic link, rename()
  42604. + shall operate on the symbolic link itself, and shall not resolve
  42605. + the last component of the argument. If the old argument and the new
  42606. + argument resolve to the same existing file, rename() shall return
  42607. + successfully and perform no other action.
  42608. +
  42609. + [this is done by VFS: vfs_rename()]
  42610. +
  42611. + If the old argument points to the pathname of a file that is not a
  42612. + directory, the new argument shall not point to the pathname of a
  42613. + directory.
  42614. +
  42615. + [checked by VFS: vfs_rename->may_delete()]
  42616. +
  42617. + If the link named by the new argument exists, it shall
  42618. + be removed and old renamed to new. In this case, a link named new
  42619. + shall remain visible to other processes throughout the renaming
  42620. + operation and refer either to the file referred to by new or old
  42621. + before the operation began.
  42622. +
  42623. + [we should assure this]
  42624. +
  42625. + Write access permission is required for
  42626. + both the directory containing old and the directory containing new.
  42627. +
  42628. + [checked by VFS: vfs_rename->may_delete(), may_create()]
  42629. +
  42630. + If the old argument points to the pathname of a directory, the new
  42631. + argument shall not point to the pathname of a file that is not a
  42632. + directory.
  42633. +
  42634. + [checked by VFS: vfs_rename->may_delete()]
  42635. +
  42636. + If the directory named by the new argument exists, it
  42637. + shall be removed and old renamed to new. In this case, a link named
  42638. + new shall exist throughout the renaming operation and shall refer
  42639. + either to the directory referred to by new or old before the
  42640. + operation began.
  42641. +
  42642. + [we should assure this]
  42643. +
  42644. + If new names an existing directory, it shall be
  42645. + required to be an empty directory.
  42646. +
  42647. + [we should check this]
  42648. +
  42649. + If the old argument points to a pathname of a symbolic link, the
  42650. + symbolic link shall be renamed. If the new argument points to a
  42651. + pathname of a symbolic link, the symbolic link shall be removed.
  42652. +
  42653. + The new pathname shall not contain a path prefix that names
  42654. + old. Write access permission is required for the directory
  42655. + containing old and the directory containing new. If the old
  42656. + argument points to the pathname of a directory, write access
  42657. + permission may be required for the directory named by old, and, if
  42658. + it exists, the directory named by new.
  42659. +
  42660. + [checked by VFS: vfs_rename(), vfs_rename_dir()]
  42661. +
  42662. + If the link named by the new argument exists and the file's link
  42663. + count becomes 0 when it is removed and no process has the file
  42664. + open, the space occupied by the file shall be freed and the file
  42665. + shall no longer be accessible. If one or more processes have the
  42666. + file open when the last link is removed, the link shall be removed
  42667. + before rename() returns, but the removal of the file contents shall
  42668. + be postponed until all references to the file are closed.
  42669. +
  42670. + [iput() handles this, but we can do this manually, a la
  42671. + reiser4_unlink()]
  42672. +
  42673. + Upon successful completion, rename() shall mark for update the
  42674. + st_ctime and st_mtime fields of the parent directory of each file.
  42675. +
  42676. + [N/A]
  42677. +
  42678. + */
  42679. + reiser4_context *ctx;
  42680. + int result;
  42681. + int is_dir; /* is @old_name directory */
  42682. + struct inode *old_inode;
  42683. + struct inode *new_inode;
  42684. + reiser4_dir_entry_desc old_entry;
  42685. + reiser4_dir_entry_desc new_entry;
  42686. + coord_t *new_coord;
  42687. + struct reiser4_dentry_fsdata *new_fsdata;
  42688. + lock_handle new_lh;
  42689. + dir_plugin *dplug;
  42690. + file_plugin *fplug;
  42691. +
  42692. + ctx = reiser4_init_context(old_dir->i_sb);
  42693. + if (IS_ERR(ctx))
  42694. + return PTR_ERR(ctx);
  42695. +
  42696. + assert("nikita-2318", old_dir != NULL);
  42697. + assert("nikita-2319", new_dir != NULL);
  42698. + assert("nikita-2320", old_name != NULL);
  42699. + assert("nikita-2321", new_name != NULL);
  42700. +
  42701. + old_inode = old_name->d_inode;
  42702. + new_inode = new_name->d_inode;
  42703. +
  42704. + dplug = inode_dir_plugin(old_dir);
  42705. + fplug = NULL;
  42706. +
  42707. + new_fsdata = reiser4_get_dentry_fsdata(new_name);
  42708. + if (IS_ERR(new_fsdata)) {
  42709. + result = PTR_ERR(new_fsdata);
  42710. + goto exit;
  42711. + }
  42712. +
  42713. + new_coord = &new_fsdata->dec.entry_coord;
  42714. + coord_clear_iplug(new_coord);
  42715. +
  42716. + is_dir = S_ISDIR(old_inode->i_mode);
  42717. +
  42718. + assert("nikita-3461", old_inode->i_nlink >= 1 + !!is_dir);
  42719. +
  42720. + /* if target is existing directory and it's not empty---return error.
  42721. +
  42722. + This check is done specifically, because is_dir_empty() requires
  42723. + tree traversal and have to be done before locks are taken.
  42724. + */
  42725. + if (is_dir && new_inode != NULL && is_dir_empty(new_inode) != 0)
  42726. + return RETERR(-ENOTEMPTY);
  42727. +
  42728. + result = can_rename(old_dir, old_inode, new_dir, new_inode);
  42729. + if (result != 0)
  42730. + goto exit;
  42731. +
  42732. + result = hashed_rename_estimate_and_grab(old_dir, old_name,
  42733. + new_dir, new_name);
  42734. + if (result != 0)
  42735. + goto exit;
  42736. +
  42737. + init_lh(&new_lh);
  42738. +
  42739. + /* find entry for @new_name */
  42740. + result = reiser4_find_entry(new_dir, new_name, &new_lh,
  42741. + ZNODE_WRITE_LOCK, &new_entry);
  42742. +
  42743. + if (IS_CBKERR(result)) {
  42744. + done_lh(&new_lh);
  42745. + goto exit;
  42746. + }
  42747. +
  42748. + reiser4_seal_done(&new_fsdata->dec.entry_seal);
  42749. +
  42750. + /* add or replace name for @old_inode as @new_name */
  42751. + if (new_inode != NULL) {
  42752. + /* target (@new_name) exists. */
  42753. + /* Not clear what to do with objects that are
  42754. + both directories and files at the same time. */
  42755. + if (result == CBK_COORD_FOUND) {
  42756. + result = replace_name(old_inode,
  42757. + new_dir,
  42758. + new_inode, new_coord, &new_lh);
  42759. + if (result == 0)
  42760. + fplug = inode_file_plugin(new_inode);
  42761. + } else if (result == CBK_COORD_NOTFOUND) {
  42762. + /* VFS told us that @new_name is bound to existing
  42763. + inode, but we failed to find directory entry. */
  42764. + warning("nikita-2324", "Target not found");
  42765. + result = RETERR(-ENOENT);
  42766. + }
  42767. + } else {
  42768. + /* target (@new_name) doesn't exists. */
  42769. + if (result == CBK_COORD_NOTFOUND)
  42770. + result = add_name(old_inode,
  42771. + new_dir,
  42772. + new_name, new_coord, &new_lh, is_dir);
  42773. + else if (result == CBK_COORD_FOUND) {
  42774. + /* VFS told us that @new_name is "negative" dentry,
  42775. + but we found directory entry. */
  42776. + warning("nikita-2331", "Target found unexpectedly");
  42777. + result = RETERR(-EIO);
  42778. + }
  42779. + }
  42780. +
  42781. + assert("nikita-3462", ergo(result == 0,
  42782. + old_inode->i_nlink >= 2 + !!is_dir));
  42783. +
  42784. + /* We are done with all modifications to the @new_dir, release lock on
  42785. + node. */
  42786. + done_lh(&new_lh);
  42787. +
  42788. + if (fplug != NULL) {
  42789. + /* detach @new_inode from name-space */
  42790. + result = fplug->detach(new_inode, new_dir);
  42791. + if (result != 0)
  42792. + warning("nikita-2330", "Cannot detach %lli: %i. %s",
  42793. + (unsigned long long)get_inode_oid(new_inode),
  42794. + result, possible_leak);
  42795. + }
  42796. +
  42797. + if (new_inode != NULL)
  42798. + reiser4_update_sd(new_inode);
  42799. +
  42800. + if (result == 0) {
  42801. + memset(&old_entry, 0, sizeof old_entry);
  42802. + old_entry.obj = old_inode;
  42803. +
  42804. + dplug->build_entry_key(old_dir,
  42805. + &old_name->d_name, &old_entry.key);
  42806. +
  42807. + /* At this stage new name was introduced for
  42808. + @old_inode. @old_inode, @new_dir, and @new_inode i_nlink
  42809. + counters were updated.
  42810. +
  42811. + We want to remove @old_name now. If @old_inode wasn't
  42812. + directory this is simple.
  42813. + */
  42814. + result = dplug->rem_entry(old_dir, old_name, &old_entry);
  42815. + /*result = rem_entry_hashed(old_dir, old_name, &old_entry); */
  42816. + if (result != 0 && result != -ENOMEM) {
  42817. + warning("nikita-2335",
  42818. + "Cannot remove old name: %i", result);
  42819. + } else {
  42820. + result = reiser4_del_nlink(old_inode, old_dir, 0);
  42821. + if (result != 0 && result != -ENOMEM) {
  42822. + warning("nikita-2337",
  42823. + "Cannot drop link on old: %i", result);
  42824. + }
  42825. + }
  42826. +
  42827. + if (result == 0 && is_dir) {
  42828. + /* @old_inode is directory. We also have to update
  42829. + dotdot entry. */
  42830. + coord_t *dotdot_coord;
  42831. + lock_handle dotdot_lh;
  42832. + struct dentry dotdot_name;
  42833. + reiser4_dir_entry_desc dotdot_entry;
  42834. + struct reiser4_dentry_fsdata dataonstack;
  42835. + struct reiser4_dentry_fsdata *fsdata;
  42836. +
  42837. + memset(&dataonstack, 0, sizeof dataonstack);
  42838. + memset(&dotdot_entry, 0, sizeof dotdot_entry);
  42839. + dotdot_entry.obj = old_dir;
  42840. + memset(&dotdot_name, 0, sizeof dotdot_name);
  42841. + dotdot_name.d_name.name = "..";
  42842. + dotdot_name.d_name.len = 2;
  42843. + /*
  42844. + * allocate ->d_fsdata on the stack to avoid using
  42845. + * reiser4_get_dentry_fsdata(). Locking is not needed,
  42846. + * because dentry is private to the current thread.
  42847. + */
  42848. + dotdot_name.d_fsdata = &dataonstack;
  42849. + init_lh(&dotdot_lh);
  42850. +
  42851. + fsdata = &dataonstack;
  42852. + dotdot_coord = &fsdata->dec.entry_coord;
  42853. + coord_clear_iplug(dotdot_coord);
  42854. +
  42855. + result = reiser4_find_entry(old_inode,
  42856. + &dotdot_name,
  42857. + &dotdot_lh,
  42858. + ZNODE_WRITE_LOCK,
  42859. + &dotdot_entry);
  42860. + if (result == 0) {
  42861. + /* replace_name() decreases i_nlink on
  42862. + * @old_dir */
  42863. + result = replace_name(new_dir,
  42864. + old_inode,
  42865. + old_dir,
  42866. + dotdot_coord, &dotdot_lh);
  42867. + } else
  42868. + result = RETERR(-EIO);
  42869. + done_lh(&dotdot_lh);
  42870. + }
  42871. + }
  42872. + reiser4_update_dir(new_dir);
  42873. + reiser4_update_dir(old_dir);
  42874. + reiser4_update_sd(old_inode);
  42875. + if (result == 0) {
  42876. + file_plugin *fplug;
  42877. +
  42878. + if (new_inode != NULL) {
  42879. + /* add safe-link for target file (in case we removed
  42880. + * last reference to the poor fellow */
  42881. + fplug = inode_file_plugin(new_inode);
  42882. + if (new_inode->i_nlink == 0)
  42883. + result = safe_link_add(new_inode, SAFE_UNLINK);
  42884. + }
  42885. + }
  42886. +exit:
  42887. + context_set_commit_async(ctx);
  42888. + reiser4_exit_context(ctx);
  42889. + return result;
  42890. +}
  42891. +#endif
  42892. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/item/acl.h linux-4.14.2/fs/reiser4/plugin/item/acl.h
  42893. --- linux-4.14.2.orig/fs/reiser4/plugin/item/acl.h 1970-01-01 01:00:00.000000000 +0100
  42894. +++ linux-4.14.2/fs/reiser4/plugin/item/acl.h 2017-11-26 22:13:09.000000000 +0100
  42895. @@ -0,0 +1,66 @@
  42896. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  42897. +
  42898. +/* Directory entry. */
  42899. +
  42900. +#if !defined( __FS_REISER4_PLUGIN_DIRECTORY_ENTRY_H__ )
  42901. +#define __FS_REISER4_PLUGIN_DIRECTORY_ENTRY_H__
  42902. +
  42903. +#include "../../forward.h"
  42904. +#include "../../dformat.h"
  42905. +#include "../../kassign.h"
  42906. +#include "../../key.h"
  42907. +
  42908. +#include <linux/fs.h>
  42909. +#include <linux/dcache.h> /* for struct dentry */
  42910. +
  42911. +typedef struct directory_entry_format {
  42912. + /* key of object stat-data. It's not necessary to store whole
  42913. + key here, because it's always key of stat-data, so minor
  42914. + packing locality and offset can be omitted here. But this
  42915. + relies on particular key allocation scheme for stat-data, so,
  42916. + for extensibility sake, whole key can be stored here.
  42917. +
  42918. + We store key as array of bytes, because we don't want 8-byte
  42919. + alignment of dir entries.
  42920. + */
  42921. + obj_key_id id;
  42922. + /* file name. Null terminated string. */
  42923. + d8 name[0];
  42924. +} directory_entry_format;
  42925. +
  42926. +void print_de(const char *prefix, coord_t * coord);
  42927. +int extract_key_de(const coord_t * coord, reiser4_key * key);
  42928. +int update_key_de(const coord_t * coord, const reiser4_key * key,
  42929. + lock_handle * lh);
  42930. +char *extract_name_de(const coord_t * coord, char *buf);
  42931. +unsigned extract_file_type_de(const coord_t * coord);
  42932. +int add_entry_de(struct inode *dir, coord_t * coord,
  42933. + lock_handle * lh, const struct dentry *name,
  42934. + reiser4_dir_entry_desc * entry);
  42935. +int rem_entry_de(struct inode *dir, const struct qstr *name, coord_t * coord,
  42936. + lock_handle * lh, reiser4_dir_entry_desc * entry);
  42937. +int max_name_len_de(const struct inode *dir);
  42938. +
  42939. +int de_rem_and_shrink(struct inode *dir, coord_t * coord, int length);
  42940. +
  42941. +char *extract_dent_name(const coord_t * coord,
  42942. + directory_entry_format * dent, char *buf);
  42943. +
  42944. +#if REISER4_LARGE_KEY
  42945. +#define DE_NAME_BUF_LEN (24)
  42946. +#else
  42947. +#define DE_NAME_BUF_LEN (16)
  42948. +#endif
  42949. +
  42950. +/* __FS_REISER4_PLUGIN_DIRECTORY_ENTRY_H__ */
  42951. +#endif
  42952. +
  42953. +/* Make Linus happy.
  42954. + Local variables:
  42955. + c-indentation-style: "K&R"
  42956. + mode-name: "LC"
  42957. + c-basic-offset: 8
  42958. + tab-width: 8
  42959. + fill-column: 120
  42960. + End:
  42961. +*/
  42962. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/item/blackbox.c linux-4.14.2/fs/reiser4/plugin/item/blackbox.c
  42963. --- linux-4.14.2.orig/fs/reiser4/plugin/item/blackbox.c 1970-01-01 01:00:00.000000000 +0100
  42964. +++ linux-4.14.2/fs/reiser4/plugin/item/blackbox.c 2017-11-26 22:13:09.000000000 +0100
  42965. @@ -0,0 +1,142 @@
  42966. +/* Copyright 2003 by Hans Reiser, licensing governed by
  42967. + * reiser4/README */
  42968. +
  42969. +/* Black box item implementation */
  42970. +
  42971. +#include "../../forward.h"
  42972. +#include "../../debug.h"
  42973. +#include "../../dformat.h"
  42974. +#include "../../kassign.h"
  42975. +#include "../../coord.h"
  42976. +#include "../../tree.h"
  42977. +#include "../../lock.h"
  42978. +
  42979. +#include "blackbox.h"
  42980. +#include "item.h"
  42981. +#include "../plugin.h"
  42982. +
  42983. +int
  42984. +store_black_box(reiser4_tree * tree,
  42985. + const reiser4_key * key, void *data, int length)
  42986. +{
  42987. + int result;
  42988. + reiser4_item_data idata;
  42989. + coord_t coord;
  42990. + lock_handle lh;
  42991. +
  42992. + memset(&idata, 0, sizeof idata);
  42993. +
  42994. + idata.data = data;
  42995. + idata.user = 0;
  42996. + idata.length = length;
  42997. + idata.iplug = item_plugin_by_id(BLACK_BOX_ID);
  42998. +
  42999. + init_lh(&lh);
  43000. + result = insert_by_key(tree, key,
  43001. + &idata, &coord, &lh, LEAF_LEVEL, CBK_UNIQUE);
  43002. +
  43003. + assert("nikita-3413",
  43004. + ergo(result == 0,
  43005. + WITH_COORD(&coord,
  43006. + item_length_by_coord(&coord) == length)));
  43007. +
  43008. + done_lh(&lh);
  43009. + return result;
  43010. +}
  43011. +
  43012. +int
  43013. +load_black_box(reiser4_tree * tree,
  43014. + reiser4_key * key, void *data, int length, int exact)
  43015. +{
  43016. + int result;
  43017. + coord_t coord;
  43018. + lock_handle lh;
  43019. +
  43020. + init_lh(&lh);
  43021. + result = coord_by_key(tree, key,
  43022. + &coord, &lh, ZNODE_READ_LOCK,
  43023. + exact ? FIND_EXACT : FIND_MAX_NOT_MORE_THAN,
  43024. + LEAF_LEVEL, LEAF_LEVEL, CBK_UNIQUE, NULL);
  43025. +
  43026. + if (result == 0) {
  43027. + int ilen;
  43028. +
  43029. + result = zload(coord.node);
  43030. + if (result == 0) {
  43031. + ilen = item_length_by_coord(&coord);
  43032. + if (ilen <= length) {
  43033. + memcpy(data, item_body_by_coord(&coord), ilen);
  43034. + unit_key_by_coord(&coord, key);
  43035. + } else if (exact) {
  43036. + /*
  43037. + * item is larger than buffer provided by the
  43038. + * user. Only issue a warning if @exact is
  43039. + * set. If @exact is false, we are iterating
  43040. + * over all safe-links and here we are reaching
  43041. + * the end of the iteration.
  43042. + */
  43043. + warning("nikita-3415",
  43044. + "Wrong black box length: %i > %i",
  43045. + ilen, length);
  43046. + result = RETERR(-EIO);
  43047. + }
  43048. + zrelse(coord.node);
  43049. + }
  43050. + }
  43051. +
  43052. + done_lh(&lh);
  43053. + return result;
  43054. +
  43055. +}
  43056. +
  43057. +int
  43058. +update_black_box(reiser4_tree * tree,
  43059. + const reiser4_key * key, void *data, int length)
  43060. +{
  43061. + int result;
  43062. + coord_t coord;
  43063. + lock_handle lh;
  43064. +
  43065. + init_lh(&lh);
  43066. + result = coord_by_key(tree, key,
  43067. + &coord, &lh, ZNODE_READ_LOCK,
  43068. + FIND_EXACT,
  43069. + LEAF_LEVEL, LEAF_LEVEL, CBK_UNIQUE, NULL);
  43070. + if (result == 0) {
  43071. + int ilen;
  43072. +
  43073. + result = zload(coord.node);
  43074. + if (result == 0) {
  43075. + ilen = item_length_by_coord(&coord);
  43076. + if (length <= ilen) {
  43077. + memcpy(item_body_by_coord(&coord), data,
  43078. + length);
  43079. + } else {
  43080. + warning("nikita-3437",
  43081. + "Wrong black box length: %i < %i",
  43082. + ilen, length);
  43083. + result = RETERR(-EIO);
  43084. + }
  43085. + zrelse(coord.node);
  43086. + }
  43087. + }
  43088. +
  43089. + done_lh(&lh);
  43090. + return result;
  43091. +
  43092. +}
  43093. +
  43094. +int kill_black_box(reiser4_tree * tree, const reiser4_key * key)
  43095. +{
  43096. + return reiser4_cut_tree(tree, key, key, NULL, 1);
  43097. +}
  43098. +
  43099. +/* Make Linus happy.
  43100. + Local variables:
  43101. + c-indentation-style: "K&R"
  43102. + mode-name: "LC"
  43103. + c-basic-offset: 8
  43104. + tab-width: 8
  43105. + fill-column: 120
  43106. + End:
  43107. +*/
  43108. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/item/blackbox.h linux-4.14.2/fs/reiser4/plugin/item/blackbox.h
  43109. --- linux-4.14.2.orig/fs/reiser4/plugin/item/blackbox.h 1970-01-01 01:00:00.000000000 +0100
  43110. +++ linux-4.14.2/fs/reiser4/plugin/item/blackbox.h 2017-11-26 22:13:09.000000000 +0100
  43111. @@ -0,0 +1,33 @@
  43112. +/* Copyright 2003 by Hans Reiser, licensing governed by
  43113. + * reiser4/README */
  43114. +
  43115. +/* "Black box" fixed-width entry to contain user supplied data */
  43116. +
  43117. +#if !defined( __FS_REISER4_BLACK_BOX_H__ )
  43118. +#define __FS_REISER4_BLACK_BOX_H__
  43119. +
  43120. +#include "../../forward.h"
  43121. +#include "../../dformat.h"
  43122. +#include "../../kassign.h"
  43123. +#include "../../key.h"
  43124. +
  43125. +extern int store_black_box(reiser4_tree * tree,
  43126. + const reiser4_key * key, void *data, int length);
  43127. +extern int load_black_box(reiser4_tree * tree,
  43128. + reiser4_key * key, void *data, int length, int exact);
  43129. +extern int kill_black_box(reiser4_tree * tree, const reiser4_key * key);
  43130. +extern int update_black_box(reiser4_tree * tree,
  43131. + const reiser4_key * key, void *data, int length);
  43132. +
  43133. +/* __FS_REISER4_BLACK_BOX_H__ */
  43134. +#endif
  43135. +
  43136. +/* Make Linus happy.
  43137. + Local variables:
  43138. + c-indentation-style: "K&R"
  43139. + mode-name: "LC"
  43140. + c-basic-offset: 8
  43141. + tab-width: 8
  43142. + fill-column: 120
  43143. + End:
  43144. +*/
  43145. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/item/cde.c linux-4.14.2/fs/reiser4/plugin/item/cde.c
  43146. --- linux-4.14.2.orig/fs/reiser4/plugin/item/cde.c 1970-01-01 01:00:00.000000000 +0100
  43147. +++ linux-4.14.2/fs/reiser4/plugin/item/cde.c 2017-11-26 22:13:09.000000000 +0100
  43148. @@ -0,0 +1,1004 @@
  43149. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  43150. +
  43151. +/* Directory entry implementation */
  43152. +
  43153. +/* DESCRIPTION:
  43154. +
  43155. + This is "compound" directory item plugin implementation. This directory
  43156. + item type is compound (as opposed to the "simple directory item" in
  43157. + fs/reiser4/plugin/item/sde.[ch]), because it consists of several directory
  43158. + entries.
  43159. +
  43160. + The reason behind this decision is disk space efficiency: all directory
  43161. + entries inside the same directory have identical fragment in their
  43162. + keys. This, of course, depends on key assignment policy. In our default key
  43163. + assignment policy, all directory entries have the same locality which is
  43164. + equal to the object id of their directory.
  43165. +
  43166. + Composing directory item out of several directory entries for the same
  43167. + directory allows us to store said key fragment only once. That is, this is
  43168. + some ad hoc form of key compression (stem compression) that is implemented
  43169. + here, because general key compression is not supposed to be implemented in
  43170. + v4.0.
  43171. +
  43172. + Another decision that was made regarding all directory item plugins, is
  43173. + that they will store entry keys unaligned. This is for the sake of disk
  43174. + space efficiency again.
  43175. +
  43176. + It should be noted that storing keys unaligned increases CPU consumption,
  43177. + at least on some architectures.
  43178. +
  43179. + Internal on-disk structure of the compound directory item is the following:
  43180. +
  43181. + HEADER cde_item_format. Here number of entries is stored.
  43182. + ENTRY_HEADER_0 cde_unit_header. Here part of entry key and
  43183. + ENTRY_HEADER_1 offset of entry body are stored.
  43184. + ENTRY_HEADER_2 (basically two last parts of key)
  43185. + ...
  43186. + ENTRY_HEADER_N
  43187. + ENTRY_BODY_0 directory_entry_format. Here part of stat data key and
  43188. + ENTRY_BODY_1 NUL-terminated name are stored.
  43189. + ENTRY_BODY_2 (part of stat-data key in the
  43190. + sense that since all SDs have
  43191. + zero offset, this offset is not
  43192. + stored on disk).
  43193. + ...
  43194. + ENTRY_BODY_N
  43195. +
  43196. + When it comes to the balancing, each directory entry in compound directory
  43197. + item is unit, that is, something that can be cut from one item and pasted
  43198. + into another item of the same type. Handling of unit cut and paste is major
  43199. + reason for the complexity of code below.
  43200. +
  43201. +*/
  43202. +
  43203. +#include "../../forward.h"
  43204. +#include "../../debug.h"
  43205. +#include "../../dformat.h"
  43206. +#include "../../kassign.h"
  43207. +#include "../../key.h"
  43208. +#include "../../coord.h"
  43209. +#include "sde.h"
  43210. +#include "cde.h"
  43211. +#include "item.h"
  43212. +#include "../node/node.h"
  43213. +#include "../plugin.h"
  43214. +#include "../../znode.h"
  43215. +#include "../../carry.h"
  43216. +#include "../../tree.h"
  43217. +#include "../../inode.h"
  43218. +
  43219. +#include <linux/fs.h> /* for struct inode */
  43220. +#include <linux/dcache.h> /* for struct dentry */
  43221. +
  43222. +#if 0
  43223. +#define CHECKME(coord) \
  43224. +({ \
  43225. + const char *message; \
  43226. + coord_t dup; \
  43227. + \
  43228. + coord_dup_nocheck(&dup, (coord)); \
  43229. + dup.unit_pos = 0; \
  43230. + assert("nikita-2871", cde_check(&dup, &message) == 0); \
  43231. +})
  43232. +#else
  43233. +#define CHECKME(coord) noop
  43234. +#endif
  43235. +
  43236. +/* return body of compound directory item at @coord */
  43237. +static inline cde_item_format *formatted_at(const coord_t * coord)
  43238. +{
  43239. + assert("nikita-1282", coord != NULL);
  43240. + return item_body_by_coord(coord);
  43241. +}
  43242. +
  43243. +/* return entry header at @coord */
  43244. +static inline cde_unit_header *header_at(const coord_t *
  43245. + coord /* coord of item */ ,
  43246. + int idx /* index of unit */ )
  43247. +{
  43248. + assert("nikita-1283", coord != NULL);
  43249. + return &formatted_at(coord)->entry[idx];
  43250. +}
  43251. +
  43252. +/* return number of units in compound directory item at @coord */
  43253. +static int units(const coord_t * coord /* coord of item */ )
  43254. +{
  43255. + return le16_to_cpu(get_unaligned(&formatted_at(coord)->num_of_entries));
  43256. +}
  43257. +
  43258. +/* return offset of the body of @idx-th entry in @coord */
  43259. +static unsigned int offset_of(const coord_t * coord /* coord of item */ ,
  43260. + int idx /* index of unit */ )
  43261. +{
  43262. + if (idx < units(coord))
  43263. + return le16_to_cpu(get_unaligned(&header_at(coord, idx)->offset));
  43264. + else if (idx == units(coord))
  43265. + return item_length_by_coord(coord);
  43266. + else
  43267. + impossible("nikita-1308", "Wrong idx");
  43268. + return 0;
  43269. +}
  43270. +
  43271. +/* set offset of the body of @idx-th entry in @coord */
  43272. +static void set_offset(const coord_t * coord /* coord of item */ ,
  43273. + int idx /* index of unit */ ,
  43274. + unsigned int offset /* new offset */ )
  43275. +{
  43276. + put_unaligned(cpu_to_le16((__u16) offset), &header_at(coord, idx)->offset);
  43277. +}
  43278. +
  43279. +static void adj_offset(const coord_t * coord /* coord of item */ ,
  43280. + int idx /* index of unit */ ,
  43281. + int delta /* offset change */ )
  43282. +{
  43283. + d16 *doffset;
  43284. + __u16 offset;
  43285. +
  43286. + doffset = &header_at(coord, idx)->offset;
  43287. + offset = le16_to_cpu(get_unaligned(doffset));
  43288. + offset += delta;
  43289. + put_unaligned(cpu_to_le16((__u16) offset), doffset);
  43290. +}
  43291. +
  43292. +/* return pointer to @offset-th byte from the beginning of @coord */
  43293. +static char *address(const coord_t * coord /* coord of item */ ,
  43294. + int offset)
  43295. +{
  43296. + return ((char *)item_body_by_coord(coord)) + offset;
  43297. +}
  43298. +
  43299. +/* return pointer to the body of @idx-th entry in @coord */
  43300. +static directory_entry_format *entry_at(const coord_t * coord /* coord of
  43301. + * item */ ,
  43302. + int idx /* index of unit */ )
  43303. +{
  43304. + return (directory_entry_format *) address(coord,
  43305. + (int)offset_of(coord, idx));
  43306. +}
  43307. +
  43308. +/* return number of unit referenced by @coord */
  43309. +static int idx_of(const coord_t * coord /* coord of item */ )
  43310. +{
  43311. + assert("nikita-1285", coord != NULL);
  43312. + return coord->unit_pos;
  43313. +}
  43314. +
  43315. +/* find position where entry with @entry_key would be inserted into @coord */
  43316. +static int find(const coord_t * coord /* coord of item */ ,
  43317. + const reiser4_key * entry_key /* key to look for */ ,
  43318. + cmp_t * last /* result of last comparison */ )
  43319. +{
  43320. + int entries;
  43321. +
  43322. + int left;
  43323. + int right;
  43324. +
  43325. + cde_unit_header *header;
  43326. +
  43327. + assert("nikita-1295", coord != NULL);
  43328. + assert("nikita-1296", entry_key != NULL);
  43329. + assert("nikita-1297", last != NULL);
  43330. +
  43331. + entries = units(coord);
  43332. + left = 0;
  43333. + right = entries - 1;
  43334. + while (right - left >= REISER4_SEQ_SEARCH_BREAK) {
  43335. + int median;
  43336. +
  43337. + median = (left + right) >> 1;
  43338. +
  43339. + header = header_at(coord, median);
  43340. + *last = de_id_key_cmp(&header->hash, entry_key);
  43341. + switch (*last) {
  43342. + case LESS_THAN:
  43343. + left = median;
  43344. + break;
  43345. + case GREATER_THAN:
  43346. + right = median;
  43347. + break;
  43348. + case EQUAL_TO:{
  43349. + do {
  43350. + median--;
  43351. + header--;
  43352. + } while (median >= 0 &&
  43353. + de_id_key_cmp(&header->hash,
  43354. + entry_key) == EQUAL_TO);
  43355. + return median + 1;
  43356. + }
  43357. + }
  43358. + }
  43359. + header = header_at(coord, left);
  43360. + for (; left < entries; ++left, ++header) {
  43361. + prefetch(header + 1);
  43362. + *last = de_id_key_cmp(&header->hash, entry_key);
  43363. + if (*last != LESS_THAN)
  43364. + break;
  43365. + }
  43366. + if (left < entries)
  43367. + return left;
  43368. + else
  43369. + return RETERR(-ENOENT);
  43370. +
  43371. +}
  43372. +
  43373. +/* expand @coord as to accommodate for insertion of @no new entries starting
  43374. + from @pos, with total bodies size @size. */
  43375. +static int expand_item(const coord_t * coord /* coord of item */ ,
  43376. + int pos /* unit position */ , int no /* number of new
  43377. + * units*/ ,
  43378. + int size /* total size of new units' data */ ,
  43379. + unsigned int data_size /* free space already reserved
  43380. + * in the item for insertion */ )
  43381. +{
  43382. + int entries;
  43383. + cde_unit_header *header;
  43384. + char *dent;
  43385. + int i;
  43386. +
  43387. + assert("nikita-1310", coord != NULL);
  43388. + assert("nikita-1311", pos >= 0);
  43389. + assert("nikita-1312", no > 0);
  43390. + assert("nikita-1313", data_size >= no * sizeof(directory_entry_format));
  43391. + assert("nikita-1343",
  43392. + item_length_by_coord(coord) >=
  43393. + (int)(size + data_size + no * sizeof *header));
  43394. +
  43395. + entries = units(coord);
  43396. +
  43397. + if (pos == entries)
  43398. + dent = address(coord, size);
  43399. + else
  43400. + dent = (char *)entry_at(coord, pos);
  43401. + /* place where new header will be in */
  43402. + header = header_at(coord, pos);
  43403. + /* free space for new entry headers */
  43404. + memmove(header + no, header,
  43405. + (unsigned)(address(coord, size) - (char *)header));
  43406. + /* if adding to the end initialise first new header */
  43407. + if (pos == entries) {
  43408. + set_offset(coord, pos, (unsigned)size);
  43409. + }
  43410. +
  43411. + /* adjust entry pointer and size */
  43412. + dent = dent + no * sizeof *header;
  43413. + size += no * sizeof *header;
  43414. + /* free space for new entries */
  43415. + memmove(dent + data_size, dent,
  43416. + (unsigned)(address(coord, size) - dent));
  43417. +
  43418. + /* increase counter */
  43419. + entries += no;
  43420. + put_unaligned(cpu_to_le16((__u16) entries), &formatted_at(coord)->num_of_entries);
  43421. +
  43422. + /* [ 0 ... pos ] entries were shifted by no * ( sizeof *header )
  43423. + bytes. */
  43424. + for (i = 0; i <= pos; ++i)
  43425. + adj_offset(coord, i, no * sizeof *header);
  43426. + /* [ pos + no ... +\infty ) entries were shifted by ( no *
  43427. + sizeof *header + data_size ) bytes */
  43428. + for (i = pos + no; i < entries; ++i)
  43429. + adj_offset(coord, i, no * sizeof *header + data_size);
  43430. + return 0;
  43431. +}
  43432. +
  43433. +/* insert new @entry into item */
  43434. +static int expand(const coord_t * coord /* coord of item */ ,
  43435. + struct cde_entry * entry /* entry to insert */ ,
  43436. + int len /* length of @entry data */ ,
  43437. + int *pos /* position to insert */ ,
  43438. + reiser4_dir_entry_desc * dir_entry /* parameters for new
  43439. + * entry */ )
  43440. +{
  43441. + cmp_t cmp_res;
  43442. + int datasize;
  43443. +
  43444. + *pos = find(coord, &dir_entry->key, &cmp_res);
  43445. + if (*pos < 0)
  43446. + *pos = units(coord);
  43447. +
  43448. + datasize = sizeof(directory_entry_format);
  43449. + if (is_longname(entry->name->name, entry->name->len))
  43450. + datasize += entry->name->len + 1;
  43451. +
  43452. + expand_item(coord, *pos, 1, item_length_by_coord(coord) - len,
  43453. + datasize);
  43454. + return 0;
  43455. +}
  43456. +
  43457. +/* paste body of @entry into item */
  43458. +static int paste_entry(const coord_t * coord /* coord of item */ ,
  43459. + struct cde_entry * entry /* new entry */ ,
  43460. + int pos /* position to insert */ ,
  43461. + reiser4_dir_entry_desc * dir_entry /* parameters for
  43462. + * new entry */ )
  43463. +{
  43464. + cde_unit_header *header;
  43465. + directory_entry_format *dent;
  43466. + const char *name;
  43467. + int len;
  43468. +
  43469. + header = header_at(coord, pos);
  43470. + dent = entry_at(coord, pos);
  43471. +
  43472. + build_de_id_by_key(&dir_entry->key, &header->hash);
  43473. + build_inode_key_id(entry->obj, &dent->id);
  43474. + /* AUDIT unsafe strcpy() operation! It should be replaced with
  43475. + much less CPU hungry
  43476. + memcpy( ( char * ) dent -> name, entry -> name -> name , entry -> name -> len );
  43477. +
  43478. + Also a more major thing is that there should be a way to figure out
  43479. + amount of space in dent -> name and be able to check that we are
  43480. + not going to overwrite more than we supposed to */
  43481. + name = entry->name->name;
  43482. + len = entry->name->len;
  43483. + if (is_longname(name, len)) {
  43484. + strcpy((unsigned char *)dent->name, name);
  43485. + put_unaligned(0, &dent->name[len]);
  43486. + }
  43487. + return 0;
  43488. +}
  43489. +
  43490. +/* estimate how much space is necessary in item to insert/paste set of entries
  43491. + described in @data. */
  43492. +int estimate_cde(const coord_t * coord /* coord of item */ ,
  43493. + const reiser4_item_data * data /* parameters for new item */ )
  43494. +{
  43495. + struct cde_entry_data *e;
  43496. + int result;
  43497. + int i;
  43498. +
  43499. + e = (struct cde_entry_data *) data->data;
  43500. +
  43501. + assert("nikita-1288", e != NULL);
  43502. + assert("nikita-1289", e->num_of_entries >= 0);
  43503. +
  43504. + if (coord == NULL)
  43505. + /* insert */
  43506. + result = sizeof(cde_item_format);
  43507. + else
  43508. + /* paste */
  43509. + result = 0;
  43510. +
  43511. + result += e->num_of_entries *
  43512. + (sizeof(cde_unit_header) + sizeof(directory_entry_format));
  43513. + for (i = 0; i < e->num_of_entries; ++i) {
  43514. + const char *name;
  43515. + int len;
  43516. +
  43517. + name = e->entry[i].name->name;
  43518. + len = e->entry[i].name->len;
  43519. + assert("nikita-2054", strlen(name) == len);
  43520. + if (is_longname(name, len))
  43521. + result += len + 1;
  43522. + }
  43523. + ((reiser4_item_data *) data)->length = result;
  43524. + return result;
  43525. +}
  43526. +
  43527. +/* ->nr_units() method for this item plugin. */
  43528. +pos_in_node_t nr_units_cde(const coord_t * coord /* coord of item */ )
  43529. +{
  43530. + return units(coord);
  43531. +}
  43532. +
  43533. +/* ->unit_key() method for this item plugin. */
  43534. +reiser4_key *unit_key_cde(const coord_t * coord /* coord of item */ ,
  43535. + reiser4_key * key /* resulting key */ )
  43536. +{
  43537. + assert("nikita-1452", coord != NULL);
  43538. + assert("nikita-1345", idx_of(coord) < units(coord));
  43539. + assert("nikita-1346", key != NULL);
  43540. +
  43541. + item_key_by_coord(coord, key);
  43542. + extract_key_from_de_id(extract_dir_id_from_key(key),
  43543. + &header_at(coord, idx_of(coord))->hash, key);
  43544. + return key;
  43545. +}
  43546. +
  43547. +/* mergeable_cde(): implementation of ->mergeable() item method.
  43548. +
  43549. + Two directory items are mergeable iff they are from the same
  43550. + directory. That simple.
  43551. +
  43552. +*/
  43553. +int mergeable_cde(const coord_t * p1 /* coord of first item */ ,
  43554. + const coord_t * p2 /* coord of second item */ )
  43555. +{
  43556. + reiser4_key k1;
  43557. + reiser4_key k2;
  43558. +
  43559. + assert("nikita-1339", p1 != NULL);
  43560. + assert("nikita-1340", p2 != NULL);
  43561. +
  43562. + return
  43563. + (item_plugin_by_coord(p1) == item_plugin_by_coord(p2)) &&
  43564. + (extract_dir_id_from_key(item_key_by_coord(p1, &k1)) ==
  43565. + extract_dir_id_from_key(item_key_by_coord(p2, &k2)));
  43566. +
  43567. +}
  43568. +
  43569. +/* ->max_key_inside() method for this item plugin. */
  43570. +reiser4_key *max_key_inside_cde(const coord_t * coord /* coord of item */ ,
  43571. + reiser4_key * result /* resulting key */ )
  43572. +{
  43573. + assert("nikita-1342", coord != NULL);
  43574. +
  43575. + item_key_by_coord(coord, result);
  43576. + set_key_ordering(result, get_key_ordering(reiser4_max_key()));
  43577. + set_key_fulloid(result, get_key_fulloid(reiser4_max_key()));
  43578. + set_key_offset(result, get_key_offset(reiser4_max_key()));
  43579. + return result;
  43580. +}
  43581. +
  43582. +/* @data contains data which are to be put into tree */
  43583. +int can_contain_key_cde(const coord_t * coord /* coord of item */ ,
  43584. + const reiser4_key * key /* key to check */ ,
  43585. + const reiser4_item_data * data /* parameters of new
  43586. + * item/unit being
  43587. + * created */ )
  43588. +{
  43589. + reiser4_key item_key;
  43590. +
  43591. + /* FIXME-VS: do not rely on anything but iplug field of @data. Only
  43592. + data->iplug is initialized */
  43593. + assert("vs-457", data && data->iplug);
  43594. +/* assert( "vs-553", data -> user == 0 );*/
  43595. + item_key_by_coord(coord, &item_key);
  43596. +
  43597. + return (item_plugin_by_coord(coord) == data->iplug) &&
  43598. + (extract_dir_id_from_key(&item_key) ==
  43599. + extract_dir_id_from_key(key));
  43600. +}
  43601. +
  43602. +#if REISER4_DEBUG
  43603. +/* cde_check ->check() method for compressed directory items
  43604. +
  43605. + used for debugging, every item should have here the most complete
  43606. + possible check of the consistency of the item that the inventor can
  43607. + construct
  43608. +*/
  43609. +int reiser4_check_cde(const coord_t * coord /* coord of item to check */,
  43610. + const char **error /* where to store error message */)
  43611. +{
  43612. + int i;
  43613. + int result;
  43614. + char *item_start;
  43615. + char *item_end;
  43616. + reiser4_key key;
  43617. +
  43618. + coord_t c;
  43619. +
  43620. + assert("nikita-1357", coord != NULL);
  43621. + assert("nikita-1358", error != NULL);
  43622. +
  43623. + if (!ergo(coord->item_pos != 0,
  43624. + is_dot_key(item_key_by_coord(coord, &key)))) {
  43625. + *error = "CDE doesn't start with dot";
  43626. + return -1;
  43627. + }
  43628. + item_start = item_body_by_coord(coord);
  43629. + item_end = item_start + item_length_by_coord(coord);
  43630. +
  43631. + coord_dup(&c, coord);
  43632. + result = 0;
  43633. + for (i = 0; i < units(coord); ++i) {
  43634. + directory_entry_format *entry;
  43635. +
  43636. + if ((char *)(header_at(coord, i) + 1) >
  43637. + item_end - units(coord) * sizeof *entry) {
  43638. + *error = "CDE header is out of bounds";
  43639. + result = -1;
  43640. + break;
  43641. + }
  43642. + entry = entry_at(coord, i);
  43643. + if ((char *)entry < item_start + sizeof(cde_item_format)) {
  43644. + *error = "CDE header is too low";
  43645. + result = -1;
  43646. + break;
  43647. + }
  43648. + if ((char *)(entry + 1) > item_end) {
  43649. + *error = "CDE header is too high";
  43650. + result = -1;
  43651. + break;
  43652. + }
  43653. + }
  43654. +
  43655. + return result;
  43656. +}
  43657. +#endif
  43658. +
  43659. +/* ->init() method for this item plugin. */
  43660. +int init_cde(coord_t * coord /* coord of item */ ,
  43661. + coord_t * from UNUSED_ARG, reiser4_item_data * data /* structure used for insertion */
  43662. + UNUSED_ARG)
  43663. +{
  43664. + put_unaligned(cpu_to_le16(0), &formatted_at(coord)->num_of_entries);
  43665. + return 0;
  43666. +}
  43667. +
  43668. +/* ->lookup() method for this item plugin. */
  43669. +lookup_result lookup_cde(const reiser4_key * key /* key to search for */ ,
  43670. + lookup_bias bias /* search bias */ ,
  43671. + coord_t * coord /* coord of item to lookup in */ )
  43672. +{
  43673. + cmp_t last_comp;
  43674. + int pos;
  43675. +
  43676. + reiser4_key utmost_key;
  43677. +
  43678. + assert("nikita-1293", coord != NULL);
  43679. + assert("nikita-1294", key != NULL);
  43680. +
  43681. + CHECKME(coord);
  43682. +
  43683. + if (keygt(item_key_by_coord(coord, &utmost_key), key)) {
  43684. + coord->unit_pos = 0;
  43685. + coord->between = BEFORE_UNIT;
  43686. + return CBK_COORD_NOTFOUND;
  43687. + }
  43688. + pos = find(coord, key, &last_comp);
  43689. + if (pos >= 0) {
  43690. + coord->unit_pos = (int)pos;
  43691. + switch (last_comp) {
  43692. + case EQUAL_TO:
  43693. + coord->between = AT_UNIT;
  43694. + return CBK_COORD_FOUND;
  43695. + case GREATER_THAN:
  43696. + coord->between = BEFORE_UNIT;
  43697. + return RETERR(-ENOENT);
  43698. + case LESS_THAN:
  43699. + default:
  43700. + impossible("nikita-1298", "Broken find");
  43701. + return RETERR(-EIO);
  43702. + }
  43703. + } else {
  43704. + coord->unit_pos = units(coord) - 1;
  43705. + coord->between = AFTER_UNIT;
  43706. + return (bias ==
  43707. + FIND_MAX_NOT_MORE_THAN) ? CBK_COORD_FOUND :
  43708. + CBK_COORD_NOTFOUND;
  43709. + }
  43710. +}
  43711. +
  43712. +/* ->paste() method for this item plugin. */
  43713. +int paste_cde(coord_t * coord /* coord of item */ ,
  43714. + reiser4_item_data * data /* parameters of new unit being
  43715. + * inserted */ ,
  43716. + carry_plugin_info * info UNUSED_ARG /* todo carry queue */ )
  43717. +{
  43718. + struct cde_entry_data *e;
  43719. + int result;
  43720. + int i;
  43721. +
  43722. + CHECKME(coord);
  43723. + e = (struct cde_entry_data *) data->data;
  43724. +
  43725. + result = 0;
  43726. + for (i = 0; i < e->num_of_entries; ++i) {
  43727. + int pos;
  43728. + int phantom_size;
  43729. +
  43730. + phantom_size = data->length;
  43731. + if (units(coord) == 0)
  43732. + phantom_size -= sizeof(cde_item_format);
  43733. +
  43734. + result =
  43735. + expand(coord, e->entry + i, phantom_size, &pos, data->arg);
  43736. + if (result != 0)
  43737. + break;
  43738. + result = paste_entry(coord, e->entry + i, pos, data->arg);
  43739. + if (result != 0)
  43740. + break;
  43741. + }
  43742. + CHECKME(coord);
  43743. + return result;
  43744. +}
  43745. +
  43746. +/* amount of space occupied by all entries starting from @idx both headers and
  43747. + bodies. */
  43748. +static unsigned int part_size(const coord_t * coord /* coord of item */ ,
  43749. + int idx /* index of unit */ )
  43750. +{
  43751. + assert("nikita-1299", coord != NULL);
  43752. + assert("nikita-1300", idx < (int)units(coord));
  43753. +
  43754. + return sizeof(cde_item_format) +
  43755. + (idx + 1) * sizeof(cde_unit_header) + offset_of(coord,
  43756. + idx + 1) -
  43757. + offset_of(coord, 0);
  43758. +}
  43759. +
/* how many but not more than @want units of @source can be merged with
   item in @target node. If pend == append - we try to append last item
   of @target by first units of @source. If pend == prepend - we try to
   "prepend" first item in @target by last units of @source. @target
   node has @free_space bytes of free space. Total size of those units
   are returned via @size */
int can_shift_cde(unsigned free_space /* free space in item */ ,
		  coord_t * coord /* coord of source item */ ,
		  znode * target /* target node */ ,
		  shift_direction pend /* shift direction */ ,
		  unsigned *size /* resulting number of shifted bytes */ ,
		  unsigned want /* maximal number of bytes to shift */ )
{
	int shift;

	CHECKME(coord);
	if (want == 0) {
		*size = 0;
		return 0;
	}

	/* pend == SHIFT_LEFT <==> shifting to the left */
	if (pend == SHIFT_LEFT) {
		/* find the largest prefix of units that fits into
		   @free_space; when @target is not NULL the units are
		   merged into an existing item, so the item format
		   header need not be counted */
		for (shift = min((int)want - 1, units(coord)); shift >= 0;
		     --shift) {
			*size = part_size(coord, shift);
			if (target != NULL)
				*size -= sizeof(cde_item_format);
			if (*size <= free_space)
				break;
		}
		/* convert index of last shifted unit into unit count */
		shift = shift + 1;
	} else {
		int total_size;

		assert("nikita-1301", pend == SHIFT_RIGHT);

		/* symmetric case: find the largest suffix of units
		   that fits into @free_space */
		total_size = item_length_by_coord(coord);
		for (shift = units(coord) - want - 1; shift < units(coord) - 1;
		     ++shift) {
			*size = total_size - part_size(coord, shift);
			if (target == NULL)
				*size += sizeof(cde_item_format);
			if (*size <= free_space)
				break;
		}
		shift = units(coord) - shift - 1;
	}
	if (shift == 0)
		*size = 0;
	CHECKME(coord);
	return shift;
}
  43813. +
/* ->copy_units() method for this item plugin.

   Copies @count units starting at @from of @source into @target.  The
   shift code has already reserved @free_space bytes in @target; which
   end of the item that space sits at depends on @where_is_free_space. */
void copy_units_cde(coord_t * target /* coord of target item */ ,
		    coord_t * source /* coord of source item */ ,
		    unsigned from /* starting unit */ ,
		    unsigned count /* how many units to copy */ ,
		    shift_direction where_is_free_space /* shift direction */ ,
		    unsigned free_space /* free space in item */ )
{
	char *header_from;
	char *header_to;

	char *entry_from;
	char *entry_to;

	int pos_in_target;
	int data_size;
	int data_delta;
	int i;

	assert("nikita-1303", target != NULL);
	assert("nikita-1304", source != NULL);
	assert("nikita-1305", (int)from < units(source));
	assert("nikita-1307", (int)(from + count) <= units(source));

	if (where_is_free_space == SHIFT_LEFT) {
		/* appending @target: new units go after the existing ones */
		assert("nikita-1453", from == 0);
		pos_in_target = units(target);
	} else {
		/* prepending @target: slide the existing body towards the
		   item end to make room at the beginning */
		assert("nikita-1309", (int)(from + count) == units(source));
		pos_in_target = 0;
		memmove(item_body_by_coord(target),
			(char *)item_body_by_coord(target) + free_space,
			item_length_by_coord(target) - free_space);
	}

	CHECKME(target);
	CHECKME(source);

	/* expand @target */
	data_size =
	    offset_of(source, (int)(from + count)) - offset_of(source,
							       (int)from);

	/* when @target had no units, part of @free_space was consumed by
	   the newly created item format header */
	if (units(target) == 0)
		free_space -= sizeof(cde_item_format);

	expand_item(target, pos_in_target, (int)count,
		    (int)(item_length_by_coord(target) - free_space),
		    (unsigned)data_size);

	/* copy first @count units of @source into @target */
	data_delta =
	    offset_of(target, pos_in_target) - offset_of(source, (int)from);

	/* copy entries */
	entry_from = (char *)entry_at(source, (int)from);
	entry_to = (char *)entry_at(source, (int)(from + count));
	memmove(entry_at(target, pos_in_target), entry_from,
		(unsigned)(entry_to - entry_from));

	/* copy headers */
	header_from = (char *)header_at(source, (int)from);
	header_to = (char *)header_at(source, (int)(from + count));
	memmove(header_at(target, pos_in_target), header_from,
		(unsigned)(header_to - header_from));

	/* update offsets: copied unit headers still hold source-relative
	   entry offsets; rebase them by @data_delta */
	for (i = pos_in_target; i < (int)(pos_in_target + count); ++i)
		adj_offset(target, i, data_delta);
	CHECKME(target);
	CHECKME(source);
}
  43886. +
/* ->cut_units() method for this item plugin.

   Removes units @from .. @to (inclusive) from the item at @coord and
   compacts the remainder.  Returns the number of bytes freed.  If
   non-NULL, @smallest_removed receives the key of the first removed
   unit and @new_first the key the item will have after the cut. */
int cut_units_cde(coord_t * coord /* coord of item */ ,
		  pos_in_node_t from /* start unit pos */ ,
		  pos_in_node_t to /* stop unit pos */ ,
		  struct carry_cut_data *cdata UNUSED_ARG,
		  reiser4_key * smallest_removed, reiser4_key * new_first)
{
	char *header_from;
	char *header_to;

	char *entry_from;
	char *entry_to;

	int size;
	int entry_delta;
	int header_delta;
	int i;

	unsigned count;

	CHECKME(coord);

	count = to - from + 1;

	assert("nikita-1454", coord != NULL);
	assert("nikita-1455", (int)(from + count) <= units(coord));

	if (smallest_removed)
		unit_key_by_coord(coord, smallest_removed);

	if (new_first) {
		coord_t next;

		/* not everything is cut from item head */
		assert("vs-1527", from == 0);
		assert("vs-1528", to < units(coord) - 1);

		/* the unit right after the cut becomes the first one */
		coord_dup(&next, coord);
		next.unit_pos++;
		unit_key_by_coord(&next, new_first);
	}

	size = item_length_by_coord(coord);
	if (count == (unsigned)units(coord)) {
		/* whole item goes away; nothing to compact */
		return size;
	}

	header_from = (char *)header_at(coord, (int)from);
	header_to = (char *)header_at(coord, (int)(from + count));

	entry_from = (char *)entry_at(coord, (int)from);
	entry_to = (char *)entry_at(coord, (int)(from + count));

	/* move headers */
	memmove(header_from, header_to,
		(unsigned)(address(coord, size) - header_to));

	header_delta = header_to - header_from;

	/* everything past the removed headers shifted down by
	   @header_delta bytes */
	entry_from -= header_delta;
	entry_to -= header_delta;
	size -= header_delta;

	/* copy entries */
	memmove(entry_from, entry_to,
		(unsigned)(address(coord, size) - entry_to));

	entry_delta = entry_to - entry_from;
	size -= entry_delta;

	/* update offsets */

	/* units before the cut: only their headers moved */
	for (i = 0; i < (int)from; ++i)
		adj_offset(coord, i, -header_delta);

	/* units after the cut: both headers and entry bodies moved */
	for (i = from; i < units(coord) - (int)count; ++i)
		adj_offset(coord, i, -header_delta - entry_delta);

	put_unaligned(cpu_to_le16((__u16) units(coord) - count),
		      &formatted_at(coord)->num_of_entries);

	if (from == 0) {
		/* entries from head were removed - move remaining to right */
		memmove((char *)item_body_by_coord(coord) +
			header_delta + entry_delta, item_body_by_coord(coord),
			(unsigned)size);
		if (REISER4_DEBUG)
			memset(item_body_by_coord(coord), 0,
			       (unsigned)header_delta + entry_delta);
	} else {
		/* freed space is already at the end of item */
		if (REISER4_DEBUG)
			memset((char *)item_body_by_coord(coord) + size, 0,
			       (unsigned)header_delta + entry_delta);
	}

	return header_delta + entry_delta;
}
  43985. +
  43986. +int kill_units_cde(coord_t * coord /* coord of item */ ,
  43987. + pos_in_node_t from /* start unit pos */ ,
  43988. + pos_in_node_t to /* stop unit pos */ ,
  43989. + struct carry_kill_data *kdata UNUSED_ARG,
  43990. + reiser4_key * smallest_removed, reiser4_key * new_first)
  43991. +{
  43992. + return cut_units_cde(coord, from, to, NULL, smallest_removed, new_first);
  43993. +}
  43994. +
  43995. +/* ->s.dir.extract_key() method for this item plugin. */
  43996. +int extract_key_cde(const coord_t * coord /* coord of item */ ,
  43997. + reiser4_key * key /* resulting key */ )
  43998. +{
  43999. + directory_entry_format *dent;
  44000. +
  44001. + assert("nikita-1155", coord != NULL);
  44002. + assert("nikita-1156", key != NULL);
  44003. +
  44004. + dent = entry_at(coord, idx_of(coord));
  44005. + return extract_key_from_id(&dent->id, key);
  44006. +}
  44007. +
  44008. +int
  44009. +update_key_cde(const coord_t * coord, const reiser4_key * key,
  44010. + lock_handle * lh UNUSED_ARG)
  44011. +{
  44012. + directory_entry_format *dent;
  44013. + obj_key_id obj_id;
  44014. + int result;
  44015. +
  44016. + assert("nikita-2344", coord != NULL);
  44017. + assert("nikita-2345", key != NULL);
  44018. +
  44019. + dent = entry_at(coord, idx_of(coord));
  44020. + result = build_obj_key_id(key, &obj_id);
  44021. + if (result == 0) {
  44022. + dent->id = obj_id;
  44023. + znode_make_dirty(coord->node);
  44024. + }
  44025. + return 0;
  44026. +}
  44027. +
  44028. +/* ->s.dir.extract_name() method for this item plugin. */
  44029. +char *extract_name_cde(const coord_t * coord /* coord of item */ , char *buf)
  44030. +{
  44031. + directory_entry_format *dent;
  44032. +
  44033. + assert("nikita-1157", coord != NULL);
  44034. +
  44035. + dent = entry_at(coord, idx_of(coord));
  44036. + return extract_dent_name(coord, dent, buf);
  44037. +}
  44038. +
  44039. +static int cde_bytes(int pasting, const reiser4_item_data * data)
  44040. +{
  44041. + int result;
  44042. +
  44043. + result = data->length;
  44044. + if (!pasting)
  44045. + result -= sizeof(cde_item_format);
  44046. + return result;
  44047. +}
  44048. +
/* ->s.dir.add_entry() method for this item plugin.

   Inserts directory entry @name into the tree: a brand-new item is
   created for a "dot" key, otherwise the entry is pasted into the
   existing item at @coord. */
int add_entry_cde(struct inode *dir /* directory object */ ,
		  coord_t * coord /* coord of item */ ,
		  lock_handle * lh /* lock handle for insertion */ ,
		  const struct dentry *name /* name to insert */ ,
		  reiser4_dir_entry_desc * dir_entry /* parameters of new
						      * directory entry */ )
{
	reiser4_item_data data;
	struct cde_entry entry;
	struct cde_entry_data edata;
	int result;

	assert("nikita-1656", coord->node == lh->node);
	assert("nikita-1657", znode_is_write_locked(coord->node));

	edata.num_of_entries = 1;
	edata.entry = &entry;

	entry.dir = dir;
	entry.obj = dir_entry->obj;
	entry.name = &name->d_name;

	data.data = (char *)&edata;
	data.user = 0;		/* &edata is not user space */
	data.iplug = item_plugin_by_id(COMPOUND_DIR_ID);
	data.arg = dir_entry;
	assert("nikita-1302", data.iplug != NULL);

	/* "dot" entry starts a fresh item (result != 0); estimate_cde()
	   is told about that by passing NULL instead of @coord */
	result = is_dot_key(&dir_entry->key);
	data.length = estimate_cde(result ? coord : NULL, &data);

	inode_add_bytes(dir, cde_bytes(result, &data));

	if (result)
		result = insert_by_coord(coord, &data, &dir_entry->key, lh, 0);
	else
		result = reiser4_resize_item(coord, &data, &dir_entry->key,
					     lh, 0);
	return result;
}
  44090. +
/* ->s.dir.rem_entry() method for this item plugin.

   Removes the directory entry at @coord and decreases the byte count
   of @dir by the space the entry occupied. */
int rem_entry_cde(struct inode *dir /* directory of item */ ,
		  const struct qstr *name, coord_t * coord /* coord of item */ ,
		  lock_handle * lh UNUSED_ARG /* lock handle for
					       * removal */ ,
		  reiser4_dir_entry_desc * entry UNUSED_ARG /* parameters of
							     * directory entry
							     * being removed */ )
{
	coord_t shadow;
	int result;
	int length;
	ON_DEBUG(char buf[DE_NAME_BUF_LEN]);

	assert("nikita-2870", strlen(name->name) == name->len);
	assert("nikita-2869",
	       !strcmp(name->name, extract_name_cde(coord, buf)));

	/* bytes this entry accounts for: unit header, entry body and,
	   for long names, the name with its terminating zero */
	length = sizeof(directory_entry_format) + sizeof(cde_unit_header);
	if (is_longname(name->name, name->len))
		length += name->len + 1;

	/* byte count smaller than one entry means on-disk corruption */
	if (inode_get_bytes(dir) < length) {
		warning("nikita-2628", "Dir is broke: %llu: %llu",
			(unsigned long long)get_inode_oid(dir),
			inode_get_bytes(dir));

		return RETERR(-EIO);
	}

	/* cut_node() is supposed to take pointers to _different_
	   coords, because it will modify them without respect to
	   possible aliasing. To work around this, create temporary copy
	   of @coord.
	 */
	coord_dup(&shadow, coord);
	result =
	    kill_node_content(coord, &shadow, NULL, NULL, NULL, NULL, NULL, 0);
	if (result == 0) {
		inode_sub_bytes(dir, length);
	}
	return result;
}
  44134. +
  44135. +/* ->s.dir.max_name_len() method for this item plugin */
  44136. +int max_name_len_cde(const struct inode *dir /* directory */ )
  44137. +{
  44138. + return
  44139. + reiser4_tree_by_inode(dir)->nplug->max_item_size() -
  44140. + sizeof(directory_entry_format) - sizeof(cde_item_format) -
  44141. + sizeof(cde_unit_header) - 2;
  44142. +}
  44143. +
  44144. +/* Make Linus happy.
  44145. + Local variables:
  44146. + c-indentation-style: "K&R"
  44147. + mode-name: "LC"
  44148. + c-basic-offset: 8
  44149. + tab-width: 8
  44150. + fill-column: 120
  44151. + End:
  44152. +*/
  44153. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/item/cde.h linux-4.14.2/fs/reiser4/plugin/item/cde.h
  44154. --- linux-4.14.2.orig/fs/reiser4/plugin/item/cde.h 1970-01-01 01:00:00.000000000 +0100
  44155. +++ linux-4.14.2/fs/reiser4/plugin/item/cde.h 2017-11-26 22:13:09.000000000 +0100
  44156. @@ -0,0 +1,87 @@
  44157. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  44158. +
  44159. +/* Compound directory item. See cde.c for description. */
  44160. +
  44161. +#if !defined( __FS_REISER4_PLUGIN_COMPRESSED_DE_H__ )
  44162. +#define __FS_REISER4_PLUGIN_COMPRESSED_DE_H__
  44163. +
  44164. +#include "../../forward.h"
  44165. +#include "../../kassign.h"
  44166. +#include "../../dformat.h"
  44167. +
  44168. +#include <linux/fs.h> /* for struct inode */
  44169. +#include <linux/dcache.h> /* for struct dentry, etc */
  44170. +
/* per-unit header of a cde item: one per directory entry */
typedef struct cde_unit_header {
	/* hash/ordering component of the entry key (de_id, kassign.h) */
	de_id hash;
	/* offset of the entry body; presumably relative to the item
	   start — confirm against offset_of()/entry_at() in cde.c */
	d16 offset;
} cde_unit_header;
  44175. +
  44176. +typedef struct cde_item_format {
  44177. + d16 num_of_entries;
  44178. + cde_unit_header entry[0];
  44179. +} cde_item_format;
  44180. +
/* in-memory description of one directory entry to be inserted */
struct cde_entry {
	const struct inode *dir;	/* directory the entry lives in */
	const struct inode *obj;	/* object the entry points to */
	const struct qstr *name;	/* entry name */
};
  44186. +
/* batch of entries passed through reiser4_item_data->data on insert */
struct cde_entry_data {
	int num_of_entries;		/* entries in the @entry array */
	struct cde_entry *entry;	/* array of entries to insert */
};
  44191. +
  44192. +/* plugin->item.b.* */
  44193. +reiser4_key *max_key_inside_cde(const coord_t * coord, reiser4_key * result);
  44194. +int can_contain_key_cde(const coord_t * coord, const reiser4_key * key,
  44195. + const reiser4_item_data *);
  44196. +int mergeable_cde(const coord_t * p1, const coord_t * p2);
  44197. +pos_in_node_t nr_units_cde(const coord_t * coord);
  44198. +reiser4_key *unit_key_cde(const coord_t * coord, reiser4_key * key);
  44199. +int estimate_cde(const coord_t * coord, const reiser4_item_data * data);
  44200. +void print_cde(const char *prefix, coord_t * coord);
  44201. +int init_cde(coord_t * coord, coord_t * from, reiser4_item_data * data);
  44202. +lookup_result lookup_cde(const reiser4_key * key, lookup_bias bias,
  44203. + coord_t * coord);
  44204. +int paste_cde(coord_t * coord, reiser4_item_data * data,
  44205. + carry_plugin_info * info UNUSED_ARG);
  44206. +int can_shift_cde(unsigned free_space, coord_t * coord, znode * target,
  44207. + shift_direction pend, unsigned *size, unsigned want);
  44208. +void copy_units_cde(coord_t * target, coord_t * source, unsigned from,
  44209. + unsigned count, shift_direction where_is_free_space,
  44210. + unsigned free_space);
  44211. +int cut_units_cde(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
  44212. + struct carry_cut_data *, reiser4_key * smallest_removed,
  44213. + reiser4_key * new_first);
  44214. +int kill_units_cde(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
  44215. + struct carry_kill_data *, reiser4_key * smallest_removed,
  44216. + reiser4_key * new_first);
  44217. +void print_cde(const char *prefix, coord_t * coord);
  44218. +int reiser4_check_cde(const coord_t * coord, const char **error);
  44219. +
  44220. +/* plugin->u.item.s.dir.* */
  44221. +int extract_key_cde(const coord_t * coord, reiser4_key * key);
  44222. +int update_key_cde(const coord_t * coord, const reiser4_key * key,
  44223. + lock_handle * lh);
  44224. +char *extract_name_cde(const coord_t * coord, char *buf);
  44225. +int add_entry_cde(struct inode *dir, coord_t * coord,
  44226. + lock_handle * lh, const struct dentry *name,
  44227. + reiser4_dir_entry_desc * entry);
  44228. +int rem_entry_cde(struct inode *dir, const struct qstr *name, coord_t * coord,
  44229. + lock_handle * lh, reiser4_dir_entry_desc * entry);
  44230. +int max_name_len_cde(const struct inode *dir);
  44231. +
  44232. +/* __FS_REISER4_PLUGIN_COMPRESSED_DE_H__ */
  44233. +#endif
  44234. +
  44235. +/* Make Linus happy.
  44236. + Local variables:
  44237. + c-indentation-style: "K&R"
  44238. + mode-name: "LC"
  44239. + c-basic-offset: 8
  44240. + tab-width: 8
  44241. + fill-column: 120
  44242. + End:
  44243. +*/
  44244. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/item/ctail.c linux-4.14.2/fs/reiser4/plugin/item/ctail.c
  44245. --- linux-4.14.2.orig/fs/reiser4/plugin/item/ctail.c 1970-01-01 01:00:00.000000000 +0100
  44246. +++ linux-4.14.2/fs/reiser4/plugin/item/ctail.c 2017-11-26 22:13:09.000000000 +0100
  44247. @@ -0,0 +1,1769 @@
  44248. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  44249. +
  44250. +/* ctails (aka "clustered tails") are items for cryptcompress objects */
  44251. +
  44252. +/* DESCRIPTION:
  44253. +
  44254. +Each cryptcompress object is stored on disk as a set of clusters sliced
  44255. +into ctails.
  44256. +
  44257. +Internal on-disk structure:
  44258. +
  44259. + HEADER (1) Here stored disk cluster shift
  44260. + BODY
  44261. +*/
  44262. +
  44263. +#include "../../forward.h"
  44264. +#include "../../debug.h"
  44265. +#include "../../dformat.h"
  44266. +#include "../../kassign.h"
  44267. +#include "../../key.h"
  44268. +#include "../../coord.h"
  44269. +#include "item.h"
  44270. +#include "../node/node.h"
  44271. +#include "../plugin.h"
  44272. +#include "../object.h"
  44273. +#include "../../znode.h"
  44274. +#include "../../carry.h"
  44275. +#include "../../tree.h"
  44276. +#include "../../inode.h"
  44277. +#include "../../super.h"
  44278. +#include "../../context.h"
  44279. +#include "../../page_cache.h"
  44280. +#include "../cluster.h"
  44281. +#include "../../flush.h"
  44282. +#include "../../tree_walk.h"
  44283. +
  44284. +#include <linux/pagevec.h>
  44285. +#include <linux/swap.h>
  44286. +#include <linux/fs.h>
  44287. +
/* return body of ctail item at @coord, viewed as the on-disk format
   header (ctail_item_format) */
static ctail_item_format *ctail_formatted_at(const coord_t * coord)
{
	assert("edward-60", coord != NULL);
	return item_body_by_coord(coord);
}
  44294. +
/* cluster shift stored in the ctail format header at @coord */
static int cluster_shift_by_coord(const coord_t * coord)
{
	return get_unaligned(&ctail_formatted_at(coord)->cluster_shift);
}
  44299. +
/* cache the cluster shift of the hint's coord in the hint's ctail
   extension, so it survives coord invalidation */
static inline void dclust_set_extension_shift(hint_t * hint)
{
	assert("edward-1270",
	       item_id_by_coord(&hint->ext_coord.coord) == CTAIL_ID);
	hint->ext_coord.extension.ctail.shift =
	    cluster_shift_by_coord(&hint->ext_coord.coord);
}
  44307. +
/* file byte offset of the data at @coord, taken from the item key */
static loff_t off_by_coord(const coord_t * coord)
{
	reiser4_key key;
	return get_key_offset(item_key_by_coord(coord, &key));
}
  44313. +
/* true if the ctail at @coord is "unprepped", i.e. its stored cluster
   shift is the reserved UCTAIL_SHIFT value; such items carry exactly
   UCTAIL_NR_UNITS units (see the debug assertion below) */
int coord_is_unprepped_ctail(const coord_t * coord)
{
	assert("edward-1233", coord != NULL);
	assert("edward-1234", item_id_by_coord(coord) == CTAIL_ID);
	assert("edward-1235",
	       ergo((int)cluster_shift_by_coord(coord) == (int)UCTAIL_SHIFT,
		    nr_units_ctail(coord) == (pos_in_node_t) UCTAIL_NR_UNITS));

	return (int)cluster_shift_by_coord(coord) == (int)UCTAIL_SHIFT;
}
  44324. +
/* logical cluster index of the data at @coord.  When @inode is given
   its cluster shift is used (and checked against the item for prepped
   ctails); otherwise the shift stored in the item itself is used, which
   requires the ctail to be prepped. */
static cloff_t clust_by_coord(const coord_t * coord, struct inode *inode)
{
	int shift;

	if (inode != NULL) {
		shift = inode_cluster_shift(inode);
		assert("edward-1236",
		       ergo(!coord_is_unprepped_ctail(coord),
			    shift == cluster_shift_by_coord(coord)));
	} else {
		assert("edward-1237", !coord_is_unprepped_ctail(coord));
		shift = cluster_shift_by_coord(coord);
	}
	return off_by_coord(coord) >> shift;
}
  44340. +
/* size in bytes of the disk cluster the (prepped) ctail at @coord
   belongs to */
static int disk_cluster_size(const coord_t * coord)
{
	assert("edward-1156",
	       item_plugin_by_coord(coord) == item_plugin_by_id(CTAIL_ID));
	/* calculation of disk cluster size
	   is meaningless if ctail is unprepped */
	assert("edward-1238", !coord_is_unprepped_ctail(coord));

	return 1 << cluster_shift_by_coord(coord);
}
  44351. +
/* true if the key is of first disk cluster item: either the ctail is
   unprepped, or the key offset is aligned to the disk cluster size */
static int is_disk_cluster_key(const reiser4_key * key, const coord_t * coord)
{
	assert("edward-1239", item_id_by_coord(coord) == CTAIL_ID);

	return coord_is_unprepped_ctail(coord) ||
	    ((get_key_offset(key) &
	      ((loff_t) disk_cluster_size(coord) - 1)) == 0);
}
  44361. +
  44362. +static char *first_unit(coord_t * coord)
  44363. +{
  44364. + /* FIXME: warning: pointer of type `void *' used in arithmetic */
  44365. + return (char *)item_body_by_coord(coord) + sizeof(ctail_item_format);
  44366. +}
  44367. +
  44368. +/* plugin->u.item.b.max_key_inside :
  44369. + tail_max_key_inside */
  44370. +
/* plugin->u.item.b.can_contain_key

   True if @data with @key can be pasted into the ctail at @coord:
   same plugin, same object, byte-adjacent offset, and not at the
   start of a logical cluster. */
int can_contain_key_ctail(const coord_t * coord, const reiser4_key * key,
			  const reiser4_item_data * data)
{
	reiser4_key item_key;

	if (item_plugin_by_coord(coord) != data->iplug)
		return 0;

	item_key_by_coord(coord, &item_key);
	if (get_key_locality(key) != get_key_locality(&item_key) ||
	    get_key_objectid(key) != get_key_objectid(&item_key))
		return 0;
	/* one ctail unit corresponds to one byte of file offset */
	if (get_key_offset(&item_key) + nr_units_ctail(coord) !=
	    get_key_offset(key))
		return 0;
	if (is_disk_cluster_key(key, coord))
		/*
		 * can not merge at the beginning
		 * of a logical cluster in a file
		 */
		return 0;
	return 1;
}
  44395. +
/* plugin->u.item.b.mergeable

   True if the item at @p2 can be glued to the end of the item at @p1:
   both ctails, same object and key type, byte-adjacent offsets, and
   @p2 does not start a new logical cluster. */
int mergeable_ctail(const coord_t * p1, const coord_t * p2)
{
	reiser4_key key1, key2;

	assert("edward-62", item_id_by_coord(p1) == CTAIL_ID);
	assert("edward-61", plugin_of_group(item_plugin_by_coord(p1),
					    UNIX_FILE_METADATA_ITEM_TYPE));

	if (item_id_by_coord(p2) != CTAIL_ID) {
		/* second item is of another type */
		return 0;
	}
	item_key_by_coord(p1, &key1);
	item_key_by_coord(p2, &key2);
	if (get_key_locality(&key1) != get_key_locality(&key2) ||
	    get_key_objectid(&key1) != get_key_objectid(&key2) ||
	    get_key_type(&key1) != get_key_type(&key2)) {
		/* items of different objects */
		return 0;
	}
	/* one ctail unit corresponds to one byte of file offset */
	if (get_key_offset(&key1) + nr_units_ctail(p1) != get_key_offset(&key2))
		/* not adjacent items */
		return 0;
	if (is_disk_cluster_key(&key2, p2))
		/*
		 * can not merge at the beginning
		 * of a logical cluster in a file
		 */
		return 0;
	return 1;
}
  44428. +
  44429. +/* plugin->u.item.b.nr_units */
  44430. +pos_in_node_t nr_units_ctail(const coord_t * coord)
  44431. +{
  44432. + return (item_length_by_coord(coord) -
  44433. + sizeof(ctail_formatted_at(coord)->cluster_shift));
  44434. +}
  44435. +
  44436. +/* plugin->u.item.b.estimate:
  44437. + estimate how much space is needed to insert/paste @data->length bytes
  44438. + into ctail at @coord */
  44439. +int estimate_ctail(const coord_t * coord /* coord of item */ ,
  44440. + const reiser4_item_data *
  44441. + data /* parameters for new item */ )
  44442. +{
  44443. + if (coord == NULL)
  44444. + /* insert */
  44445. + return (sizeof(ctail_item_format) + data->length);
  44446. + else
  44447. + /* paste */
  44448. + return data->length;
  44449. +}
  44450. +
/* ->init() method for this item plugin: set the cluster shift in the
   format header of the new item at @to. */
int init_ctail(coord_t * to /* coord of item */ ,
	       coord_t * from /* old_item */ ,
	       reiser4_item_data * data /* structure used for insertion */ )
{
	int cluster_shift;	/* cpu value to convert */

	if (data) {
		/* fresh insert: shift is passed via @data->arg and the
		   header size is carved out of @data->length */
		assert("edward-463", data->length > sizeof(ctail_item_format));
		cluster_shift = *((int *)(data->arg));
		data->length -= sizeof(ctail_item_format);
	} else {
		/* item created by shifting: inherit shift from @from */
		assert("edward-464", from != NULL);
		assert("edward-855", ctail_ok(from));
		cluster_shift = (int)(cluster_shift_by_coord(from));
	}
	put_unaligned((d8)cluster_shift, &ctail_formatted_at(to)->cluster_shift);
	assert("edward-856", ctail_ok(to));
	return 0;
}
  44471. +
  44472. +/* plugin->u.item.b.lookup:
  44473. + NULL: We are looking for item keys only */
  44474. +
  44475. +#if REISER4_DEBUG
/* debug-only sanity check: a ctail is consistent if it is either
   unprepped or carries a valid cluster shift */
int ctail_ok(const coord_t * coord)
{
	return coord_is_unprepped_ctail(coord) ||
	    cluster_shift_ok(cluster_shift_by_coord(coord));
}
  44481. +
  44482. +/* plugin->u.item.b.check */
  44483. +int check_ctail(const coord_t * coord, const char **error)
  44484. +{
  44485. + if (!ctail_ok(coord)) {
  44486. + if (error)
  44487. + *error = "bad cluster shift in ctail";
  44488. + return 1;
  44489. + }
  44490. + return 0;
  44491. +}
  44492. +#endif
  44493. +
/* plugin->u.item.b.paste

   Copy @data->length bytes into the ctail at @coord.  The node plugin
   has already grown the item; pasting happens either at the very
   beginning (new item) or right after the last unit. */
int
paste_ctail(coord_t * coord, reiser4_item_data * data,
	    carry_plugin_info * info UNUSED_ARG)
{
	unsigned old_nr_units;

	assert("edward-268", data->data != NULL);
	/* copy only from kernel space */
	assert("edward-66", data->user == 0);

	/* number of units the item had before it was expanded by
	   @data->length bytes */
	old_nr_units =
	    item_length_by_coord(coord) - sizeof(ctail_item_format) -
	    data->length;

	/* ctail items never get pasted in the middle */

	if (coord->unit_pos == 0 && coord->between == AT_UNIT) {

		/* paste at the beginning when create new item */
		assert("edward-450",
		       item_length_by_coord(coord) ==
		       data->length + sizeof(ctail_item_format));
		assert("edward-451", old_nr_units == 0);
	} else if (coord->unit_pos == old_nr_units - 1
		   && coord->between == AFTER_UNIT) {

		/* paste at the end */
		coord->unit_pos++;
	} else
		impossible("edward-453", "bad paste position");

	memcpy(first_unit(coord) + coord->unit_pos, data->data, data->length);

	assert("edward-857", ctail_ok(coord));

	return 0;
}
  44532. +
  44533. +/* plugin->u.item.b.fast_paste */
  44534. +
/*
 * plugin->u.item.b.can_shift
 *
 * Return number of units that can be shifted;
 * Store space (in bytes) occupied by those units in @size.
 */
int can_shift_ctail(unsigned free_space, coord_t *source,
		    znode * target, shift_direction direction UNUSED_ARG,
		    unsigned *size, unsigned want)
{
	/* make sure that we do not want to shift more than we have */
	assert("edward-68", want > 0 && want <= nr_units_ctail(source));

	*size = min(want, free_space);

	if (!target) {
		/*
		 * new item will be created
		 */
		if (*size <= sizeof(ctail_item_format)) {
			/*
			 * can not shift only ctail header
			 */
			*size = 0;
			return 0;
		}
		/* header bytes do not carry units */
		return *size - sizeof(ctail_item_format);
	}
	else
		/*
		 * shifting to the mergeable item
		 */
		return *size;
}
  44569. +
/*
 * plugin->u.item.b.copy_units
 * cooperates with ->can_shift()
 */
void copy_units_ctail(coord_t * target, coord_t * source,
		      unsigned from, unsigned count /* units */ ,
		      shift_direction where_is_free_space,
		      unsigned free_space /* bytes */ )
{
	/* make sure that item @target is expanded already */
	assert("edward-69", (unsigned)item_length_by_coord(target) >= count);
	assert("edward-70", free_space == count || free_space == count + 1);

	assert("edward-858", ctail_ok(source));

	if (where_is_free_space == SHIFT_LEFT) {
		/*
		 * append item @target with @count first bytes
		 * of @source: this restriction came from ordinary tails
		 */
		assert("edward-71", from == 0);
		assert("edward-860", ctail_ok(target));

		memcpy(first_unit(target) + nr_units_ctail(target) - count,
		       first_unit(source), count);
	} else {
		/*
		 * target item is moved to right already
		 */
		reiser4_key key;

		assert("edward-72", nr_units_ctail(source) == from + count);

		if (free_space == count) {
			/* brand-new item: initialize its format header
			   (cluster shift) from @source */
			init_ctail(target, source, NULL);
		} else {
			/*
			 * shifting to a mergeable item
			 */
			assert("edward-862", ctail_ok(target));
		}
		memcpy(first_unit(target), first_unit(source) + from, count);

		assert("edward-863", ctail_ok(target));
		/*
		 * new units are inserted before first unit
		 * in an item, therefore, we have to update
		 * item key
		 */
		item_key_by_coord(source, &key);
		set_key_offset(&key, get_key_offset(&key) + from);

		node_plugin_by_node(target->node)->update_item_key(target,
								   &key,
								   NULL /*info */);
	}
}
  44627. +
  44628. +/* plugin->u.item.b.create_hook */
  44629. +int create_hook_ctail(const coord_t * coord, void *arg)
  44630. +{
  44631. + assert("edward-864", znode_is_loaded(coord->node));
  44632. +
  44633. + znode_set_convertible(coord->node);
  44634. + return 0;
  44635. +}
  44636. +
/* plugin->u.item.b.kill_hook

   Called when units of a ctail are killed (e.g. during truncate).
   When the first item of a disk cluster goes away, the whole page
   cluster is truncated and the inode byte count reduced by one
   cluster. */
int kill_hook_ctail(const coord_t * coord, pos_in_node_t from,
		    pos_in_node_t count, carry_kill_data * kdata)
{
	struct inode *inode;

	assert("edward-1157", item_id_by_coord(coord) == CTAIL_ID);
	assert("edward-291", znode_is_write_locked(coord->node));

	inode = kdata->inode;
	if (inode) {
		reiser4_key key;
		struct cryptcompress_info * info;
		cloff_t index;

		item_key_by_coord(coord, &key);
		info = cryptcompress_inode_data(inode);
		index = off_to_clust(get_key_offset(&key), inode);

		if (from == 0) {
			/* remember which logical cluster the kill
			   started at */
			info->trunc_index = index;
			if (is_disk_cluster_key(&key, coord)) {
				/*
				 * first item of disk cluster is to be killed
				 */
				truncate_complete_page_cluster(
					inode, index, kdata->params.truncate);
				inode_sub_bytes(inode,
						inode_cluster_size(inode));
			}
		}
	}
	return 0;
}
  44671. +
  44672. +/* for shift_hook_ctail(),
  44673. + return true if the first disk cluster item has dirty child
  44674. +*/
  44675. +static int ctail_convertible(const coord_t * coord)
  44676. +{
  44677. + int result;
  44678. + reiser4_key key;
  44679. + jnode *child = NULL;
  44680. +
  44681. + assert("edward-477", coord != NULL);
  44682. + assert("edward-478", item_id_by_coord(coord) == CTAIL_ID);
  44683. +
  44684. + if (coord_is_unprepped_ctail(coord))
  44685. + /* unprepped ctail should be converted */
  44686. + return 1;
  44687. +
  44688. + item_key_by_coord(coord, &key);
  44689. + child = jlookup(current_tree,
  44690. + get_key_objectid(&key),
  44691. + off_to_pg(off_by_coord(coord)));
  44692. + if (!child)
  44693. + return 0;
  44694. + result = JF_ISSET(child, JNODE_DIRTY);
  44695. + jput(child);
  44696. + return result;
  44697. +}
  44698. +
  44699. +/* FIXME-EDWARD */
  44700. +/* plugin->u.item.b.shift_hook */
  44701. +int shift_hook_ctail(const coord_t * item /* coord of item */ ,
  44702. + unsigned from UNUSED_ARG /* start unit */ ,
  44703. + unsigned count UNUSED_ARG /* stop unit */ ,
  44704. + znode * old_node /* old parent */ )
  44705. +{
  44706. + assert("edward-479", item != NULL);
  44707. + assert("edward-480", item->node != old_node);
  44708. +
  44709. + if (!znode_convertible(old_node) || znode_convertible(item->node))
  44710. + return 0;
  44711. + if (ctail_convertible(item))
  44712. + znode_set_convertible(item->node);
  44713. + return 0;
  44714. +}
  44715. +
/*
 * Remove units [@from, @to] of a ctail item.
 *
 * @cut: non-zero for plain "cut"; zero for "kill" (removal on behalf
 *       of truncate, runs the kill hook);
 * @p: carry_kill_data when killing, unused when cutting;
 * @smallest_removed: if not NULL, receives the smallest removed key;
 * @new_first: if not NULL, receives the key of the first surviving unit.
 *
 * Returns the number of bytes freed in the node (the unit count,
 * plus the item header when the whole item is removed).
 */
static int
cut_or_kill_ctail_units(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
			int cut, void *p, reiser4_key * smallest_removed,
			reiser4_key * new_first)
{
	pos_in_node_t count;	/* number of units to cut */
	char *item;

	count = to - from + 1;
	item = item_body_by_coord(coord);

	/* removal is supported only from the head or from the tail
	   of the item, never from the middle */
	assert("edward-74", ergo(from != 0, to == coord_last_unit_pos(coord)));

	if (smallest_removed) {
		/* store smallest key removed */
		item_key_by_coord(coord, smallest_removed);
		set_key_offset(smallest_removed,
			       get_key_offset(smallest_removed) + from);
	}

	if (new_first) {
		assert("vs-1531", from == 0);

		/* key of the first unit surviving the removal */
		item_key_by_coord(coord, new_first);
		set_key_offset(new_first,
			       get_key_offset(new_first) + from + count);
	}

	if (!cut)
		/* killing: let the file body react (truncate the page
		   cluster, update accounted bytes) */
		kill_hook_ctail(coord, from, 0, (struct carry_kill_data *)p);

	if (from == 0) {
		if (count != nr_units_ctail(coord)) {
			/* part of item is removed, so move free space at the beginning
			   of the item and update item key */
			reiser4_key key;
			/* NOTE(review): assumes count >= sizeof(ctail_item_format)
			   so source and destination do not overlap — confirm */
			memcpy(item + to + 1, item, sizeof(ctail_item_format));
			item_key_by_coord(coord, &key);
			set_key_offset(&key, get_key_offset(&key) + count);
			node_plugin_by_node(coord->node)->update_item_key(coord,
									  &key,
									  NULL);
		} else {
			/* cut_units should not be called to cut everything */
			assert("vs-1532", ergo(cut, 0));
			/* whole item is cut, so more than amount of space occupied
			   by units got freed */
			count += sizeof(ctail_item_format);
		}
	}
	return count;
}
  44768. +
/* plugin->u.item.b.cut_units
 *
 * Cut units [@from, @to] without running the kill hook.
 * Returns the number of bytes freed.
 */
int
cut_units_ctail(coord_t * item, pos_in_node_t from, pos_in_node_t to,
		carry_cut_data * cdata, reiser4_key * smallest_removed,
		reiser4_key * new_first)
{
	return cut_or_kill_ctail_units(item, from, to, 1, NULL,
				       smallest_removed, new_first);
}
  44778. +
/* plugin->u.item.b.kill_units
 *
 * Kill units [@from, @to] on behalf of truncate: runs the kill
 * hook via cut_or_kill_ctail_units(). Returns bytes freed.
 */
int
kill_units_ctail(coord_t * item, pos_in_node_t from, pos_in_node_t to,
		 struct carry_kill_data *kdata, reiser4_key * smallest_removed,
		 reiser4_key * new_first)
{
	return cut_or_kill_ctail_units(item, from, to, 0, kdata,
				       smallest_removed, new_first);
}
  44788. +
  44789. +/* plugin->u.item.s.file.read */
  44790. +int read_ctail(struct file *file UNUSED_ARG, flow_t * f, hint_t * hint)
  44791. +{
  44792. + uf_coord_t *uf_coord;
  44793. + coord_t *coord;
  44794. +
  44795. + uf_coord = &hint->ext_coord;
  44796. + coord = &uf_coord->coord;
  44797. + assert("edward-127", f->user == 0);
  44798. + assert("edward-129", coord && coord->node);
  44799. + assert("edward-130", coord_is_existing_unit(coord));
  44800. + assert("edward-132", znode_is_loaded(coord->node));
  44801. +
  44802. + /* start read only from the beginning of ctail */
  44803. + assert("edward-133", coord->unit_pos == 0);
  44804. + /* read only whole ctails */
  44805. + assert("edward-135", nr_units_ctail(coord) <= f->length);
  44806. +
  44807. + assert("edward-136", reiser4_schedulable());
  44808. + assert("edward-886", ctail_ok(coord));
  44809. +
  44810. + if (f->data)
  44811. + memcpy(f->data, (char *)first_unit(coord),
  44812. + (size_t) nr_units_ctail(coord));
  44813. +
  44814. + dclust_set_extension_shift(hint);
  44815. + mark_page_accessed(znode_page(coord->node));
  44816. + move_flow_forward(f, nr_units_ctail(coord));
  44817. +
  44818. + return 0;
  44819. +}
  44820. +
  44821. +/**
  44822. + * Prepare transform stream with plain text for page
  44823. + * @page taking into account synchronization issues.
  44824. + */
  44825. +static int ctail_read_disk_cluster(struct cluster_handle * clust,
  44826. + struct inode * inode, struct page * page,
  44827. + znode_lock_mode mode)
  44828. +{
  44829. + int result;
  44830. +
  44831. + assert("edward-1450", mode == ZNODE_READ_LOCK || ZNODE_WRITE_LOCK);
  44832. + assert("edward-671", clust->hint != NULL);
  44833. + assert("edward-140", clust->dstat == INVAL_DISK_CLUSTER);
  44834. + assert("edward-672", cryptcompress_inode_ok(inode));
  44835. + assert("edward-1527", PageLocked(page));
  44836. +
  44837. + unlock_page(page);
  44838. +
  44839. + /* set input stream */
  44840. + result = grab_tfm_stream(inode, &clust->tc, INPUT_STREAM);
  44841. + if (result) {
  44842. + lock_page(page);
  44843. + return result;
  44844. + }
  44845. + result = find_disk_cluster(clust, inode, 1 /* read items */, mode);
  44846. + lock_page(page);
  44847. + if (result)
  44848. + return result;
  44849. + /*
  44850. + * at this point we have locked position in the tree
  44851. + */
  44852. + assert("edward-1528", znode_is_any_locked(clust->hint->lh.node));
  44853. +
  44854. + if (page->mapping != inode->i_mapping) {
  44855. + /* page was truncated */
  44856. + reiser4_unset_hint(clust->hint);
  44857. + reset_cluster_params(clust);
  44858. + return AOP_TRUNCATED_PAGE;
  44859. + }
  44860. + if (PageUptodate(page)) {
  44861. + /* disk cluster can be obsolete, don't use it! */
  44862. + reiser4_unset_hint(clust->hint);
  44863. + reset_cluster_params(clust);
  44864. + return 0;
  44865. + }
  44866. + if (clust->dstat == FAKE_DISK_CLUSTER ||
  44867. + clust->dstat == UNPR_DISK_CLUSTER ||
  44868. + clust->dstat == TRNC_DISK_CLUSTER) {
  44869. + /*
  44870. + * this information about disk cluster will be valid
  44871. + * as long as we keep the position in the tree locked
  44872. + */
  44873. + tfm_cluster_set_uptodate(&clust->tc);
  44874. + return 0;
  44875. + }
  44876. + /* now prepare output stream.. */
  44877. + result = grab_coa(&clust->tc, inode_compression_plugin(inode));
  44878. + if (result)
  44879. + return result;
  44880. + /* ..and fill this with plain text */
  44881. + result = reiser4_inflate_cluster(clust, inode);
  44882. + if (result)
  44883. + return result;
  44884. + /*
  44885. + * The stream is ready! It won't be obsolete as
  44886. + * long as we keep last disk cluster item locked.
  44887. + */
  44888. + tfm_cluster_set_uptodate(&clust->tc);
  44889. + return 0;
  44890. +}
  44891. +
  44892. +/*
  44893. + * fill one page with plain text.
  44894. + */
  44895. +int do_readpage_ctail(struct inode * inode, struct cluster_handle * clust,
  44896. + struct page *page, znode_lock_mode mode)
  44897. +{
  44898. + int ret;
  44899. + unsigned cloff;
  44900. + char *data;
  44901. + size_t to_page;
  44902. + struct tfm_cluster * tc = &clust->tc;
  44903. +
  44904. + assert("edward-212", PageLocked(page));
  44905. +
  44906. + if (unlikely(page->mapping != inode->i_mapping))
  44907. + return AOP_TRUNCATED_PAGE;
  44908. + if (PageUptodate(page))
  44909. + goto exit;
  44910. + to_page = pbytes(page_index(page), inode);
  44911. + if (to_page == 0) {
  44912. + zero_user(page, 0, PAGE_SIZE);
  44913. + SetPageUptodate(page);
  44914. + goto exit;
  44915. + }
  44916. + if (!tfm_cluster_is_uptodate(&clust->tc)) {
  44917. + clust->index = pg_to_clust(page->index, inode);
  44918. +
  44919. + /* this will unlock/lock the page */
  44920. + ret = ctail_read_disk_cluster(clust, inode, page, mode);
  44921. +
  44922. + assert("edward-212", PageLocked(page));
  44923. + if (ret)
  44924. + return ret;
  44925. +
  44926. + /* refresh bytes */
  44927. + to_page = pbytes(page_index(page), inode);
  44928. + if (to_page == 0) {
  44929. + zero_user(page, 0, PAGE_SIZE);
  44930. + SetPageUptodate(page);
  44931. + goto exit;
  44932. + }
  44933. + }
  44934. + if (PageUptodate(page))
  44935. + /* somebody else fill it already */
  44936. + goto exit;
  44937. +
  44938. + assert("edward-119", tfm_cluster_is_uptodate(tc));
  44939. + assert("edward-1529", znode_is_any_locked(clust->hint->lh.node));
  44940. +
  44941. + switch (clust->dstat) {
  44942. + case UNPR_DISK_CLUSTER:
  44943. + /*
  44944. + * Page is not uptodate and item cluster is unprepped:
  44945. + * this must not ever happen.
  44946. + */
  44947. + warning("edward-1632",
  44948. + "Bad item cluster %lu (Inode %llu). Fsck?",
  44949. + clust->index,
  44950. + (unsigned long long)get_inode_oid(inode));
  44951. + return RETERR(-EIO);
  44952. + case TRNC_DISK_CLUSTER:
  44953. + /*
  44954. + * Race with truncate!
  44955. + * We resolve it in favour of the last one (the only way,
  44956. + * as in this case plain text is unrecoverable)
  44957. + */
  44958. + case FAKE_DISK_CLUSTER:
  44959. + /* fill the page by zeroes */
  44960. + zero_user(page, 0, PAGE_SIZE);
  44961. + SetPageUptodate(page);
  44962. + break;
  44963. + case PREP_DISK_CLUSTER:
  44964. + /* fill page by transformed stream with plain text */
  44965. + assert("edward-1058", !PageUptodate(page));
  44966. + assert("edward-120", tc->len <= inode_cluster_size(inode));
  44967. +
  44968. + /* page index in this logical cluster */
  44969. + cloff = pg_to_off_to_cloff(page->index, inode);
  44970. +
  44971. + data = kmap(page);
  44972. + memcpy(data, tfm_stream_data(tc, OUTPUT_STREAM) + cloff, to_page);
  44973. + memset(data + to_page, 0, (size_t) PAGE_SIZE - to_page);
  44974. + flush_dcache_page(page);
  44975. + kunmap(page);
  44976. + SetPageUptodate(page);
  44977. + break;
  44978. + default:
  44979. + impossible("edward-1169", "bad disk cluster state");
  44980. + }
  44981. + exit:
  44982. + return 0;
  44983. +}
  44984. +
/* plugin->u.item.s.file.readpage
 *
 * ->readpage() entry for ctail items: allocate a temporary hint,
 * fill @page with plain text via do_readpage_ctail(), then save
 * the hint and drop all temporary state.
 */
int readpage_ctail(void *vp, struct page *page)
{
	int result;
	hint_t * hint;
	struct cluster_handle * clust = vp;

	assert("edward-114", clust != NULL);
	assert("edward-115", PageLocked(page));
	assert("edward-116", !PageUptodate(page));
	assert("edward-118", page->mapping && page->mapping->host);
	assert("edward-867", !tfm_cluster_is_uptodate(&clust->tc));

	/* a hint is allocated per call: ->readpage() has no caller
	   context to borrow one from */
	hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
	if (hint == NULL) {
		unlock_page(page);
		return RETERR(-ENOMEM);
	}
	clust->hint = hint;
	result = load_file_hint(clust->file, hint);
	if (result) {
		kfree(hint);
		unlock_page(page);
		return result;
	}
	assert("vs-25", hint->ext_coord.lh == &hint->lh);

	result = do_readpage_ctail(page->mapping->host, clust, page,
				   ZNODE_READ_LOCK);
	assert("edward-213", PageLocked(page));
	assert("edward-1163", ergo(!result, PageUptodate(page)));

	/* drop the long-term lock before saving the hint */
	unlock_page(page);
	done_lh(&hint->lh);
	hint->ext_coord.valid = 0;
	save_file_hint(clust->file, hint);
	kfree(hint);
	tfm_cluster_clr_uptodate(&clust->tc);

	return result;
}
  45026. +
  45027. +/* Helper function for ->readpages() */
  45028. +static int ctail_read_page_cluster(struct cluster_handle * clust,
  45029. + struct inode *inode)
  45030. +{
  45031. + int i;
  45032. + int result;
  45033. + assert("edward-779", clust != NULL);
  45034. + assert("edward-1059", clust->win == NULL);
  45035. + assert("edward-780", inode != NULL);
  45036. +
  45037. + result = prepare_page_cluster(inode, clust, READ_OP);
  45038. + if (result)
  45039. + return result;
  45040. +
  45041. + assert("edward-781", !tfm_cluster_is_uptodate(&clust->tc));
  45042. +
  45043. + for (i = 0; i < clust->nr_pages; i++) {
  45044. + struct page *page = clust->pages[i];
  45045. + lock_page(page);
  45046. + result = do_readpage_ctail(inode, clust, page, ZNODE_READ_LOCK);
  45047. + unlock_page(page);
  45048. + if (result)
  45049. + break;
  45050. + }
  45051. + tfm_cluster_clr_uptodate(&clust->tc);
  45052. + put_page_cluster(clust, inode, READ_OP);
  45053. + return result;
  45054. +}
  45055. +
  45056. +/* filler for read_cache_pages() */
  45057. +static int ctail_readpages_filler(void * data, struct page * page)
  45058. +{
  45059. + int ret = 0;
  45060. + struct cluster_handle * clust = data;
  45061. + struct inode * inode = file_inode(clust->file);
  45062. +
  45063. + assert("edward-1525", page->mapping == inode->i_mapping);
  45064. +
  45065. + if (PageUptodate(page)) {
  45066. + unlock_page(page);
  45067. + return 0;
  45068. + }
  45069. + if (pbytes(page_index(page), inode) == 0) {
  45070. + zero_user(page, 0, PAGE_SIZE);
  45071. + SetPageUptodate(page);
  45072. + unlock_page(page);
  45073. + return 0;
  45074. + }
  45075. + move_cluster_forward(clust, inode, page->index);
  45076. + unlock_page(page);
  45077. + /*
  45078. + * read the whole page cluster
  45079. + */
  45080. + ret = ctail_read_page_cluster(clust, inode);
  45081. +
  45082. + assert("edward-869", !tfm_cluster_is_uptodate(&clust->tc));
  45083. + return ret;
  45084. +}
  45085. +
  45086. +/*
  45087. + * We populate a bit more then upper readahead suggests:
  45088. + * with each nominated page we read the whole page cluster
  45089. + * this page belongs to.
  45090. + */
  45091. +int readpages_ctail(struct file *file, struct address_space *mapping,
  45092. + struct list_head *pages)
  45093. +{
  45094. + int ret = 0;
  45095. + hint_t *hint;
  45096. + struct cluster_handle clust;
  45097. + struct inode *inode = mapping->host;
  45098. +
  45099. + assert("edward-1521", inode == file_inode(file));
  45100. +
  45101. + cluster_init_read(&clust, NULL);
  45102. + clust.file = file;
  45103. + hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
  45104. + if (hint == NULL) {
  45105. + warning("vs-28", "failed to allocate hint");
  45106. + ret = RETERR(-ENOMEM);
  45107. + goto exit1;
  45108. + }
  45109. + clust.hint = hint;
  45110. + ret = load_file_hint(clust.file, hint);
  45111. + if (ret) {
  45112. + warning("edward-1522", "failed to load hint");
  45113. + goto exit2;
  45114. + }
  45115. + assert("vs-26", hint->ext_coord.lh == &hint->lh);
  45116. + ret = alloc_cluster_pgset(&clust, cluster_nrpages(inode));
  45117. + if (ret) {
  45118. + warning("edward-1523", "failed to alloc pgset");
  45119. + goto exit3;
  45120. + }
  45121. + ret = read_cache_pages(mapping, pages, ctail_readpages_filler, &clust);
  45122. +
  45123. + assert("edward-870", !tfm_cluster_is_uptodate(&clust.tc));
  45124. + exit3:
  45125. + done_lh(&hint->lh);
  45126. + save_file_hint(file, hint);
  45127. + hint->ext_coord.valid = 0;
  45128. + exit2:
  45129. + kfree(hint);
  45130. + exit1:
  45131. + put_cluster_handle(&clust);
  45132. + return ret;
  45133. +}
  45134. +
  45135. +/*
  45136. + plugin->u.item.s.file.append_key
  45137. + key of the first item of the next disk cluster
  45138. +*/
  45139. +reiser4_key *append_key_ctail(const coord_t * coord, reiser4_key * key)
  45140. +{
  45141. + assert("edward-1241", item_id_by_coord(coord) == CTAIL_ID);
  45142. + assert("edward-1242", cluster_shift_ok(cluster_shift_by_coord(coord)));
  45143. +
  45144. + item_key_by_coord(coord, key);
  45145. + set_key_offset(key, ((__u64) (clust_by_coord(coord, NULL)) + 1)
  45146. + << cluster_shift_by_coord(coord));
  45147. + return key;
  45148. +}
  45149. +
  45150. +static int insert_unprepped_ctail(struct cluster_handle * clust,
  45151. + struct inode *inode)
  45152. +{
  45153. + int result;
  45154. + char buf[UCTAIL_NR_UNITS];
  45155. + reiser4_item_data data;
  45156. + reiser4_key key;
  45157. + int shift = (int)UCTAIL_SHIFT;
  45158. +
  45159. + memset(buf, 0, (size_t) UCTAIL_NR_UNITS);
  45160. + result = key_by_inode_cryptcompress(inode,
  45161. + clust_to_off(clust->index, inode),
  45162. + &key);
  45163. + if (result)
  45164. + return result;
  45165. + data.user = 0;
  45166. + data.iplug = item_plugin_by_id(CTAIL_ID);
  45167. + data.arg = &shift;
  45168. + data.length = sizeof(ctail_item_format) + (size_t) UCTAIL_NR_UNITS;
  45169. + data.data = buf;
  45170. +
  45171. + result = insert_by_coord(&clust->hint->ext_coord.coord,
  45172. + &data, &key, clust->hint->ext_coord.lh, 0);
  45173. + return result;
  45174. +}
  45175. +
/*
 * Insert flow @f of ctail item data at @coord via a COP_INSERT_FLOW
 * carry operation; @lh tracks the node that ends up holding the
 * insertion point.
 */
static int
insert_cryptcompress_flow(coord_t * coord, lock_handle * lh, flow_t * f,
			  int cluster_shift)
{
	int result;
	carry_pool *pool;
	carry_level *lowest_level;
	reiser4_item_data *data;
	carry_op *op;

	/* single allocation for the pool, three carry levels and
	   the item data descriptor */
	pool =
	    init_carry_pool(sizeof(*pool) + 3 * sizeof(*lowest_level) +
			    sizeof(*data));
	if (IS_ERR(pool))
		return PTR_ERR(pool);
	lowest_level = (carry_level *) (pool + 1);
	init_carry_level(lowest_level, pool);
	data = (reiser4_item_data *) (lowest_level + 3);

	assert("edward-466", coord->between == AFTER_ITEM
	       || coord->between == AFTER_UNIT || coord->between == BEFORE_ITEM
	       || coord->between == EMPTY_NODE
	       || coord->between == BEFORE_UNIT);

	/* normalize "after unit" to "after item" */
	if (coord->between == AFTER_UNIT) {
		coord->unit_pos = 0;
		coord->between = AFTER_ITEM;
	}
	op = reiser4_post_carry(lowest_level, COP_INSERT_FLOW, coord->node,
				0 /* operate directly on coord -> node */);
	if (IS_ERR(op) || (op == NULL)) {
		done_carry_pool(pool);
		return RETERR(op ? PTR_ERR(op) : -EIO);
	}
	data->user = 0;
	data->iplug = item_plugin_by_id(CTAIL_ID);
	data->arg = &cluster_shift;

	/* the payload travels in the flow @f, not in the item data */
	data->length = 0;
	data->data = NULL;

	op->u.insert_flow.flags =
		COPI_SWEEP |
		COPI_DONT_SHIFT_LEFT |
		COPI_DONT_SHIFT_RIGHT;
	op->u.insert_flow.insert_point = coord;
	op->u.insert_flow.flow = f;
	op->u.insert_flow.data = data;
	op->u.insert_flow.new_nodes = 0;

	/* let @lh follow the node changed by the carry */
	lowest_level->track_type = CARRY_TRACK_CHANGE;
	lowest_level->tracked = lh;

	result = reiser4_carry(lowest_level, NULL);
	done_carry_pool(pool);

	return result;
}
  45234. +
/* Implementation of CRC_APPEND_ITEM mode of ctail conversion:
   insert flow @f right after the item at @coord, operating on
   private copies of the coordinate and the lock handle so the
   caller's @coord/@lh stay untouched. */
static int insert_cryptcompress_flow_in_place(coord_t * coord,
					      lock_handle * lh, flow_t * f,
					      int cluster_shift)
{
	int ret;
	coord_t pos;
	lock_handle lock;

	assert("edward-484",
	       coord->between == AT_UNIT || coord->between == AFTER_ITEM);
	assert("edward-485", item_id_by_coord(coord) == CTAIL_ID);

	coord_dup(&pos, coord);
	pos.unit_pos = 0;
	pos.between = AFTER_ITEM;

	init_lh(&lock);
	copy_lh(&lock, lh);

	ret = insert_cryptcompress_flow(&pos, &lock, f, cluster_shift);
	done_lh(&lock);
	assert("edward-1347", znode_is_write_locked(lh->node));
	/* NOTE(review): a failure here is only reported in debug builds */
	assert("edward-1228", !ret);
	return ret;
}
  45261. +
  45262. +/* Implementation of CRC_OVERWRITE_ITEM mode of ctail conversion */
  45263. +static int overwrite_ctail(coord_t * coord, flow_t * f)
  45264. +{
  45265. + unsigned count;
  45266. +
  45267. + assert("edward-269", f->user == 0);
  45268. + assert("edward-270", f->data != NULL);
  45269. + assert("edward-271", f->length > 0);
  45270. + assert("edward-272", coord_is_existing_unit(coord));
  45271. + assert("edward-273", coord->unit_pos == 0);
  45272. + assert("edward-274", znode_is_write_locked(coord->node));
  45273. + assert("edward-275", reiser4_schedulable());
  45274. + assert("edward-467", item_id_by_coord(coord) == CTAIL_ID);
  45275. + assert("edward-1243", ctail_ok(coord));
  45276. +
  45277. + count = nr_units_ctail(coord);
  45278. +
  45279. + if (count > f->length)
  45280. + count = f->length;
  45281. + memcpy(first_unit(coord), f->data, count);
  45282. + move_flow_forward(f, count);
  45283. + coord->unit_pos += count;
  45284. + return 0;
  45285. +}
  45286. +
  45287. +/* Implementation of CRC_CUT_ITEM mode of ctail conversion:
  45288. + cut ctail (part or whole) starting from next unit position */
  45289. +static int cut_ctail(coord_t * coord)
  45290. +{
  45291. + coord_t stop;
  45292. +
  45293. + assert("edward-435", coord->between == AT_UNIT &&
  45294. + coord->item_pos < coord_num_items(coord) &&
  45295. + coord->unit_pos <= coord_num_units(coord));
  45296. +
  45297. + if (coord->unit_pos == coord_num_units(coord))
  45298. + /* nothing to cut */
  45299. + return 0;
  45300. + coord_dup(&stop, coord);
  45301. + stop.unit_pos = coord_last_unit_pos(coord);
  45302. +
  45303. + return cut_node_content(coord, &stop, NULL, NULL, NULL);
  45304. +}
  45305. +
/*
 * Insert an unprepped ctail item for logical cluster @clust->index
 * of @inode. The caller must have reserved space (clust->reserved)
 * and the disk cluster must not exist yet.
 */
int ctail_insert_unprepped_cluster(struct cluster_handle * clust,
				   struct inode * inode)
{
	int result;
	assert("edward-1244", inode != NULL);
	assert("edward-1245", clust->hint != NULL);
	assert("edward-1246", clust->dstat == FAKE_DISK_CLUSTER);
	assert("edward-1247", clust->reserved == 1);

	result = get_disk_cluster_locked(clust, inode, ZNODE_WRITE_LOCK);
	if (cbk_errored(result))
		return result;
	/* lookup must report "not found": we are about to create it */
	assert("edward-1249", result == CBK_COORD_NOTFOUND);
	assert("edward-1250", znode_is_write_locked(clust->hint->lh.node));

	assert("edward-1295",
	       clust->hint->ext_coord.lh->node ==
	       clust->hint->ext_coord.coord.node);

	coord_set_between_clusters(&clust->hint->ext_coord.coord);

	result = insert_unprepped_ctail(clust, inode);
	/* release the space grabbed for this insertion */
	all_grabbed2free();

	assert("edward-1251", !result);
	assert("edward-1252", cryptcompress_inode_ok(inode));
	assert("edward-1253", znode_is_write_locked(clust->hint->lh.node));
	assert("edward-1254",
	       reiser4_clustered_blocks(reiser4_get_current_sb()));
	assert("edward-1255",
	       znode_convertible(clust->hint->ext_coord.coord.node));

	return result;
}
  45340. +
  45341. +/* plugin->u.item.f.scan */
  45342. +int scan_ctail(flush_scan * scan)
  45343. +{
  45344. + int result = 0;
  45345. + struct page *page;
  45346. + struct inode *inode;
  45347. + jnode *node = scan->node;
  45348. +
  45349. + assert("edward-227", scan->node != NULL);
  45350. + assert("edward-228", jnode_is_cluster_page(scan->node));
  45351. + assert("edward-639", znode_is_write_locked(scan->parent_lock.node));
  45352. +
  45353. + page = jnode_page(node);
  45354. + inode = page->mapping->host;
  45355. +
  45356. + if (!reiser4_scanning_left(scan))
  45357. + return result;
  45358. + if (!ZF_ISSET(scan->parent_lock.node, JNODE_DIRTY))
  45359. + znode_make_dirty(scan->parent_lock.node);
  45360. +
  45361. + if (!znode_convertible(scan->parent_lock.node)) {
  45362. + if (JF_ISSET(scan->node, JNODE_DIRTY))
  45363. + znode_set_convertible(scan->parent_lock.node);
  45364. + else {
  45365. + warning("edward-681",
  45366. + "cluster page is already processed");
  45367. + return -EAGAIN;
  45368. + }
  45369. + }
  45370. + return result;
  45371. +}
  45372. +
  45373. +/* If true, this function attaches children */
  45374. +static int should_attach_convert_idata(flush_pos_t * pos)
  45375. +{
  45376. + int result;
  45377. + assert("edward-431", pos != NULL);
  45378. + assert("edward-432", pos->child == NULL);
  45379. + assert("edward-619", znode_is_write_locked(pos->coord.node));
  45380. + assert("edward-470",
  45381. + item_plugin_by_coord(&pos->coord) ==
  45382. + item_plugin_by_id(CTAIL_ID));
  45383. +
  45384. + /* check for leftmost child */
  45385. + utmost_child_ctail(&pos->coord, LEFT_SIDE, &pos->child);
  45386. +
  45387. + if (!pos->child)
  45388. + return 0;
  45389. + spin_lock_jnode(pos->child);
  45390. + result = (JF_ISSET(pos->child, JNODE_DIRTY) &&
  45391. + pos->child->atom == ZJNODE(pos->coord.node)->atom);
  45392. + spin_unlock_jnode(pos->child);
  45393. + if (!result && pos->child) {
  45394. + /* existing child isn't to attach, clear up this one */
  45395. + jput(pos->child);
  45396. + pos->child = NULL;
  45397. + }
  45398. + return result;
  45399. +}
  45400. +
  45401. +/**
  45402. + * Collect all needed information about the object here,
  45403. + * as in-memory inode can be evicted from memory before
  45404. + * disk update completion.
  45405. + */
  45406. +static int init_convert_data_ctail(struct convert_item_info * idata,
  45407. + struct inode *inode)
  45408. +{
  45409. + assert("edward-813", idata != NULL);
  45410. + assert("edward-814", inode != NULL);
  45411. +
  45412. + idata->cluster_shift = inode_cluster_shift(inode);
  45413. + idata->d_cur = DC_FIRST_ITEM;
  45414. + idata->d_next = DC_INVALID_STATE;
  45415. +
  45416. + return 0;
  45417. +}
  45418. +
/* Allocate per-item conversion state attached to @sq.
   Returns 0 on success, -ENOMEM on allocation failure. */
static int alloc_item_convert_data(struct convert_info * sq)
{
	assert("edward-816", sq != NULL);
	assert("edward-817", sq->itm == NULL);

	sq->itm = kmalloc(sizeof(*sq->itm), reiser4_ctx_gfp_mask_get());
	if (sq->itm == NULL)
		return RETERR(-ENOMEM);
	/* the right-neighbor lock handle starts out idle */
	init_lh(&sq->right_lock);
	sq->right_locked = 0;
	return 0;
}
  45431. +
/* Release per-item conversion state: drop the right-neighbor lock
   and free the state. Counterpart of alloc_item_convert_data(). */
static void free_item_convert_data(struct convert_info * sq)
{
	assert("edward-818", sq != NULL);
	assert("edward-819", sq->itm != NULL);
	assert("edward-820", sq->iplug != NULL);

	done_lh(&sq->right_lock);
	sq->right_locked = 0;
	kfree(sq->itm);
	sq->itm = NULL;
	return;
}
  45444. +
  45445. +static struct convert_info *alloc_convert_data(void)
  45446. +{
  45447. + struct convert_info *info;
  45448. +
  45449. + info = kmalloc(sizeof(*info), reiser4_ctx_gfp_mask_get());
  45450. + if (info != NULL) {
  45451. + memset(info, 0, sizeof(*info));
  45452. + cluster_init_write(&info->clust, NULL);
  45453. + }
  45454. + return info;
  45455. +}
  45456. +
/* Reset per-cluster state so that it does not leak from the
   previously converted disk cluster into the next one. */
static void reset_convert_data(struct convert_info *info)
{
	info->clust.tc.hole = 0;
}
  45461. +
  45462. +void free_convert_data(flush_pos_t * pos)
  45463. +{
  45464. + struct convert_info *sq;
  45465. +
  45466. + assert("edward-823", pos != NULL);
  45467. + assert("edward-824", pos->sq != NULL);
  45468. +
  45469. + sq = pos->sq;
  45470. + if (sq->itm)
  45471. + free_item_convert_data(sq);
  45472. + put_cluster_handle(&sq->clust);
  45473. + kfree(pos->sq);
  45474. + pos->sq = NULL;
  45475. + return;
  45476. +}
  45477. +
/* (Re)initialize the per-item conversion state of @pos for @inode. */
static int init_item_convert_data(flush_pos_t * pos, struct inode *inode)
{
	struct convert_info *sq;

	assert("edward-825", pos != NULL);
	assert("edward-826", pos->sq != NULL);
	assert("edward-827", item_convert_data(pos) != NULL);
	assert("edward-828", inode != NULL);

	sq = pos->sq;
	/* wipe any state left from a previous item */
	memset(sq->itm, 0, sizeof(*sq->itm));

	/* iplug->init_convert_data() */
	return init_convert_data_ctail(sq->itm, inode);
}
  45493. +
/* create and attach disk cluster info used by 'convert' phase of the flush
   squalloc() */
static int attach_convert_idata(flush_pos_t * pos, struct inode *inode)
{
	int ret = 0;
	struct convert_item_info *info;
	struct cluster_handle *clust;
	file_plugin *fplug = inode_file_plugin(inode);

	assert("edward-248", pos != NULL);
	assert("edward-249", pos->child != NULL);
	assert("edward-251", inode != NULL);
	assert("edward-682", cryptcompress_inode_ok(inode));
	assert("edward-252",
	       fplug == file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID));
	assert("edward-473",
	       item_plugin_by_coord(&pos->coord) ==
	       item_plugin_by_id(CTAIL_ID));

	/* the conversion context is allocated once per flush position
	   and reset for every subsequent disk cluster */
	if (!pos->sq) {
		pos->sq = alloc_convert_data();
		if (!pos->sq)
			return RETERR(-ENOMEM);
	}
	else
		reset_convert_data(pos->sq);

	clust = &pos->sq->clust;

	ret = set_cluster_by_page(clust,
				  jnode_page(pos->child),
				  MAX_CLUSTER_NRPAGES);
	if (ret)
		goto err;

	assert("edward-829", pos->sq != NULL);
	assert("edward-250", item_convert_data(pos) == NULL);

	pos->sq->iplug = item_plugin_by_id(CTAIL_ID);

	ret = alloc_item_convert_data(pos->sq);
	if (ret)
		goto err;
	ret = init_item_convert_data(pos, inode);
	if (ret)
		goto err;
	info = item_convert_data(pos);

	/* grab the plain text of the logical cluster ... */
	ret = checkout_logical_cluster(clust, pos->child, inode);
	if (ret)
		goto err;

	/* ... and compress it */
	reiser4_deflate_cluster(clust, inode);
	inc_item_convert_count(pos);

	/* prepare flow for insertion */
	fplug->flow_by_inode(inode,
			     (const char __user *)tfm_stream_data(&clust->tc,
								  OUTPUT_STREAM),
			     0 /* kernel space */ ,
			     clust->tc.len,
			     clust_to_off(clust->index, inode),
			     WRITE_OP, &info->flow);
	if (clust->tc.hole)
		info->flow.length = 0;

	/* the child's reference is consumed on both success and error */
	jput(pos->child);
	return 0;
      err:
	jput(pos->child);
	free_convert_data(pos);
	return ret;
}
  45567. +
  45568. +/* clear up disk cluster info */
  45569. +static void detach_convert_idata(struct convert_info * sq)
  45570. +{
  45571. + struct convert_item_info *info;
  45572. +
  45573. + assert("edward-253", sq != NULL);
  45574. + assert("edward-840", sq->itm != NULL);
  45575. +
  45576. + info = sq->itm;
  45577. + assert("edward-1212", info->flow.length == 0);
  45578. +
  45579. + free_item_convert_data(sq);
  45580. + return;
  45581. +}
  45582. +
  45583. +/* plugin->u.item.f.utmost_child */
  45584. +
  45585. +/* This function sets the leftmost child for the first cluster item,
  45586. + if the child exists, and NULL otherwise.
  45587. + NOTE-EDWARD: Do not call this for RIGHT_SIDE */
  45588. +
  45589. +int utmost_child_ctail(const coord_t * coord, sideof side, jnode ** child)
  45590. +{
  45591. + reiser4_key key;
  45592. +
  45593. + item_key_by_coord(coord, &key);
  45594. +
  45595. + assert("edward-257", coord != NULL);
  45596. + assert("edward-258", child != NULL);
  45597. + assert("edward-259", side == LEFT_SIDE);
  45598. + assert("edward-260",
  45599. + item_plugin_by_coord(coord) == item_plugin_by_id(CTAIL_ID));
  45600. +
  45601. + if (!is_disk_cluster_key(&key, coord))
  45602. + *child = NULL;
  45603. + else
  45604. + *child = jlookup(current_tree,
  45605. + get_key_objectid(item_key_by_coord
  45606. + (coord, &key)),
  45607. + off_to_pg(get_key_offset(&key)));
  45608. + return 0;
  45609. +}
  45610. +
  45611. +/*
  45612. + * Set status (d_next) of the first item at the right neighbor
  45613. + *
  45614. + * If the current position is the last item in the node, then
  45615. + * look at its first item at the right neighbor (skip empty nodes).
  45616. + * Note, that right neighbors may be not dirty because of races.
  45617. + * If so, make it dirty and set convertible flag.
  45618. + */
  45619. +static int pre_convert_ctail(flush_pos_t * pos)
  45620. +{
  45621. + int ret = 0;
  45622. + int stop = 0;
  45623. + znode *slider;
  45624. + lock_handle slider_lh;
  45625. + lock_handle right_lh;
  45626. +
  45627. + assert("edward-1232", !node_is_empty(pos->coord.node));
  45628. + assert("edward-1014",
  45629. + pos->coord.item_pos < coord_num_items(&pos->coord));
  45630. + assert("edward-1015", convert_data_attached(pos));
  45631. + assert("edward-1611",
  45632. + item_convert_data(pos)->d_cur != DC_INVALID_STATE);
  45633. + assert("edward-1017",
  45634. + item_convert_data(pos)->d_next == DC_INVALID_STATE);
  45635. +
  45636. + /*
  45637. + * In the following two cases we don't need
  45638. + * to look at right neighbor
  45639. + */
  45640. + if (item_convert_data(pos)->d_cur == DC_AFTER_CLUSTER) {
  45641. + /*
  45642. + * cluster is over, so the first item of the right
  45643. + * neighbor doesn't belong to this cluster
  45644. + */
  45645. + return 0;
  45646. + }
  45647. + if (pos->coord.item_pos < coord_num_items(&pos->coord) - 1) {
  45648. + /*
  45649. + * current position is not the last item in the node,
  45650. + * so the first item of the right neighbor doesn't
  45651. + * belong to this cluster
  45652. + */
  45653. + return 0;
  45654. + }
  45655. + /*
  45656. + * Look at right neighbor.
  45657. + * Note that concurrent truncate is not a problem
  45658. + * since we have locked the beginning of the cluster.
  45659. + */
  45660. + slider = pos->coord.node;
  45661. + init_lh(&slider_lh);
  45662. + init_lh(&right_lh);
  45663. +
  45664. + while (!stop) {
  45665. + coord_t coord;
  45666. +
  45667. + ret = reiser4_get_right_neighbor(&right_lh,
  45668. + slider,
  45669. + ZNODE_WRITE_LOCK,
  45670. + GN_CAN_USE_UPPER_LEVELS);
  45671. + if (ret)
  45672. + break;
  45673. + slider = right_lh.node;
  45674. + ret = zload(slider);
  45675. + if (ret)
  45676. + break;
  45677. + coord_init_before_first_item(&coord, slider);
  45678. +
  45679. + if (node_is_empty(slider)) {
  45680. + warning("edward-1641", "Found empty right neighbor");
  45681. + znode_make_dirty(slider);
  45682. + znode_set_convertible(slider);
  45683. + /*
  45684. + * skip this node,
  45685. + * go rightward
  45686. + */
  45687. + stop = 0;
  45688. + } else if (same_disk_cluster(&pos->coord, &coord)) {
  45689. +
  45690. + item_convert_data(pos)->d_next = DC_CHAINED_ITEM;
  45691. +
  45692. + if (!ZF_ISSET(slider, JNODE_DIRTY)) {
  45693. + /*
  45694. + warning("edward-1024",
  45695. + "next slum item mergeable, "
  45696. + "but znode %p isn't dirty\n",
  45697. + lh.node);
  45698. + */
  45699. + znode_make_dirty(slider);
  45700. + }
  45701. + if (!znode_convertible(slider)) {
  45702. + /*
  45703. + warning("edward-1272",
  45704. + "next slum item mergeable, "
  45705. + "but znode %p isn't convertible\n",
  45706. + lh.node);
  45707. + */
  45708. + znode_set_convertible(slider);
  45709. + }
  45710. + stop = 1;
  45711. + convert_data(pos)->right_locked = 1;
  45712. + } else {
  45713. + item_convert_data(pos)->d_next = DC_AFTER_CLUSTER;
  45714. + stop = 1;
  45715. + convert_data(pos)->right_locked = 1;
  45716. + }
  45717. + zrelse(slider);
  45718. + done_lh(&slider_lh);
  45719. + move_lh(&slider_lh, &right_lh);
  45720. + }
  45721. + if (convert_data(pos)->right_locked)
  45722. + /*
  45723. + * Store locked right neighbor in
  45724. + * the conversion info. Otherwise,
  45725. + * we won't be able to access it,
  45726. + * if the current node gets deleted
  45727. + * during conversion
  45728. + */
  45729. + move_lh(&convert_data(pos)->right_lock, &slider_lh);
  45730. + done_lh(&slider_lh);
  45731. + done_lh(&right_lh);
  45732. +
  45733. + if (ret == -E_NO_NEIGHBOR) {
  45734. + item_convert_data(pos)->d_next = DC_AFTER_CLUSTER;
  45735. + ret = 0;
  45736. + }
  45737. + assert("edward-1610",
  45738. + ergo(ret != 0,
  45739. + item_convert_data(pos)->d_next == DC_INVALID_STATE));
  45740. + return ret;
  45741. +}
  45742. +
  45743. +/*
  45744. + * do some post-conversion actions;
  45745. + * detach conversion data if there is nothing to convert anymore
  45746. + */
  45747. +static void post_convert_ctail(flush_pos_t * pos,
  45748. + ctail_convert_mode_t mode, int old_nr_items)
  45749. +{
  45750. + switch (mode) {
  45751. + case CTAIL_CUT_ITEM:
  45752. + assert("edward-1214", item_convert_data(pos)->flow.length == 0);
  45753. + assert("edward-1215",
  45754. + coord_num_items(&pos->coord) == old_nr_items ||
  45755. + coord_num_items(&pos->coord) == old_nr_items - 1);
  45756. +
  45757. + if (item_convert_data(pos)->d_next == DC_CHAINED_ITEM)
  45758. + /*
  45759. + * the next item belongs to this cluster,
  45760. + * and should be also killed
  45761. + */
  45762. + break;
  45763. + if (coord_num_items(&pos->coord) != old_nr_items) {
  45764. + /*
  45765. + * the latest item in the
  45766. + * cluster has been killed,
  45767. + */
  45768. + detach_convert_idata(pos->sq);
  45769. + if (!node_is_empty(pos->coord.node))
  45770. + /*
  45771. + * make sure the next item will be scanned
  45772. + */
  45773. + coord_init_before_item(&pos->coord);
  45774. + break;
  45775. + }
  45776. + case CTAIL_APPEND_ITEM:
  45777. + /*
  45778. + * in the append mode the whole flow has been inserted
  45779. + * (see COP_INSERT_FLOW primitive)
  45780. + */
  45781. + assert("edward-434", item_convert_data(pos)->flow.length == 0);
  45782. + detach_convert_idata(pos->sq);
  45783. + break;
  45784. + case CTAIL_OVERWRITE_ITEM:
  45785. + if (coord_is_unprepped_ctail(&pos->coord)) {
  45786. + /*
  45787. + * the first (unprepped) ctail has been overwritten;
  45788. + * convert it to the prepped one
  45789. + */
  45790. + assert("edward-1259",
  45791. + cluster_shift_ok(item_convert_data(pos)->
  45792. + cluster_shift));
  45793. + put_unaligned((d8)item_convert_data(pos)->cluster_shift,
  45794. + &ctail_formatted_at(&pos->coord)->
  45795. + cluster_shift);
  45796. + }
  45797. + break;
  45798. + default:
  45799. + impossible("edward-1609", "Bad ctail conversion mode");
  45800. + }
  45801. +}
  45802. +
  45803. +static int assign_conversion_mode(flush_pos_t * pos, ctail_convert_mode_t *mode)
  45804. +{
  45805. + int ret = 0;
  45806. +
  45807. + *mode = CTAIL_INVAL_CONVERT_MODE;
  45808. +
  45809. + if (!convert_data_attached(pos)) {
  45810. + if (should_attach_convert_idata(pos)) {
  45811. + struct inode *inode;
  45812. + gfp_t old_mask = get_current_context()->gfp_mask;
  45813. +
  45814. + assert("edward-264", pos->child != NULL);
  45815. + assert("edward-265", jnode_page(pos->child) != NULL);
  45816. + assert("edward-266",
  45817. + jnode_page(pos->child)->mapping != NULL);
  45818. +
  45819. + inode = jnode_page(pos->child)->mapping->host;
  45820. +
  45821. + assert("edward-267", inode != NULL);
  45822. + /*
  45823. + * attach new convert item info
  45824. + */
  45825. + get_current_context()->gfp_mask |= __GFP_NOFAIL;
  45826. + ret = attach_convert_idata(pos, inode);
  45827. + get_current_context()->gfp_mask = old_mask;
  45828. + pos->child = NULL;
  45829. + if (ret == -E_REPEAT) {
  45830. + /*
  45831. + * jnode became clean, or there is no dirty
  45832. + * pages (nothing to update in disk cluster)
  45833. + */
  45834. + warning("edward-1021",
  45835. + "convert_ctail: nothing to attach");
  45836. + ret = 0;
  45837. + goto dont_convert;
  45838. + }
  45839. + if (ret)
  45840. + goto dont_convert;
  45841. +
  45842. + if (pos->sq->clust.tc.hole) {
  45843. + assert("edward-1634",
  45844. + item_convert_data(pos)->flow.length == 0);
  45845. + /*
  45846. + * new content is filled with zeros -
  45847. + * we punch a hole using cut (not kill)
  45848. + * primitive, so attached pages won't
  45849. + * be truncated
  45850. + */
  45851. + *mode = CTAIL_CUT_ITEM;
  45852. + }
  45853. + else
  45854. + /*
  45855. + * this is the first ctail in the cluster,
  45856. + * so it (may be only its head) should be
  45857. + * overwritten
  45858. + */
  45859. + *mode = CTAIL_OVERWRITE_ITEM;
  45860. + } else
  45861. + /*
  45862. + * non-convertible item
  45863. + */
  45864. + goto dont_convert;
  45865. + } else {
  45866. + /*
  45867. + * use old convert info
  45868. + */
  45869. + struct convert_item_info *idata;
  45870. + idata = item_convert_data(pos);
  45871. +
  45872. + switch (idata->d_cur) {
  45873. + case DC_FIRST_ITEM:
  45874. + case DC_CHAINED_ITEM:
  45875. + if (idata->flow.length)
  45876. + *mode = CTAIL_OVERWRITE_ITEM;
  45877. + else
  45878. + *mode = CTAIL_CUT_ITEM;
  45879. + break;
  45880. + case DC_AFTER_CLUSTER:
  45881. + if (idata->flow.length)
  45882. + *mode = CTAIL_APPEND_ITEM;
  45883. + else {
  45884. + /*
  45885. + * nothing to update anymore
  45886. + */
  45887. + detach_convert_idata(pos->sq);
  45888. + goto dont_convert;
  45889. + }
  45890. + break;
  45891. + default:
  45892. + impossible("edward-1018",
  45893. + "wrong current item state");
  45894. + ret = RETERR(-EIO);
  45895. + goto dont_convert;
  45896. + }
  45897. + }
  45898. + /*
  45899. + * ok, ctail will be converted
  45900. + */
  45901. + assert("edward-433", convert_data_attached(pos));
  45902. + assert("edward-1022",
  45903. + pos->coord.item_pos < coord_num_items(&pos->coord));
  45904. + return 0;
  45905. + dont_convert:
  45906. + return ret;
  45907. +}
  45908. +
  45909. +/*
  45910. + * perform an operation on the ctail item in
  45911. + * accordance with assigned conversion @mode
  45912. + */
  45913. +static int do_convert_ctail(flush_pos_t * pos, ctail_convert_mode_t mode)
  45914. +{
  45915. + int result = 0;
  45916. + struct convert_item_info * info;
  45917. +
  45918. + assert("edward-468", pos != NULL);
  45919. + assert("edward-469", pos->sq != NULL);
  45920. + assert("edward-845", item_convert_data(pos) != NULL);
  45921. +
  45922. + info = item_convert_data(pos);
  45923. + assert("edward-679", info->flow.data != NULL);
  45924. +
  45925. + switch (mode) {
  45926. + case CTAIL_APPEND_ITEM:
  45927. + assert("edward-1229", info->flow.length != 0);
  45928. + assert("edward-1256",
  45929. + cluster_shift_ok(cluster_shift_by_coord(&pos->coord)));
  45930. + /*
  45931. + * insert flow without balancing
  45932. + * (see comments to convert_node())
  45933. + */
  45934. + result = insert_cryptcompress_flow_in_place(&pos->coord,
  45935. + &pos->lock,
  45936. + &info->flow,
  45937. + info->cluster_shift);
  45938. + break;
  45939. + case CTAIL_OVERWRITE_ITEM:
  45940. + assert("edward-1230", info->flow.length != 0);
  45941. + overwrite_ctail(&pos->coord, &info->flow);
  45942. + if (info->flow.length != 0)
  45943. + break;
  45944. + else
  45945. + /*
  45946. + * fall through:
  45947. + * cut the rest of item (if any)
  45948. + */
  45949. + ;
  45950. + case CTAIL_CUT_ITEM:
  45951. + assert("edward-1231", info->flow.length == 0);
  45952. + result = cut_ctail(&pos->coord);
  45953. + break;
  45954. + default:
  45955. + result = RETERR(-EIO);
  45956. + impossible("edward-244", "bad ctail conversion mode");
  45957. + }
  45958. + return result;
  45959. +}
  45960. +
  45961. +/*
  45962. + * plugin->u.item.f.convert
  45963. + *
  45964. + * Convert ctail items at flush time
  45965. + */
  45966. +int convert_ctail(flush_pos_t * pos)
  45967. +{
  45968. + int ret;
  45969. + int old_nr_items;
  45970. + ctail_convert_mode_t mode;
  45971. +
  45972. + assert("edward-1020", pos != NULL);
  45973. + assert("edward-1213", coord_num_items(&pos->coord) != 0);
  45974. + assert("edward-1257", item_id_by_coord(&pos->coord) == CTAIL_ID);
  45975. + assert("edward-1258", ctail_ok(&pos->coord));
  45976. + assert("edward-261", pos->coord.node != NULL);
  45977. +
  45978. + old_nr_items = coord_num_items(&pos->coord);
  45979. + /*
  45980. + * detach old conversion data and
  45981. + * attach a new one, if needed
  45982. + */
  45983. + ret = assign_conversion_mode(pos, &mode);
  45984. + if (ret || mode == CTAIL_INVAL_CONVERT_MODE) {
  45985. + assert("edward-1633", !convert_data_attached(pos));
  45986. + return ret;
  45987. + }
  45988. + /*
  45989. + * find out the status of the right neighbor
  45990. + */
  45991. + ret = pre_convert_ctail(pos);
  45992. + if (ret) {
  45993. + detach_convert_idata(pos->sq);
  45994. + return ret;
  45995. + }
  45996. + ret = do_convert_ctail(pos, mode);
  45997. + if (ret) {
  45998. + detach_convert_idata(pos->sq);
  45999. + return ret;
  46000. + }
  46001. + /*
  46002. + * detach old conversion data if needed
  46003. + */
  46004. + post_convert_ctail(pos, mode, old_nr_items);
  46005. + return 0;
  46006. +}
  46007. +
  46008. +/*
  46009. + Local variables:
  46010. + c-indentation-style: "K&R"
  46011. + mode-name: "LC"
  46012. + c-basic-offset: 8
  46013. + tab-width: 8
  46014. + fill-column: 120
  46015. + End:
  46016. +*/
  46017. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/item/ctail.h linux-4.14.2/fs/reiser4/plugin/item/ctail.h
  46018. --- linux-4.14.2.orig/fs/reiser4/plugin/item/ctail.h 1970-01-01 01:00:00.000000000 +0100
  46019. +++ linux-4.14.2/fs/reiser4/plugin/item/ctail.h 2017-11-26 22:13:09.000000000 +0100
  46020. @@ -0,0 +1,102 @@
  46021. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  46022. +
  46023. +/* Ctail items are fragments (or bodies) of special type to provide
  46024. + optimal storage of encrypted and/or compressed files. */
  46025. +
  46026. +
  46027. +#if !defined( __FS_REISER4_CTAIL_H__ )
  46028. +#define __FS_REISER4_CTAIL_H__
  46029. +
  46030. +/* Disk format of ctail item */
  46031. +typedef struct ctail_item_format {
  46032. + /* packed shift;
  46033. + if its value is different from UCTAIL_SHIFT (see below), then
  46034. + size of disk cluster is calculated as (1 << cluster_shift) */
  46035. + d8 cluster_shift;
  46036. + /* ctail body */
  46037. + d8 body[0];
  46038. +} __attribute__ ((packed)) ctail_item_format;
  46039. +
  46040. +/* "Unprepped" disk cluster is represented by a single ctail item
  46041. + with the following "magic" attributes: */
  46042. +/* "magic" cluster_shift */
  46043. +#define UCTAIL_SHIFT 0xff
  46044. +/* How many units unprepped ctail item has */
  46045. +#define UCTAIL_NR_UNITS 1
  46046. +
  46047. +/* The following is a set of various item states in a disk cluster.
  46048. + Disk cluster is a set of items whose keys belong to the interval
  46049. + [dc_key , dc_key + disk_cluster_size - 1] */
  46050. +typedef enum {
  46051. + DC_INVALID_STATE = 0,
  46052. + DC_FIRST_ITEM = 1,
  46053. + DC_CHAINED_ITEM = 2,
  46054. + DC_AFTER_CLUSTER = 3
  46055. +} dc_item_stat;
  46056. +
  46057. +/* ctail-specific extension.
  46058. + In particular this describes parameters of disk cluster an item belongs to */
  46059. +struct ctail_coord_extension {
  46060. + int shift; /* this contains cluster_shift extracted from
  46061. + ctail_item_format (above), or UCTAIL_SHIFT
  46062. + (the last one is the "magic" of unprepped disk clusters)*/
  46063. + int dsize; /* size of a prepped disk cluster */
  46064. + int ncount; /* count of nodes occupied by a disk cluster */
  46065. +};
  46066. +
  46067. +struct cut_list;
  46068. +
  46069. +/* plugin->item.b.* */
  46070. +int can_contain_key_ctail(const coord_t *, const reiser4_key *,
  46071. + const reiser4_item_data *);
  46072. +int mergeable_ctail(const coord_t * p1, const coord_t * p2);
  46073. +pos_in_node_t nr_units_ctail(const coord_t * coord);
  46074. +int estimate_ctail(const coord_t * coord, const reiser4_item_data * data);
  46075. +void print_ctail(const char *prefix, coord_t * coord);
  46076. +lookup_result lookup_ctail(const reiser4_key *, lookup_bias, coord_t *);
  46077. +
  46078. +int paste_ctail(coord_t * coord, reiser4_item_data * data,
  46079. + carry_plugin_info * info UNUSED_ARG);
  46080. +int init_ctail(coord_t *, coord_t *, reiser4_item_data *);
  46081. +int can_shift_ctail(unsigned free_space, coord_t * coord,
  46082. + znode * target, shift_direction pend, unsigned *size,
  46083. + unsigned want);
  46084. +void copy_units_ctail(coord_t * target, coord_t * source, unsigned from,
  46085. + unsigned count, shift_direction where_is_free_space,
  46086. + unsigned free_space);
  46087. +int cut_units_ctail(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
  46088. + carry_cut_data *, reiser4_key * smallest_removed,
  46089. + reiser4_key * new_first);
  46090. +int kill_units_ctail(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
  46091. + carry_kill_data *, reiser4_key * smallest_removed,
  46092. + reiser4_key * new_first);
  46093. +int ctail_ok(const coord_t * coord);
  46094. +int check_ctail(const coord_t * coord, const char **error);
  46095. +
  46096. +/* plugin->u.item.s.* */
  46097. +int read_ctail(struct file *, flow_t *, hint_t *);
  46098. +int readpage_ctail(void *, struct page *);
  46099. +int readpages_ctail(struct file *, struct address_space *, struct list_head *);
  46100. +reiser4_key *append_key_ctail(const coord_t *, reiser4_key *);
  46101. +int create_hook_ctail(const coord_t * coord, void *arg);
  46102. +int kill_hook_ctail(const coord_t *, pos_in_node_t, pos_in_node_t,
  46103. + carry_kill_data *);
  46104. +int shift_hook_ctail(const coord_t *, unsigned, unsigned, znode *);
  46105. +
  46106. +/* plugin->u.item.f */
  46107. +int utmost_child_ctail(const coord_t *, sideof, jnode **);
  46108. +int scan_ctail(flush_scan *);
  46109. +int convert_ctail(flush_pos_t *);
  46110. +size_t inode_scaled_cluster_size(struct inode *);
  46111. +
  46112. +#endif /* __FS_REISER4_CTAIL_H__ */
  46113. +
  46114. +/* Make Linus happy.
  46115. + Local variables:
  46116. + c-indentation-style: "K&R"
  46117. + mode-name: "LC"
  46118. + c-basic-offset: 8
  46119. + tab-width: 8
  46120. + fill-column: 120
  46121. + End:
  46122. +*/
  46123. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/item/extent.c linux-4.14.2/fs/reiser4/plugin/item/extent.c
  46124. --- linux-4.14.2.orig/fs/reiser4/plugin/item/extent.c 1970-01-01 01:00:00.000000000 +0100
  46125. +++ linux-4.14.2/fs/reiser4/plugin/item/extent.c 2017-11-26 22:13:09.000000000 +0100
  46126. @@ -0,0 +1,197 @@
  46127. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  46128. +
  46129. +#include "item.h"
  46130. +#include "../../key.h"
  46131. +#include "../../super.h"
  46132. +#include "../../carry.h"
  46133. +#include "../../inode.h"
  46134. +#include "../../page_cache.h"
  46135. +#include "../../flush.h"
  46136. +#include "../object.h"
  46137. +
  46138. +/* prepare structure reiser4_item_data. It is used to put one extent unit into tree */
  46139. +/* Audited by: green(2002.06.13) */
  46140. +reiser4_item_data *init_new_extent(reiser4_item_data * data, void *ext_unit,
  46141. + int nr_extents)
  46142. +{
  46143. + data->data = ext_unit;
  46144. + /* data->data is kernel space */
  46145. + data->user = 0;
  46146. + data->length = sizeof(reiser4_extent) * nr_extents;
  46147. + data->arg = NULL;
  46148. + data->iplug = item_plugin_by_id(EXTENT_POINTER_ID);
  46149. + return data;
  46150. +}
  46151. +
  46152. +/* how many bytes are addressed by @nr first extents of the extent item */
  46153. +reiser4_block_nr reiser4_extent_size(const coord_t * coord, pos_in_node_t nr)
  46154. +{
  46155. + pos_in_node_t i;
  46156. + reiser4_block_nr blocks;
  46157. + reiser4_extent *ext;
  46158. +
  46159. + ext = item_body_by_coord(coord);
  46160. + assert("vs-263", nr <= nr_units_extent(coord));
  46161. +
  46162. + blocks = 0;
  46163. + for (i = 0; i < nr; i++, ext++) {
  46164. + blocks += extent_get_width(ext);
  46165. + }
  46166. +
  46167. + return blocks * current_blocksize;
  46168. +}
  46169. +
  46170. +extent_state state_of_extent(reiser4_extent * ext)
  46171. +{
  46172. + switch ((int)extent_get_start(ext)) {
  46173. + case 0:
  46174. + return HOLE_EXTENT;
  46175. + case 1:
  46176. + return UNALLOCATED_EXTENT;
  46177. + default:
  46178. + break;
  46179. + }
  46180. + return ALLOCATED_EXTENT;
  46181. +}
  46182. +
  46183. +int extent_is_unallocated(const coord_t * item)
  46184. +{
  46185. + assert("jmacd-5133", item_is_extent(item));
  46186. +
  46187. + return state_of_extent(extent_by_coord(item)) == UNALLOCATED_EXTENT;
  46188. +}
  46189. +
  46190. +/* set extent's start and width */
  46191. +void reiser4_set_extent(reiser4_extent * ext, reiser4_block_nr start,
  46192. + reiser4_block_nr width)
  46193. +{
  46194. + extent_set_start(ext, start);
  46195. + extent_set_width(ext, width);
  46196. +}
  46197. +
  46198. +/**
  46199. + * reiser4_replace_extent - replace extent and paste 1 or 2 after it
  46200. + * @un_extent: coordinate of extent to be overwritten
  46201. + * @lh: need better comment
  46202. + * @key: need better comment
  46203. + * @exts_to_add: data prepared for insertion into tree
  46204. + * @replace: need better comment
  46205. + * @flags: need better comment
  46206. + * @return_insert_position: need better comment
  46207. + *
  46208. + * Overwrites one extent, pastes 1 or 2 more ones after overwritten one. If
  46209. + * @return_inserted_position is 1 - @un_extent and @lh are returned set to
  46210. + * first of newly inserted units, if it is 0 - @un_extent and @lh are returned
  46211. + * set to extent which was overwritten.
  46212. + */
  46213. +int reiser4_replace_extent(struct replace_handle *h,
  46214. + int return_inserted_position)
  46215. +{
  46216. + int result;
  46217. + znode *orig_znode;
  46218. + /*ON_DEBUG(reiser4_extent orig_ext);*/ /* this is for debugging */
  46219. +
  46220. + assert("vs-990", coord_is_existing_unit(h->coord));
  46221. + assert("vs-1375", znode_is_write_locked(h->coord->node));
  46222. + assert("vs-1426", extent_get_width(&h->overwrite) != 0);
  46223. + assert("vs-1427", extent_get_width(&h->new_extents[0]) != 0);
  46224. + assert("vs-1427", ergo(h->nr_new_extents == 2,
  46225. + extent_get_width(&h->new_extents[1]) != 0));
  46226. +
  46227. + /* compose structure for paste */
  46228. + init_new_extent(&h->item, &h->new_extents[0], h->nr_new_extents);
  46229. +
  46230. + coord_dup(&h->coord_after, h->coord);
  46231. + init_lh(&h->lh_after);
  46232. + copy_lh(&h->lh_after, h->lh);
  46233. + reiser4_tap_init(&h->watch, &h->coord_after, &h->lh_after, ZNODE_WRITE_LOCK);
  46234. + reiser4_tap_monitor(&h->watch);
  46235. +
  46236. + ON_DEBUG(h->orig_ext = *extent_by_coord(h->coord));
  46237. + orig_znode = h->coord->node;
  46238. +
  46239. +#if REISER4_DEBUG
  46240. + /* make sure that key is set properly */
  46241. + unit_key_by_coord(h->coord, &h->tmp);
  46242. + set_key_offset(&h->tmp,
  46243. + get_key_offset(&h->tmp) +
  46244. + extent_get_width(&h->overwrite) * current_blocksize);
  46245. + assert("vs-1080", keyeq(&h->tmp, &h->paste_key));
  46246. +#endif
  46247. +
  46248. + /* set insert point after unit to be replaced */
  46249. + h->coord->between = AFTER_UNIT;
  46250. +
  46251. + result = insert_into_item(h->coord, return_inserted_position ? h->lh : NULL,
  46252. + &h->paste_key, &h->item, h->flags);
  46253. + if (!result) {
  46254. + /* now we have to replace the unit after which new units were
  46255. + inserted. Its position is tracked by @watch */
  46256. + reiser4_extent *ext;
  46257. + znode *node;
  46258. +
  46259. + node = h->coord_after.node;
  46260. + if (node != orig_znode) {
  46261. + coord_clear_iplug(&h->coord_after);
  46262. + result = zload(node);
  46263. + }
  46264. +
  46265. + if (likely(!result)) {
  46266. + ext = extent_by_coord(&h->coord_after);
  46267. +
  46268. + assert("vs-987", znode_is_loaded(node));
  46269. + assert("vs-988", !memcmp(ext, &h->orig_ext, sizeof(*ext)));
  46270. +
  46271. + /* overwrite extent unit */
  46272. + memcpy(ext, &h->overwrite, sizeof(reiser4_extent));
  46273. + znode_make_dirty(node);
  46274. +
  46275. + if (node != orig_znode)
  46276. + zrelse(node);
  46277. +
  46278. + if (return_inserted_position == 0) {
  46279. + /* coord and lh are to be set to overwritten
  46280. + extent */
  46281. + assert("vs-1662",
  46282. + WITH_DATA(node, !memcmp(&h->overwrite,
  46283. + extent_by_coord(
  46284. + &h->coord_after),
  46285. + sizeof(reiser4_extent))));
  46286. +
  46287. + *h->coord = h->coord_after;
  46288. + done_lh(h->lh);
  46289. + copy_lh(h->lh, &h->lh_after);
  46290. + } else {
  46291. + /* h->coord and h->lh are to be set to first of
  46292. + inserted units */
  46293. + assert("vs-1663",
  46294. + WITH_DATA(h->coord->node,
  46295. + !memcmp(&h->new_extents[0],
  46296. + extent_by_coord(h->coord),
  46297. + sizeof(reiser4_extent))));
  46298. + assert("vs-1664", h->lh->node == h->coord->node);
  46299. + }
  46300. + }
  46301. + }
  46302. + reiser4_tap_done(&h->watch);
  46303. +
  46304. + return result;
  46305. +}
  46306. +
  46307. +lock_handle *znode_lh(znode *node)
  46308. +{
  46309. + assert("vs-1371", znode_is_write_locked(node));
  46310. + assert("vs-1372", znode_is_wlocked_once(node));
  46311. + return list_entry(node->lock.owners.next, lock_handle, owners_link);
  46312. +}
  46313. +
  46314. +/*
  46315. + * Local variables:
  46316. + * c-indentation-style: "K&R"
  46317. + * mode-name: "LC"
  46318. + * c-basic-offset: 8
  46319. + * tab-width: 8
  46320. + * fill-column: 79
  46321. + * scroll-step: 1
  46322. + * End:
  46323. + */
  46324. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/item/extent_file_ops.c linux-4.14.2/fs/reiser4/plugin/item/extent_file_ops.c
  46325. --- linux-4.14.2.orig/fs/reiser4/plugin/item/extent_file_ops.c 1970-01-01 01:00:00.000000000 +0100
  46326. +++ linux-4.14.2/fs/reiser4/plugin/item/extent_file_ops.c 2017-11-26 22:13:09.000000000 +0100
  46327. @@ -0,0 +1,1434 @@
  46328. +/* COPYRIGHT 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  46329. +
  46330. +#include "item.h"
  46331. +#include "../../inode.h"
  46332. +#include "../../page_cache.h"
  46333. +#include "../object.h"
  46334. +
  46335. +#include <linux/swap.h>
  46336. +
  46337. +static inline reiser4_extent *ext_by_offset(const znode *node, int offset)
  46338. +{
  46339. + reiser4_extent *ext;
  46340. +
  46341. + ext = (reiser4_extent *) (zdata(node) + offset);
  46342. + return ext;
  46343. +}
  46344. +
  46345. +/**
  46346. + * check_uf_coord - verify coord extension
  46347. + * @uf_coord:
  46348. + * @key:
  46349. + *
  46350. + * Makes sure that all fields of @uf_coord are set properly. If @key is
  46351. + * specified - check whether @uf_coord is set correspondingly.
  46352. + */
  46353. +static void check_uf_coord(const uf_coord_t *uf_coord, const reiser4_key *key)
  46354. +{
  46355. +#if REISER4_DEBUG
  46356. + const coord_t *coord;
  46357. + const struct extent_coord_extension *ext_coord;
  46358. + reiser4_extent *ext;
  46359. +
  46360. + coord = &uf_coord->coord;
  46361. + ext_coord = &uf_coord->extension.extent;
  46362. + ext = ext_by_offset(coord->node, uf_coord->extension.extent.ext_offset);
  46363. +
  46364. + assert("",
  46365. + WITH_DATA(coord->node,
  46366. + (uf_coord->valid == 1 &&
  46367. + coord_is_iplug_set(coord) &&
  46368. + item_is_extent(coord) &&
  46369. + ext_coord->nr_units == nr_units_extent(coord) &&
  46370. + ext == extent_by_coord(coord) &&
  46371. + ext_coord->width == extent_get_width(ext) &&
  46372. + coord->unit_pos < ext_coord->nr_units &&
  46373. + ext_coord->pos_in_unit < ext_coord->width &&
  46374. + memcmp(ext, &ext_coord->extent,
  46375. + sizeof(reiser4_extent)) == 0)));
  46376. + if (key) {
  46377. + reiser4_key coord_key;
  46378. +
  46379. + unit_key_by_coord(&uf_coord->coord, &coord_key);
  46380. + set_key_offset(&coord_key,
  46381. + get_key_offset(&coord_key) +
  46382. + (uf_coord->extension.extent.
  46383. + pos_in_unit << PAGE_SHIFT));
  46384. + assert("", keyeq(key, &coord_key));
  46385. + }
  46386. +#endif
  46387. +}
  46388. +
  46389. +static inline reiser4_extent *ext_by_ext_coord(const uf_coord_t *uf_coord)
  46390. +{
  46391. + return ext_by_offset(uf_coord->coord.node,
  46392. + uf_coord->extension.extent.ext_offset);
  46393. +}
  46394. +
  46395. +#if REISER4_DEBUG
  46396. +
  46397. +/**
  46398. + * offset_is_in_unit
  46399. + *
  46400. + *
  46401. + *
  46402. + */
  46403. +/* return 1 if offset @off is inside of extent unit pointed to by @coord. Set
  46404. + pos_in_unit inside of unit correspondingly */
  46405. +static int offset_is_in_unit(const coord_t *coord, loff_t off)
  46406. +{
  46407. + reiser4_key unit_key;
  46408. + __u64 unit_off;
  46409. + reiser4_extent *ext;
  46410. +
  46411. + ext = extent_by_coord(coord);
  46412. +
  46413. + unit_key_extent(coord, &unit_key);
  46414. + unit_off = get_key_offset(&unit_key);
  46415. + if (off < unit_off)
  46416. + return 0;
  46417. + if (off >= (unit_off + (current_blocksize * extent_get_width(ext))))
  46418. + return 0;
  46419. + return 1;
  46420. +}
  46421. +
  46422. +static int
  46423. +coord_matches_key_extent(const coord_t * coord, const reiser4_key * key)
  46424. +{
  46425. + reiser4_key item_key;
  46426. +
  46427. + assert("vs-771", coord_is_existing_unit(coord));
  46428. + assert("vs-1258", keylt(key, append_key_extent(coord, &item_key)));
  46429. + assert("vs-1259", keyge(key, item_key_by_coord(coord, &item_key)));
  46430. +
  46431. + return offset_is_in_unit(coord, get_key_offset(key));
  46432. +}
  46433. +
  46434. +#endif
  46435. +
  46436. +/**
  46437. + * can_append -
  46438. + * @key:
  46439. + * @coord:
  46440. + *
  46441. + * Returns 1 if @key is equal to an append key of item @coord is set to
  46442. + */
  46443. +static int can_append(const reiser4_key *key, const coord_t *coord)
  46444. +{
  46445. + reiser4_key append_key;
  46446. +
  46447. + return keyeq(key, append_key_extent(coord, &append_key));
  46448. +}
  46449. +
  46450. +/**
  46451. + * append_hole
  46452. + * @coord:
  46453. + * @lh:
  46454. + * @key:
  46455. + *
  46456. + */
  46457. +static int append_hole(coord_t *coord, lock_handle *lh,
  46458. + const reiser4_key *key)
  46459. +{
  46460. + reiser4_key append_key;
  46461. + reiser4_block_nr hole_width;
  46462. + reiser4_extent *ext, new_ext;
  46463. + reiser4_item_data idata;
  46464. +
  46465. + /* last item of file may have to be appended with hole */
  46466. + assert("vs-708", znode_get_level(coord->node) == TWIG_LEVEL);
  46467. + assert("vs-714", item_id_by_coord(coord) == EXTENT_POINTER_ID);
  46468. +
  46469. + /* key of first byte which is not addressed by this extent */
  46470. + append_key_extent(coord, &append_key);
  46471. +
  46472. + assert("", keyle(&append_key, key));
  46473. +
  46474. + /*
  46475. + * extent item has to be appended with hole. Calculate length of that
  46476. + * hole
  46477. + */
  46478. + hole_width = ((get_key_offset(key) - get_key_offset(&append_key) +
  46479. + current_blocksize - 1) >> current_blocksize_bits);
  46480. + assert("vs-954", hole_width > 0);
  46481. +
  46482. + /* set coord after last unit */
  46483. + coord_init_after_item_end(coord);
  46484. +
  46485. + /* get last extent in the item */
  46486. + ext = extent_by_coord(coord);
  46487. + if (state_of_extent(ext) == HOLE_EXTENT) {
  46488. + /*
  46489. + * last extent of a file is hole extent. Widen that extent by
  46490. + * @hole_width blocks. Note that we do not worry about
  46491. + * overflowing - extent width is 64 bits
  46492. + */
  46493. + reiser4_set_extent(ext, HOLE_EXTENT_START,
  46494. + extent_get_width(ext) + hole_width);
  46495. + znode_make_dirty(coord->node);
  46496. + return 0;
  46497. + }
  46498. +
  46499. + /* append last item of the file with hole extent unit */
  46500. + assert("vs-713", (state_of_extent(ext) == ALLOCATED_EXTENT ||
  46501. + state_of_extent(ext) == UNALLOCATED_EXTENT));
  46502. +
  46503. + reiser4_set_extent(&new_ext, HOLE_EXTENT_START, hole_width);
  46504. + init_new_extent(&idata, &new_ext, 1);
  46505. + return insert_into_item(coord, lh, &append_key, &idata, 0);
  46506. +}
  46507. +
  46508. +/**
  46509. + * check_jnodes
  46510. + * @twig: longterm locked twig node
  46511. + * @key:
  46512. + *
  46513. + */
  46514. +static void check_jnodes(znode *twig, const reiser4_key *key, int count)
  46515. +{
  46516. +#if REISER4_DEBUG
  46517. + coord_t c;
  46518. + reiser4_key node_key, jnode_key;
  46519. +
  46520. + jnode_key = *key;
  46521. +
  46522. + assert("", twig != NULL);
  46523. + assert("", znode_get_level(twig) == TWIG_LEVEL);
  46524. + assert("", znode_is_write_locked(twig));
  46525. +
  46526. + zload(twig);
  46527. + /* get the smallest key in twig node */
  46528. + coord_init_first_unit(&c, twig);
  46529. + unit_key_by_coord(&c, &node_key);
  46530. + assert("", keyle(&node_key, &jnode_key));
  46531. +
  46532. + coord_init_last_unit(&c, twig);
  46533. + unit_key_by_coord(&c, &node_key);
  46534. + if (item_plugin_by_coord(&c)->s.file.append_key)
  46535. + item_plugin_by_coord(&c)->s.file.append_key(&c, &node_key);
  46536. + set_key_offset(&jnode_key,
  46537. + get_key_offset(&jnode_key) + (loff_t)count * PAGE_SIZE - 1);
  46538. + assert("", keylt(&jnode_key, &node_key));
  46539. + zrelse(twig);
  46540. +#endif
  46541. +}
  46542. +
  46543. +/**
  46544. + * append_last_extent - append last file item
  46545. + * @uf_coord: coord to start insertion from
  46546. + * @jnodes: array of jnodes
  46547. + * @count: number of jnodes in the array
  46548. + *
  46549. + * There is already at least one extent item of file @inode in the tree. Append
  46550. + * the last of them with unallocated extent unit of width @count. Assign
  46551. + * fake block numbers to jnodes corresponding to the inserted extent.
  46552. + */
  46553. +static int append_last_extent(uf_coord_t *uf_coord, const reiser4_key *key,
  46554. + jnode **jnodes, int count)
  46555. +{
  46556. + int result;
  46557. + reiser4_extent new_ext;
  46558. + reiser4_item_data idata;
  46559. + coord_t *coord;
  46560. + struct extent_coord_extension *ext_coord;
  46561. + reiser4_extent *ext;
  46562. + reiser4_block_nr block;
  46563. + jnode *node;
  46564. + int i;
  46565. +
  46566. + coord = &uf_coord->coord;
  46567. + ext_coord = &uf_coord->extension.extent;
  46568. + ext = ext_by_ext_coord(uf_coord);
  46569. +
  46570. + /* check correctness of position in the item */
  46571. + assert("vs-228", coord->unit_pos == coord_last_unit_pos(coord));
  46572. + assert("vs-1311", coord->between == AFTER_UNIT);
  46573. + assert("vs-1302", ext_coord->pos_in_unit == ext_coord->width - 1);
  46574. +
  46575. + if (!can_append(key, coord)) {
  46576. + /* hole extent has to be inserted */
  46577. + result = append_hole(coord, uf_coord->lh, key);
  46578. + uf_coord->valid = 0;
  46579. + return result;
  46580. + }
  46581. +
  46582. + if (count == 0)
  46583. + return 0;
  46584. +
  46585. + assert("", get_key_offset(key) == (loff_t)index_jnode(jnodes[0]) * PAGE_SIZE);
  46586. +
  46587. + inode_add_blocks(mapping_jnode(jnodes[0])->host, count);
  46588. +
  46589. + switch (state_of_extent(ext)) {
  46590. + case UNALLOCATED_EXTENT:
  46591. + /*
  46592. + * last extent unit of the file is unallocated one. Increase
  46593. + * its width by @count
  46594. + */
  46595. + reiser4_set_extent(ext, UNALLOCATED_EXTENT_START,
  46596. + extent_get_width(ext) + count);
  46597. + znode_make_dirty(coord->node);
  46598. +
  46599. + /* update coord extension */
  46600. + ext_coord->width += count;
  46601. + ON_DEBUG(extent_set_width
  46602. + (&uf_coord->extension.extent.extent,
  46603. + ext_coord->width));
  46604. + break;
  46605. +
  46606. + case HOLE_EXTENT:
  46607. + case ALLOCATED_EXTENT:
  46608. + /*
  46609. + * last extent unit of the file is either hole or allocated
  46610. + * one. Append one unallocated extent of width @count
  46611. + */
  46612. + reiser4_set_extent(&new_ext, UNALLOCATED_EXTENT_START, count);
  46613. + init_new_extent(&idata, &new_ext, 1);
  46614. + result = insert_into_item(coord, uf_coord->lh, key, &idata, 0);
  46615. + uf_coord->valid = 0;
  46616. + if (result)
  46617. + return result;
  46618. + break;
  46619. +
  46620. + default:
  46621. + return RETERR(-EIO);
  46622. + }
  46623. +
  46624. + /*
  46625. + * make sure that we hold long term locked twig node containing all
  46626. + * jnodes we are about to capture
  46627. + */
  46628. + check_jnodes(uf_coord->lh->node, key, count);
  46629. +
  46630. + /*
  46631. + * assign fake block numbers to all jnodes. FIXME: make sure whether
  46632. + * twig node containing inserted extent item is locked
  46633. + */
  46634. + block = fake_blocknr_unformatted(count);
  46635. + for (i = 0; i < count; i ++, block ++) {
  46636. + node = jnodes[i];
  46637. + spin_lock_jnode(node);
  46638. + JF_SET(node, JNODE_CREATED);
  46639. + jnode_set_block(node, &block);
  46640. + result = reiser4_try_capture(node, ZNODE_WRITE_LOCK, 0);
  46641. + BUG_ON(result != 0);
  46642. + jnode_make_dirty_locked(node);
  46643. + spin_unlock_jnode(node);
  46644. + }
  46645. + return count;
  46646. +}
  46647. +
  46648. +/**
  46649. + * insert_first_hole - inser hole extent into tree
  46650. + * @coord:
  46651. + * @lh:
  46652. + * @key:
  46653. + *
  46654. + *
  46655. + */
  46656. +static int insert_first_hole(coord_t *coord, lock_handle *lh,
  46657. + const reiser4_key *key)
  46658. +{
  46659. + reiser4_extent new_ext;
  46660. + reiser4_item_data idata;
  46661. + reiser4_key item_key;
  46662. + reiser4_block_nr hole_width;
  46663. +
  46664. + /* @coord must be set for inserting of new item */
  46665. + assert("vs-711", coord_is_between_items(coord));
  46666. +
  46667. + item_key = *key;
  46668. + set_key_offset(&item_key, 0ull);
  46669. +
  46670. + hole_width = ((get_key_offset(key) + current_blocksize - 1) >>
  46671. + current_blocksize_bits);
  46672. + assert("vs-710", hole_width > 0);
  46673. +
  46674. + /* compose body of hole extent and insert item into tree */
  46675. + reiser4_set_extent(&new_ext, HOLE_EXTENT_START, hole_width);
  46676. + init_new_extent(&idata, &new_ext, 1);
  46677. + return insert_extent_by_coord(coord, &idata, &item_key, lh);
  46678. +}
  46679. +
  46680. +
  46681. +/**
  46682. + * insert_first_extent - insert first file item
  46683. + * @inode: inode of file
  46684. + * @uf_coord: coord to start insertion from
  46685. + * @jnodes: array of jnodes
  46686. + * @count: number of jnodes in the array
  46687. + * @inode:
  46688. + *
  46689. + * There are no items of file @inode in the tree yet. Insert unallocated extent
  46690. + * of width @count into tree or hole extent if writing not to the
  46691. + * beginning. Assign fake block numbers to jnodes corresponding to the inserted
  46692. + * unallocated extent. Returns number of jnodes or error code.
  46693. + */
  46694. +static int insert_first_extent(uf_coord_t *uf_coord, const reiser4_key *key,
  46695. + jnode **jnodes, int count,
  46696. + struct inode *inode)
  46697. +{
  46698. + int result;
  46699. + int i;
  46700. + reiser4_extent new_ext;
  46701. + reiser4_item_data idata;
  46702. + reiser4_block_nr block;
  46703. + struct unix_file_info *uf_info;
  46704. + jnode *node;
  46705. +
  46706. + /* first extent insertion starts at leaf level */
  46707. + assert("vs-719", znode_get_level(uf_coord->coord.node) == LEAF_LEVEL);
  46708. + assert("vs-711", coord_is_between_items(&uf_coord->coord));
  46709. +
  46710. + if (get_key_offset(key) != 0) {
  46711. + result = insert_first_hole(&uf_coord->coord, uf_coord->lh, key);
  46712. + uf_coord->valid = 0;
  46713. + uf_info = unix_file_inode_data(inode);
  46714. +
  46715. + /*
  46716. + * first item insertion is only possible when writing to empty
  46717. + * file or performing tail conversion
  46718. + */
  46719. + assert("", (uf_info->container == UF_CONTAINER_EMPTY ||
  46720. + (reiser4_inode_get_flag(inode,
  46721. + REISER4_PART_MIXED) &&
  46722. + reiser4_inode_get_flag(inode,
  46723. + REISER4_PART_IN_CONV))));
  46724. + /* if file was empty - update its state */
  46725. + if (result == 0 && uf_info->container == UF_CONTAINER_EMPTY)
  46726. + uf_info->container = UF_CONTAINER_EXTENTS;
  46727. + return result;
  46728. + }
  46729. +
  46730. + if (count == 0)
  46731. + return 0;
  46732. +
  46733. + inode_add_blocks(mapping_jnode(jnodes[0])->host, count);
  46734. +
  46735. + /*
  46736. + * prepare for tree modification: compose body of item and item data
  46737. + * structure needed for insertion
  46738. + */
  46739. + reiser4_set_extent(&new_ext, UNALLOCATED_EXTENT_START, count);
  46740. + init_new_extent(&idata, &new_ext, 1);
  46741. +
  46742. + /* insert extent item into the tree */
  46743. + result = insert_extent_by_coord(&uf_coord->coord, &idata, key,
  46744. + uf_coord->lh);
  46745. + if (result)
  46746. + return result;
  46747. +
  46748. + /*
  46749. + * make sure that we hold long term locked twig node containing all
  46750. + * jnodes we are about to capture
  46751. + */
  46752. + check_jnodes(uf_coord->lh->node, key, count);
  46753. + /*
  46754. + * assign fake block numbers to all jnodes, capture and mark them dirty
  46755. + */
  46756. + block = fake_blocknr_unformatted(count);
  46757. + for (i = 0; i < count; i ++, block ++) {
  46758. + node = jnodes[i];
  46759. + spin_lock_jnode(node);
  46760. + JF_SET(node, JNODE_CREATED);
  46761. + jnode_set_block(node, &block);
  46762. + result = reiser4_try_capture(node, ZNODE_WRITE_LOCK, 0);
  46763. + BUG_ON(result != 0);
  46764. + jnode_make_dirty_locked(node);
  46765. + spin_unlock_jnode(node);
  46766. + }
  46767. +
  46768. + /*
  46769. + * invalidate coordinate, research must be performed to continue
  46770. + * because write will continue on twig level
  46771. + */
  46772. + uf_coord->valid = 0;
  46773. + return count;
  46774. +}
  46775. +
  46776. +/**
  46777. + * plug_hole - replace hole extent with unallocated and holes
  46778. + * @uf_coord:
  46779. + * @key:
  46780. + * @node:
  46781. + * @h: structure containing coordinate, lock handle, key, etc
  46782. + *
  46783. + * Creates an unallocated extent of width 1 within a hole. In worst case two
  46784. + * additional extents can be created.
  46785. + */
  46786. +static int plug_hole(uf_coord_t *uf_coord, const reiser4_key *key, int *how)
  46787. +{
  46788. + struct replace_handle rh;
  46789. + reiser4_extent *ext;
  46790. + reiser4_block_nr width, pos_in_unit;
  46791. + coord_t *coord;
  46792. + struct extent_coord_extension *ext_coord;
  46793. + int return_inserted_position;
  46794. +
  46795. + check_uf_coord(uf_coord, key);
  46796. +
  46797. + rh.coord = coord_by_uf_coord(uf_coord);
  46798. + rh.lh = uf_coord->lh;
  46799. + rh.flags = 0;
  46800. +
  46801. + coord = coord_by_uf_coord(uf_coord);
  46802. + ext_coord = ext_coord_by_uf_coord(uf_coord);
  46803. + ext = ext_by_ext_coord(uf_coord);
  46804. +
  46805. + width = ext_coord->width;
  46806. + pos_in_unit = ext_coord->pos_in_unit;
  46807. +
  46808. + *how = 0;
  46809. + if (width == 1) {
  46810. + reiser4_set_extent(ext, UNALLOCATED_EXTENT_START, 1);
  46811. + znode_make_dirty(coord->node);
  46812. + /* update uf_coord */
  46813. + ON_DEBUG(ext_coord->extent = *ext);
  46814. + *how = 1;
  46815. + return 0;
  46816. + } else if (pos_in_unit == 0) {
  46817. + /* we deal with first element of extent */
  46818. + if (coord->unit_pos) {
  46819. + /* there is an extent to the left */
  46820. + if (state_of_extent(ext - 1) == UNALLOCATED_EXTENT) {
  46821. + /*
  46822. + * left neighboring unit is an unallocated
  46823. + * extent. Increase its width and decrease
  46824. + * width of hole
  46825. + */
  46826. + extent_set_width(ext - 1,
  46827. + extent_get_width(ext - 1) + 1);
  46828. + extent_set_width(ext, width - 1);
  46829. + znode_make_dirty(coord->node);
  46830. +
  46831. + /* update coord extension */
  46832. + coord->unit_pos--;
  46833. + ext_coord->width = extent_get_width(ext - 1);
  46834. + ext_coord->pos_in_unit = ext_coord->width - 1;
  46835. + ext_coord->ext_offset -= sizeof(reiser4_extent);
  46836. + ON_DEBUG(ext_coord->extent =
  46837. + *extent_by_coord(coord));
  46838. + *how = 2;
  46839. + return 0;
  46840. + }
  46841. + }
  46842. + /* extent for replace */
  46843. + reiser4_set_extent(&rh.overwrite, UNALLOCATED_EXTENT_START, 1);
  46844. + /* extent to be inserted */
  46845. + reiser4_set_extent(&rh.new_extents[0], HOLE_EXTENT_START,
  46846. + width - 1);
  46847. + rh.nr_new_extents = 1;
  46848. +
  46849. + /* have reiser4_replace_extent to return with @coord and
  46850. + @uf_coord->lh set to unit which was replaced */
  46851. + return_inserted_position = 0;
  46852. + *how = 3;
  46853. + } else if (pos_in_unit == width - 1) {
  46854. + /* we deal with last element of extent */
  46855. + if (coord->unit_pos < nr_units_extent(coord) - 1) {
  46856. + /* there is an extent unit to the right */
  46857. + if (state_of_extent(ext + 1) == UNALLOCATED_EXTENT) {
  46858. + /*
  46859. + * right neighboring unit is an unallocated
  46860. + * extent. Increase its width and decrease
  46861. + * width of hole
  46862. + */
  46863. + extent_set_width(ext + 1,
  46864. + extent_get_width(ext + 1) + 1);
  46865. + extent_set_width(ext, width - 1);
  46866. + znode_make_dirty(coord->node);
  46867. +
  46868. + /* update coord extension */
  46869. + coord->unit_pos++;
  46870. + ext_coord->width = extent_get_width(ext + 1);
  46871. + ext_coord->pos_in_unit = 0;
  46872. + ext_coord->ext_offset += sizeof(reiser4_extent);
  46873. + ON_DEBUG(ext_coord->extent =
  46874. + *extent_by_coord(coord));
  46875. + *how = 4;
  46876. + return 0;
  46877. + }
  46878. + }
  46879. + /* extent for replace */
  46880. + reiser4_set_extent(&rh.overwrite, HOLE_EXTENT_START, width - 1);
  46881. + /* extent to be inserted */
  46882. + reiser4_set_extent(&rh.new_extents[0], UNALLOCATED_EXTENT_START,
  46883. + 1);
  46884. + rh.nr_new_extents = 1;
  46885. +
  46886. + /* have reiser4_replace_extent to return with @coord and
  46887. + @uf_coord->lh set to unit which was inserted */
  46888. + return_inserted_position = 1;
  46889. + *how = 5;
  46890. + } else {
  46891. + /* extent for replace */
  46892. + reiser4_set_extent(&rh.overwrite, HOLE_EXTENT_START,
  46893. + pos_in_unit);
  46894. + /* extents to be inserted */
  46895. + reiser4_set_extent(&rh.new_extents[0], UNALLOCATED_EXTENT_START,
  46896. + 1);
  46897. + reiser4_set_extent(&rh.new_extents[1], HOLE_EXTENT_START,
  46898. + width - pos_in_unit - 1);
  46899. + rh.nr_new_extents = 2;
  46900. +
  46901. + /* have reiser4_replace_extent to return with @coord and
  46902. + @uf_coord->lh set to first of units which were inserted */
  46903. + return_inserted_position = 1;
  46904. + *how = 6;
  46905. + }
  46906. + unit_key_by_coord(coord, &rh.paste_key);
  46907. + set_key_offset(&rh.paste_key, get_key_offset(&rh.paste_key) +
  46908. + extent_get_width(&rh.overwrite) * current_blocksize);
  46909. +
  46910. + uf_coord->valid = 0;
  46911. + return reiser4_replace_extent(&rh, return_inserted_position);
  46912. +}
  46913. +
  46914. +/**
  46915. + * overwrite_one_block -
  46916. + * @uf_coord:
  46917. + * @key:
  46918. + * @node:
  46919. + *
  46920. + * If @node corresponds to hole extent - create unallocated extent for it and
  46921. + * assign fake block number. If @node corresponds to allocated extent - assign
  46922. + * block number of jnode
  46923. + */
  46924. +static int overwrite_one_block(uf_coord_t *uf_coord, const reiser4_key *key,
  46925. + jnode *node, int *hole_plugged)
  46926. +{
  46927. + int result;
  46928. + struct extent_coord_extension *ext_coord;
  46929. + reiser4_extent *ext;
  46930. + reiser4_block_nr block;
  46931. + int how;
  46932. +
  46933. + assert("vs-1312", uf_coord->coord.between == AT_UNIT);
  46934. +
  46935. + result = 0;
  46936. + ext_coord = ext_coord_by_uf_coord(uf_coord);
  46937. + check_uf_coord(uf_coord, NULL);
  46938. + ext = ext_by_ext_coord(uf_coord);
  46939. + assert("", state_of_extent(ext) != UNALLOCATED_EXTENT);
  46940. +
  46941. + switch (state_of_extent(ext)) {
  46942. + case ALLOCATED_EXTENT:
  46943. + block = extent_get_start(ext) + ext_coord->pos_in_unit;
  46944. + break;
  46945. +
  46946. + case HOLE_EXTENT:
  46947. + inode_add_blocks(mapping_jnode(node)->host, 1);
  46948. + result = plug_hole(uf_coord, key, &how);
  46949. + if (result)
  46950. + return result;
  46951. + block = fake_blocknr_unformatted(1);
  46952. + if (hole_plugged)
  46953. + *hole_plugged = 1;
  46954. + JF_SET(node, JNODE_CREATED);
  46955. + break;
  46956. +
  46957. + default:
  46958. + return RETERR(-EIO);
  46959. + }
  46960. +
  46961. + jnode_set_block(node, &block);
  46962. + return 0;
  46963. +}
  46964. +
  46965. +/**
  46966. + * move_coord - move coordinate forward
  46967. + * @uf_coord:
  46968. + *
  46969. + * Move coordinate one data block pointer forward. Return 1 if coord is set to
  46970. + * the last one already or is invalid.
  46971. + */
  46972. +static int move_coord(uf_coord_t *uf_coord)
  46973. +{
  46974. + struct extent_coord_extension *ext_coord;
  46975. +
  46976. + if (uf_coord->valid == 0)
  46977. + return 1;
  46978. + ext_coord = &uf_coord->extension.extent;
  46979. + ext_coord->pos_in_unit ++;
  46980. + if (ext_coord->pos_in_unit < ext_coord->width)
  46981. + /* coordinate moved within the unit */
  46982. + return 0;
  46983. +
  46984. + /* end of unit is reached. Try to move to next unit */
  46985. + ext_coord->pos_in_unit = 0;
  46986. + uf_coord->coord.unit_pos ++;
  46987. + if (uf_coord->coord.unit_pos < ext_coord->nr_units) {
  46988. + /* coordinate moved to next unit */
  46989. + ext_coord->ext_offset += sizeof(reiser4_extent);
  46990. + ext_coord->width =
  46991. + extent_get_width(ext_by_offset
  46992. + (uf_coord->coord.node,
  46993. + ext_coord->ext_offset));
  46994. + ON_DEBUG(ext_coord->extent =
  46995. + *ext_by_offset(uf_coord->coord.node,
  46996. + ext_coord->ext_offset));
  46997. + return 0;
  46998. + }
  46999. + /* end of item is reached */
  47000. + uf_coord->valid = 0;
  47001. + return 1;
  47002. +}
  47003. +
  47004. +/**
  47005. + * overwrite_extent -
  47006. + * @inode:
  47007. + *
  47008. + * Returns number of handled jnodes.
  47009. + */
  47010. +static int overwrite_extent(uf_coord_t *uf_coord, const reiser4_key *key,
  47011. + jnode **jnodes, int count, int *plugged_hole)
  47012. +{
  47013. + int result;
  47014. + reiser4_key k;
  47015. + int i;
  47016. + jnode *node;
  47017. +
  47018. + k = *key;
  47019. + for (i = 0; i < count; i ++) {
  47020. + node = jnodes[i];
  47021. + if (*jnode_get_block(node) == 0) {
  47022. + result = overwrite_one_block(uf_coord, &k, node, plugged_hole);
  47023. + if (result)
  47024. + return result;
  47025. + }
  47026. + /*
  47027. + * make sure that we hold long term locked twig node containing
  47028. + * all jnodes we are about to capture
  47029. + */
  47030. + check_jnodes(uf_coord->lh->node, &k, 1);
  47031. + /*
  47032. + * assign fake block numbers to all jnodes, capture and mark
  47033. + * them dirty
  47034. + */
  47035. + spin_lock_jnode(node);
  47036. + result = reiser4_try_capture(node, ZNODE_WRITE_LOCK, 0);
  47037. + BUG_ON(result != 0);
  47038. + jnode_make_dirty_locked(node);
  47039. + spin_unlock_jnode(node);
  47040. +
  47041. + if (uf_coord->valid == 0)
  47042. + return i + 1;
  47043. +
  47044. + check_uf_coord(uf_coord, &k);
  47045. +
  47046. + if (move_coord(uf_coord)) {
  47047. + /*
  47048. + * failed to move to the next node pointer. Either end
  47049. + * of file or end of twig node is reached. In the later
  47050. + * case we might go to the right neighbor.
  47051. + */
  47052. + uf_coord->valid = 0;
  47053. + return i + 1;
  47054. + }
  47055. + set_key_offset(&k, get_key_offset(&k) + PAGE_SIZE);
  47056. + }
  47057. +
  47058. + return count;
  47059. +}
  47060. +
  47061. +/**
  47062. + * reiser4_update_extent
  47063. + * @file:
  47064. + * @jnodes:
  47065. + * @count:
  47066. + * @off:
  47067. + *
  47068. + */
  47069. +int reiser4_update_extent(struct inode *inode, jnode *node, loff_t pos,
  47070. + int *plugged_hole)
  47071. +{
  47072. + int result;
  47073. + znode *loaded;
  47074. + uf_coord_t uf_coord;
  47075. + coord_t *coord;
  47076. + lock_handle lh;
  47077. + reiser4_key key;
  47078. +
  47079. + assert("", reiser4_lock_counters()->d_refs == 0);
  47080. +
  47081. + key_by_inode_and_offset_common(inode, pos, &key);
  47082. +
  47083. + init_uf_coord(&uf_coord, &lh);
  47084. + coord = &uf_coord.coord;
  47085. + result = find_file_item_nohint(coord, &lh, &key,
  47086. + ZNODE_WRITE_LOCK, inode);
  47087. + if (IS_CBKERR(result)) {
  47088. + assert("", reiser4_lock_counters()->d_refs == 0);
  47089. + return result;
  47090. + }
  47091. +
  47092. + result = zload(coord->node);
  47093. + BUG_ON(result != 0);
  47094. + loaded = coord->node;
  47095. +
  47096. + if (coord->between == AFTER_UNIT) {
  47097. + /*
  47098. + * append existing extent item with unallocated extent of width
  47099. + * nr_jnodes
  47100. + */
  47101. + init_coord_extension_extent(&uf_coord,
  47102. + get_key_offset(&key));
  47103. + result = append_last_extent(&uf_coord, &key,
  47104. + &node, 1);
  47105. + } else if (coord->between == AT_UNIT) {
  47106. + /*
  47107. + * overwrite
  47108. + * not optimal yet. Will be optimized if new write will show
  47109. + * performance win.
  47110. + */
  47111. + init_coord_extension_extent(&uf_coord,
  47112. + get_key_offset(&key));
  47113. + result = overwrite_extent(&uf_coord, &key,
  47114. + &node, 1, plugged_hole);
  47115. + } else {
  47116. + /*
  47117. + * there are no items of this file in the tree yet. Create
  47118. + * first item of the file inserting one unallocated extent of
  47119. + * width nr_jnodes
  47120. + */
  47121. + result = insert_first_extent(&uf_coord, &key, &node, 1, inode);
  47122. + }
  47123. + assert("", result == 1 || result < 0);
  47124. + zrelse(loaded);
  47125. + done_lh(&lh);
  47126. + assert("", reiser4_lock_counters()->d_refs == 0);
  47127. + return (result == 1) ? 0 : result;
  47128. +}
  47129. +
  47130. +/**
  47131. + * update_extents
  47132. + * @file:
  47133. + * @jnodes:
  47134. + * @count:
  47135. + * @off:
  47136. + *
  47137. + */
  47138. +static int update_extents(struct file *file, struct inode *inode,
  47139. + jnode **jnodes, int count, loff_t pos)
  47140. +{
  47141. + struct hint hint;
  47142. + reiser4_key key;
  47143. + int result;
  47144. + znode *loaded;
  47145. +
  47146. + result = load_file_hint(file, &hint);
  47147. + BUG_ON(result != 0);
  47148. +
  47149. + if (count != 0)
  47150. + /*
  47151. + * count == 0 is special case: expanding truncate
  47152. + */
  47153. + pos = (loff_t)index_jnode(jnodes[0]) << PAGE_SHIFT;
  47154. + key_by_inode_and_offset_common(inode, pos, &key);
  47155. +
  47156. + assert("", reiser4_lock_counters()->d_refs == 0);
  47157. +
  47158. + do {
  47159. + result = find_file_item(&hint, &key, ZNODE_WRITE_LOCK, inode);
  47160. + if (IS_CBKERR(result)) {
  47161. + assert("", reiser4_lock_counters()->d_refs == 0);
  47162. + return result;
  47163. + }
  47164. +
  47165. + result = zload(hint.ext_coord.coord.node);
  47166. + BUG_ON(result != 0);
  47167. + loaded = hint.ext_coord.coord.node;
  47168. +
  47169. + if (hint.ext_coord.coord.between == AFTER_UNIT) {
  47170. + /*
  47171. + * append existing extent item with unallocated extent
  47172. + * of width nr_jnodes
  47173. + */
  47174. + if (hint.ext_coord.valid == 0)
  47175. + /* NOTE: get statistics on this */
  47176. + init_coord_extension_extent(&hint.ext_coord,
  47177. + get_key_offset(&key));
  47178. + result = append_last_extent(&hint.ext_coord, &key,
  47179. + jnodes, count);
  47180. + } else if (hint.ext_coord.coord.between == AT_UNIT) {
  47181. + /*
  47182. + * overwrite
  47183. + * not optimal yet. Will be optimized if new write will
  47184. + * show performance win.
  47185. + */
  47186. + if (hint.ext_coord.valid == 0)
  47187. + /* NOTE: get statistics on this */
  47188. + init_coord_extension_extent(&hint.ext_coord,
  47189. + get_key_offset(&key));
  47190. + result = overwrite_extent(&hint.ext_coord, &key,
  47191. + jnodes, count, NULL);
  47192. + } else {
  47193. + /*
  47194. + * there are no items of this file in the tree
  47195. + * yet. Create first item of the file inserting one
  47196. + * unallocated extent of * width nr_jnodes
  47197. + */
  47198. + result = insert_first_extent(&hint.ext_coord, &key,
  47199. + jnodes, count, inode);
  47200. + }
  47201. + zrelse(loaded);
  47202. + if (result < 0) {
  47203. + done_lh(hint.ext_coord.lh);
  47204. + break;
  47205. + }
  47206. +
  47207. + jnodes += result;
  47208. + count -= result;
  47209. + set_key_offset(&key, get_key_offset(&key) + result * PAGE_SIZE);
  47210. +
  47211. + /* seal and unlock znode */
  47212. + if (hint.ext_coord.valid)
  47213. + reiser4_set_hint(&hint, &key, ZNODE_WRITE_LOCK);
  47214. + else
  47215. + reiser4_unset_hint(&hint);
  47216. +
  47217. + } while (count > 0);
  47218. +
  47219. + save_file_hint(file, &hint);
  47220. + assert("", reiser4_lock_counters()->d_refs == 0);
  47221. + return result;
  47222. +}
  47223. +
  47224. +/**
  47225. + * write_extent_reserve_space - reserve space for extent write operation
  47226. + * @inode:
  47227. + *
  47228. + * Estimates and reserves space which may be required for writing
  47229. + * WRITE_GRANULARITY pages of file.
  47230. + */
  47231. +static int write_extent_reserve_space(struct inode *inode)
  47232. +{
  47233. + __u64 count;
  47234. + reiser4_tree *tree;
  47235. +
  47236. + /*
  47237. + * to write WRITE_GRANULARITY pages to a file by extents we have to
  47238. + * reserve disk space for:
  47239. +
  47240. + * 1. find_file_item may have to insert empty node to the tree (empty
  47241. + * leaf node between two extent items). This requires 1 block and
  47242. + * number of blocks which are necessary to perform insertion of an
  47243. + * internal item into twig level.
  47244. +
  47245. + * 2. for each of written pages there might be needed 1 block and
  47246. + * number of blocks which might be necessary to perform insertion of or
  47247. + * paste to an extent item.
  47248. +
  47249. + * 3. stat data update
  47250. + */
  47251. + tree = reiser4_tree_by_inode(inode);
  47252. + count = estimate_one_insert_item(tree) +
  47253. + WRITE_GRANULARITY * (1 + estimate_one_insert_into_item(tree)) +
  47254. + estimate_one_insert_item(tree);
  47255. + grab_space_enable();
  47256. + return reiser4_grab_space(count, 0 /* flags */);
  47257. +}
  47258. +
  47259. +/*
  47260. + * filemap_copy_from_user no longer exists in generic code, because it
  47261. + * is deadlocky (copying from user while holding the page lock is bad).
  47262. + * As a temporary fix for reiser4, just define it here.
  47263. + */
  47264. +static inline size_t
  47265. +filemap_copy_from_user(struct page *page, unsigned long offset,
  47266. + const char __user *buf, unsigned bytes)
  47267. +{
  47268. + char *kaddr;
  47269. + int left;
  47270. +
  47271. + kaddr = kmap_atomic(page);
  47272. + left = __copy_from_user_inatomic(kaddr + offset, buf, bytes);
  47273. + kunmap_atomic(kaddr);
  47274. +
  47275. + if (left != 0) {
  47276. + /* Do it the slow way */
  47277. + kaddr = kmap(page);
  47278. + left = __copy_from_user(kaddr + offset, buf, bytes);
  47279. + kunmap(page);
  47280. + }
  47281. + return bytes - left;
  47282. +}
  47283. +
  47284. +/**
  47285. + * reiser4_write_extent - write method of extent item plugin
  47286. + * @file: file to write to
  47287. + * @buf: address of user-space buffer
  47288. + * @count: number of bytes to write
  47289. + * @pos: position in file to write to
  47290. + *
  47291. + */
  47292. +ssize_t reiser4_write_extent(struct file *file, struct inode * inode,
  47293. + const char __user *buf, size_t count, loff_t *pos)
  47294. +{
  47295. + int have_to_update_extent;
  47296. + int nr_pages, nr_dirty;
  47297. + struct page *page;
  47298. + jnode *jnodes[WRITE_GRANULARITY + 1];
  47299. + unsigned long index;
  47300. + unsigned long end;
  47301. + int i;
  47302. + int to_page, page_off;
  47303. + size_t left, written;
  47304. + int result = 0;
  47305. +
  47306. + if (write_extent_reserve_space(inode))
  47307. + return RETERR(-ENOSPC);
  47308. +
  47309. + if (count == 0) {
  47310. + /* truncate case */
  47311. + update_extents(file, inode, jnodes, 0, *pos);
  47312. + return 0;
  47313. + }
  47314. +
  47315. + BUG_ON(get_current_context()->trans->atom != NULL);
  47316. +
  47317. + left = count;
  47318. + index = *pos >> PAGE_SHIFT;
  47319. + /* calculate number of pages which are to be written */
  47320. + end = ((*pos + count - 1) >> PAGE_SHIFT);
  47321. + nr_pages = end - index + 1;
  47322. + nr_dirty = 0;
  47323. + assert("", nr_pages <= WRITE_GRANULARITY + 1);
  47324. +
  47325. + /* get pages and jnodes */
  47326. + for (i = 0; i < nr_pages; i ++) {
  47327. + page = find_or_create_page(inode->i_mapping, index + i,
  47328. + reiser4_ctx_gfp_mask_get());
  47329. + if (page == NULL) {
  47330. + nr_pages = i;
  47331. + result = RETERR(-ENOMEM);
  47332. + goto out;
  47333. + }
  47334. +
  47335. + jnodes[i] = jnode_of_page(page);
  47336. + if (IS_ERR(jnodes[i])) {
  47337. + unlock_page(page);
  47338. + put_page(page);
  47339. + nr_pages = i;
  47340. + result = RETERR(-ENOMEM);
  47341. + goto out;
  47342. + }
  47343. + /* prevent jnode and page from disconnecting */
  47344. + JF_SET(jnodes[i], JNODE_WRITE_PREPARED);
  47345. + unlock_page(page);
  47346. + }
  47347. +
  47348. + BUG_ON(get_current_context()->trans->atom != NULL);
  47349. +
  47350. + have_to_update_extent = 0;
  47351. +
  47352. + page_off = (*pos & (PAGE_SIZE - 1));
  47353. + for (i = 0; i < nr_pages; i ++) {
  47354. + to_page = PAGE_SIZE - page_off;
  47355. + if (to_page > left)
  47356. + to_page = left;
  47357. + page = jnode_page(jnodes[i]);
  47358. + if (page_offset(page) < inode->i_size &&
  47359. + !PageUptodate(page) && to_page != PAGE_SIZE) {
  47360. + /*
  47361. + * the above is not optimal for partial write to last
  47362. + * page of file when file size is not at boundary of
  47363. + * page
  47364. + */
  47365. + lock_page(page);
  47366. + if (!PageUptodate(page)) {
  47367. + result = readpage_unix_file(NULL, page);
  47368. + BUG_ON(result != 0);
  47369. + /* wait for read completion */
  47370. + lock_page(page);
  47371. + BUG_ON(!PageUptodate(page));
  47372. + } else
  47373. + result = 0;
  47374. + unlock_page(page);
  47375. + }
  47376. +
  47377. + BUG_ON(get_current_context()->trans->atom != NULL);
  47378. + fault_in_pages_readable(buf, to_page);
  47379. + BUG_ON(get_current_context()->trans->atom != NULL);
  47380. +
  47381. + lock_page(page);
  47382. + if (!PageUptodate(page) && to_page != PAGE_SIZE)
  47383. + zero_user_segments(page, 0, page_off,
  47384. + page_off + to_page,
  47385. + PAGE_SIZE);
  47386. +
  47387. + written = filemap_copy_from_user(page, page_off, buf, to_page);
  47388. + if (unlikely(written != to_page)) {
  47389. + unlock_page(page);
  47390. + result = RETERR(-EFAULT);
  47391. + break;
  47392. + }
  47393. +
  47394. + flush_dcache_page(page);
  47395. + set_page_dirty_notag(page);
  47396. + unlock_page(page);
  47397. + nr_dirty++;
  47398. +
  47399. + mark_page_accessed(page);
  47400. + SetPageUptodate(page);
  47401. +
  47402. + if (jnodes[i]->blocknr == 0)
  47403. + have_to_update_extent ++;
  47404. +
  47405. + page_off = 0;
  47406. + buf += to_page;
  47407. + left -= to_page;
  47408. + BUG_ON(get_current_context()->trans->atom != NULL);
  47409. + }
  47410. +
  47411. + if (have_to_update_extent) {
  47412. + update_extents(file, inode, jnodes, nr_dirty, *pos);
  47413. + } else {
  47414. + for (i = 0; i < nr_dirty; i ++) {
  47415. + int ret;
  47416. + spin_lock_jnode(jnodes[i]);
  47417. + ret = reiser4_try_capture(jnodes[i],
  47418. + ZNODE_WRITE_LOCK, 0);
  47419. + BUG_ON(ret != 0);
  47420. + jnode_make_dirty_locked(jnodes[i]);
  47421. + spin_unlock_jnode(jnodes[i]);
  47422. + }
  47423. + }
  47424. +out:
  47425. + for (i = 0; i < nr_pages; i ++) {
  47426. + put_page(jnode_page(jnodes[i]));
  47427. + JF_CLR(jnodes[i], JNODE_WRITE_PREPARED);
  47428. + jput(jnodes[i]);
  47429. + }
  47430. +
  47431. + /* the only errors handled so far are ENOMEM and
  47432. + EFAULT on copy_from_user */
  47433. +
  47434. + return (count - left) ? (count - left) : result;
  47435. +}
  47436. +
  47437. +int reiser4_do_readpage_extent(reiser4_extent * ext, reiser4_block_nr pos,
  47438. + struct page *page)
  47439. +{
  47440. + jnode *j;
  47441. + struct address_space *mapping;
  47442. + unsigned long index;
  47443. + oid_t oid;
  47444. + reiser4_block_nr block;
  47445. +
  47446. + mapping = page->mapping;
  47447. + oid = get_inode_oid(mapping->host);
  47448. + index = page->index;
  47449. +
  47450. + switch (state_of_extent(ext)) {
  47451. + case HOLE_EXTENT:
  47452. + /*
  47453. + * it is possible to have hole page with jnode, if page was
  47454. + * eflushed previously.
  47455. + */
  47456. + j = jfind(mapping, index);
  47457. + if (j == NULL) {
  47458. + zero_user(page, 0, PAGE_SIZE);
  47459. + SetPageUptodate(page);
  47460. + unlock_page(page);
  47461. + return 0;
  47462. + }
  47463. + spin_lock_jnode(j);
  47464. + if (!jnode_page(j)) {
  47465. + jnode_attach_page(j, page);
  47466. + } else {
  47467. + BUG_ON(jnode_page(j) != page);
  47468. + assert("vs-1504", jnode_page(j) == page);
  47469. + }
  47470. + block = *jnode_get_io_block(j);
  47471. + spin_unlock_jnode(j);
  47472. + if (block == 0) {
  47473. + zero_user(page, 0, PAGE_SIZE);
  47474. + SetPageUptodate(page);
  47475. + unlock_page(page);
  47476. + jput(j);
  47477. + return 0;
  47478. + }
  47479. + break;
  47480. +
  47481. + case ALLOCATED_EXTENT:
  47482. + j = jnode_of_page(page);
  47483. + if (IS_ERR(j))
  47484. + return PTR_ERR(j);
  47485. + if (*jnode_get_block(j) == 0) {
  47486. + reiser4_block_nr blocknr;
  47487. +
  47488. + blocknr = extent_get_start(ext) + pos;
  47489. + jnode_set_block(j, &blocknr);
  47490. + } else
  47491. + assert("vs-1403",
  47492. + j->blocknr == extent_get_start(ext) + pos);
  47493. + break;
  47494. +
  47495. + case UNALLOCATED_EXTENT:
  47496. + j = jfind(mapping, index);
  47497. + assert("nikita-2688", j);
  47498. + assert("vs-1426", jnode_page(j) == NULL);
  47499. +
  47500. + spin_lock_jnode(j);
  47501. + jnode_attach_page(j, page);
  47502. + spin_unlock_jnode(j);
  47503. + break;
  47504. +
  47505. + default:
  47506. + warning("vs-957", "wrong extent\n");
  47507. + return RETERR(-EIO);
  47508. + }
  47509. +
  47510. + BUG_ON(j == 0);
  47511. + reiser4_page_io(page, j, READ, reiser4_ctx_gfp_mask_get());
  47512. + jput(j);
  47513. + return 0;
  47514. +}
  47515. +
  47516. +/* Implements plugin->u.item.s.file.read operation for extent items. */
  47517. +int reiser4_read_extent(struct file *file, flow_t *flow, hint_t *hint)
  47518. +{
  47519. + int result;
  47520. + struct page *page;
  47521. + unsigned long page_idx;
  47522. + unsigned long page_off; /* offset within the page to start read from */
  47523. + unsigned long page_cnt; /* bytes which can be read from the page which
  47524. + contains file_off */
  47525. + struct address_space *mapping;
  47526. + loff_t file_off; /* offset in a file to start read from */
  47527. + uf_coord_t *uf_coord;
  47528. + coord_t *coord;
  47529. + struct extent_coord_extension *ext_coord;
  47530. + char *kaddr;
  47531. +
  47532. + assert("vs-1353", current_blocksize == PAGE_SIZE);
  47533. + assert("vs-572", flow->user == 1);
  47534. + assert("vs-1351", flow->length > 0);
  47535. +
  47536. + uf_coord = &hint->ext_coord;
  47537. +
  47538. + check_uf_coord(uf_coord, NULL);
  47539. + assert("vs-33", uf_coord->lh == &hint->lh);
  47540. +
  47541. + coord = &uf_coord->coord;
  47542. + assert("vs-1119", znode_is_rlocked(coord->node));
  47543. + assert("vs-1120", znode_is_loaded(coord->node));
  47544. + assert("vs-1256", coord_matches_key_extent(coord, &flow->key));
  47545. +
  47546. + mapping = file_inode(file)->i_mapping;
  47547. + ext_coord = &uf_coord->extension.extent;
  47548. +
  47549. + file_off = get_key_offset(&flow->key);
  47550. + page_off = (unsigned long)(file_off & (PAGE_SIZE - 1));
  47551. + page_cnt = PAGE_SIZE - page_off;
  47552. +
  47553. + page_idx = (unsigned long)(file_off >> PAGE_SHIFT);
  47554. +
  47555. + /* we start having twig node read locked. However, we do not want to
  47556. + keep that lock all the time readahead works. So, set a seal and
  47557. + release twig node. */
  47558. + reiser4_set_hint(hint, &flow->key, ZNODE_READ_LOCK);
  47559. + /* &hint->lh is done-ed */
  47560. +
  47561. + do {
  47562. + reiser4_txn_restart_current();
  47563. + page = read_mapping_page(mapping, page_idx, file);
  47564. + if (IS_ERR(page))
  47565. + return PTR_ERR(page);
  47566. + lock_page(page);
  47567. + if (!PageUptodate(page)) {
  47568. + unlock_page(page);
  47569. + put_page(page);
  47570. + warning("jmacd-97178",
  47571. + "extent_read: page is not up to date");
  47572. + return RETERR(-EIO);
  47573. + }
  47574. + mark_page_accessed(page);
  47575. + unlock_page(page);
  47576. +
  47577. + /* If users can be writing to this page using arbitrary virtual
  47578. + addresses, take care about potential aliasing before reading
  47579. + the page on the kernel side.
  47580. + */
  47581. + if (mapping_writably_mapped(mapping))
  47582. + flush_dcache_page(page);
  47583. +
  47584. + assert("nikita-3034", reiser4_schedulable());
  47585. +
  47586. + /* number of bytes which are to be read from the page */
  47587. + if (page_cnt > flow->length)
  47588. + page_cnt = flow->length;
  47589. +
  47590. + result = fault_in_pages_writeable(flow->data, page_cnt);
  47591. + if (result) {
  47592. + put_page(page);
  47593. + return RETERR(-EFAULT);
  47594. + }
  47595. +
  47596. + kaddr = kmap_atomic(page);
  47597. + result = __copy_to_user_inatomic(flow->data,
  47598. + kaddr + page_off, page_cnt);
  47599. + kunmap_atomic(kaddr);
  47600. + if (result != 0) {
  47601. + kaddr = kmap(page);
  47602. + result = __copy_to_user(flow->data,
  47603. + kaddr + page_off, page_cnt);
  47604. + kunmap(page);
  47605. + if (unlikely(result))
  47606. + return RETERR(-EFAULT);
  47607. + }
  47608. + put_page(page);
  47609. +
  47610. + /* increase (flow->key) offset,
  47611. + * update (flow->data) user area pointer
  47612. + */
  47613. + move_flow_forward(flow, page_cnt);
  47614. +
  47615. + page_off = 0;
  47616. + page_idx++;
  47617. +
  47618. + } while (flow->length);
  47619. + return 0;
  47620. +}
  47621. +
  47622. +/*
  47623. + * plugin->s.file.readpage
  47624. + *
  47625. + * reiser4_read->unix_file_read->page_cache_readahead->
  47626. + * ->reiser4_readpage_dispatch->readpage_unix_file->readpage_extent
  47627. + * or
  47628. + * filemap_fault->reiser4_readpage_dispatch->readpage_unix_file->
  47629. + * ->readpage_extent
  47630. + *
  47631. + * At the beginning: coord->node is read locked, zloaded, page is
  47632. + * locked, coord is set to existing unit inside of extent item (it
  47633. + * is not necessary that coord matches to page->index)
  47634. + */
  47635. +int reiser4_readpage_extent(void *vp, struct page *page)
  47636. +{
  47637. + uf_coord_t *uf_coord = vp;
  47638. + ON_DEBUG(coord_t * coord = &uf_coord->coord);
  47639. + ON_DEBUG(reiser4_key key);
  47640. +
  47641. + assert("vs-1040", PageLocked(page));
  47642. + assert("vs-1050", !PageUptodate(page));
  47643. + assert("vs-1039", page->mapping && page->mapping->host);
  47644. +
  47645. + assert("vs-1044", znode_is_loaded(coord->node));
  47646. + assert("vs-758", item_is_extent(coord));
  47647. + assert("vs-1046", coord_is_existing_unit(coord));
  47648. + assert("vs-1045", znode_is_rlocked(coord->node));
  47649. + assert("vs-1047",
  47650. + page->mapping->host->i_ino ==
  47651. + get_key_objectid(item_key_by_coord(coord, &key)));
  47652. + check_uf_coord(uf_coord, NULL);
  47653. +
  47654. + return reiser4_do_readpage_extent(ext_by_ext_coord(uf_coord),
  47655. + uf_coord->extension.extent.pos_in_unit,
  47656. + page);
  47657. +}
  47658. +
  47659. +int get_block_address_extent(const coord_t *coord, sector_t block,
  47660. + sector_t *result)
  47661. +{
  47662. + reiser4_extent *ext;
  47663. +
  47664. + if (!coord_is_existing_unit(coord))
  47665. + return RETERR(-EINVAL);
  47666. +
  47667. + ext = extent_by_coord(coord);
  47668. +
  47669. + if (state_of_extent(ext) != ALLOCATED_EXTENT)
  47670. + /* FIXME: bad things may happen if it is unallocated extent */
  47671. + *result = 0;
  47672. + else {
  47673. + reiser4_key key;
  47674. +
  47675. + unit_key_by_coord(coord, &key);
  47676. + assert("vs-1645",
  47677. + block >= get_key_offset(&key) >> current_blocksize_bits);
  47678. + assert("vs-1646",
  47679. + block <
  47680. + (get_key_offset(&key) >> current_blocksize_bits) +
  47681. + extent_get_width(ext));
  47682. + *result =
  47683. + extent_get_start(ext) + (block -
  47684. + (get_key_offset(&key) >>
  47685. + current_blocksize_bits));
  47686. + }
  47687. + return 0;
  47688. +}
  47689. +
  47690. +/*
  47691. + plugin->u.item.s.file.append_key
  47692. + key of the first byte following the last byte addressed by this extent
  47693. +*/
  47694. +reiser4_key *append_key_extent(const coord_t * coord, reiser4_key * key)
  47695. +{
  47696. + item_key_by_coord(coord, key);
  47697. + set_key_offset(key,
  47698. + get_key_offset(key) + reiser4_extent_size(coord,
  47699. + nr_units_extent
  47700. + (coord)));
  47701. +
  47702. + assert("vs-610", get_key_offset(key)
  47703. + && (get_key_offset(key) & (current_blocksize - 1)) == 0);
  47704. + return key;
  47705. +}
  47706. +
  47707. +/* plugin->u.item.s.file.init_coord_extension */
  47708. +void init_coord_extension_extent(uf_coord_t * uf_coord, loff_t lookuped)
  47709. +{
  47710. + coord_t *coord;
  47711. + struct extent_coord_extension *ext_coord;
  47712. + reiser4_key key;
  47713. + loff_t offset;
  47714. +
  47715. + assert("vs-1295", uf_coord->valid == 0);
  47716. +
  47717. + coord = &uf_coord->coord;
  47718. + assert("vs-1288", coord_is_iplug_set(coord));
  47719. + assert("vs-1327", znode_is_loaded(coord->node));
  47720. +
  47721. + if (coord->between != AFTER_UNIT && coord->between != AT_UNIT)
  47722. + return;
  47723. +
  47724. + ext_coord = &uf_coord->extension.extent;
  47725. + ext_coord->nr_units = nr_units_extent(coord);
  47726. + ext_coord->ext_offset =
  47727. + (char *)extent_by_coord(coord) - zdata(coord->node);
  47728. + ext_coord->width = extent_get_width(extent_by_coord(coord));
  47729. + ON_DEBUG(ext_coord->extent = *extent_by_coord(coord));
  47730. + uf_coord->valid = 1;
  47731. +
  47732. + /* pos_in_unit is the only uninitialized field in extended coord */
  47733. + if (coord->between == AFTER_UNIT) {
  47734. + assert("vs-1330",
  47735. + coord->unit_pos == nr_units_extent(coord) - 1);
  47736. +
  47737. + ext_coord->pos_in_unit = ext_coord->width - 1;
  47738. + } else {
  47739. + /* AT_UNIT */
  47740. + unit_key_by_coord(coord, &key);
  47741. + offset = get_key_offset(&key);
  47742. +
  47743. + assert("vs-1328", offset <= lookuped);
  47744. + assert("vs-1329",
  47745. + lookuped <
  47746. + offset + ext_coord->width * current_blocksize);
  47747. + ext_coord->pos_in_unit =
  47748. + ((lookuped - offset) >> current_blocksize_bits);
  47749. + }
  47750. +}
  47751. +
  47752. +/*
  47753. + * Local variables:
  47754. + * c-indentation-style: "K&R"
  47755. + * mode-name: "LC"
  47756. + * c-basic-offset: 8
  47757. + * tab-width: 8
  47758. + * fill-column: 79
  47759. + * scroll-step: 1
  47760. + * End:
  47761. + */
  47762. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/item/extent_flush_ops.c linux-4.14.2/fs/reiser4/plugin/item/extent_flush_ops.c
  47763. --- linux-4.14.2.orig/fs/reiser4/plugin/item/extent_flush_ops.c 1970-01-01 01:00:00.000000000 +0100
  47764. +++ linux-4.14.2/fs/reiser4/plugin/item/extent_flush_ops.c 2017-11-26 22:13:09.000000000 +0100
  47765. @@ -0,0 +1,686 @@
  47766. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  47767. +
  47768. +#include "item.h"
  47769. +#include "../../tree.h"
  47770. +#include "../../jnode.h"
  47771. +#include "../../super.h"
  47772. +#include "../../flush.h"
  47773. +#include "../../carry.h"
  47774. +#include "../object.h"
  47775. +
  47776. +#include <linux/pagemap.h>
  47777. +
  47778. +static reiser4_block_nr extent_unit_start(const coord_t * item);
  47779. +
  47780. +/* Return either first or last extent (depending on @side) of the item
  47781. + @coord is set to. Set @pos_in_unit either to first or to last block
  47782. + of extent. */
  47783. +static reiser4_extent *extent_utmost_ext(const coord_t * coord, sideof side,
  47784. + reiser4_block_nr * pos_in_unit)
  47785. +{
  47786. + reiser4_extent *ext;
  47787. +
  47788. + if (side == LEFT_SIDE) {
  47789. + /* get first extent of item */
  47790. + ext = extent_item(coord);
  47791. + *pos_in_unit = 0;
  47792. + } else {
  47793. + /* get last extent of item and last position within it */
  47794. + assert("vs-363", side == RIGHT_SIDE);
  47795. + ext = extent_item(coord) + coord_last_unit_pos(coord);
  47796. + *pos_in_unit = extent_get_width(ext) - 1;
  47797. + }
  47798. +
  47799. + return ext;
  47800. +}
  47801. +
  47802. +/* item_plugin->f.utmost_child */
  47803. +/* Return the child. Coord is set to extent item. Find jnode corresponding
  47804. + either to first or to last unformatted node pointed by the item */
  47805. +int utmost_child_extent(const coord_t * coord, sideof side, jnode ** childp)
  47806. +{
  47807. + reiser4_extent *ext;
  47808. + reiser4_block_nr pos_in_unit;
  47809. +
  47810. + ext = extent_utmost_ext(coord, side, &pos_in_unit);
  47811. +
  47812. + switch (state_of_extent(ext)) {
  47813. + case HOLE_EXTENT:
  47814. + *childp = NULL;
  47815. + return 0;
  47816. + case ALLOCATED_EXTENT:
  47817. + case UNALLOCATED_EXTENT:
  47818. + break;
  47819. + default:
  47820. + /* this should never happen */
  47821. + assert("vs-1417", 0);
  47822. + }
  47823. +
  47824. + {
  47825. + reiser4_key key;
  47826. + reiser4_tree *tree;
  47827. + unsigned long index;
  47828. +
  47829. + if (side == LEFT_SIDE) {
  47830. + /* get key of first byte addressed by the extent */
  47831. + item_key_by_coord(coord, &key);
  47832. + } else {
  47833. + /* get key of byte which next after last byte addressed by the extent */
  47834. + append_key_extent(coord, &key);
  47835. + }
  47836. +
  47837. + assert("vs-544",
  47838. + (get_key_offset(&key) >> PAGE_SHIFT) < ~0ul);
  47839. + /* index of first or last (depending on @side) page addressed
  47840. + by the extent */
  47841. + index =
  47842. + (unsigned long)(get_key_offset(&key) >> PAGE_SHIFT);
  47843. + if (side == RIGHT_SIDE)
  47844. + index--;
  47845. +
  47846. + tree = coord->node->zjnode.tree;
  47847. + *childp = jlookup(tree, get_key_objectid(&key), index);
  47848. + }
  47849. +
  47850. + return 0;
  47851. +}
  47852. +
  47853. +/* item_plugin->f.utmost_child_real_block */
  47854. +/* Return the child's block, if allocated. */
  47855. +int
  47856. +utmost_child_real_block_extent(const coord_t * coord, sideof side,
  47857. + reiser4_block_nr * block)
  47858. +{
  47859. + reiser4_extent *ext;
  47860. +
  47861. + ext = extent_by_coord(coord);
  47862. +
  47863. + switch (state_of_extent(ext)) {
  47864. + case ALLOCATED_EXTENT:
  47865. + *block = extent_get_start(ext);
  47866. + if (side == RIGHT_SIDE)
  47867. + *block += extent_get_width(ext) - 1;
  47868. + break;
  47869. + case HOLE_EXTENT:
  47870. + case UNALLOCATED_EXTENT:
  47871. + *block = 0;
  47872. + break;
  47873. + default:
  47874. + /* this should never happen */
  47875. + assert("vs-1418", 0);
  47876. + }
  47877. +
  47878. + return 0;
  47879. +}
  47880. +
  47881. +/* item_plugin->f.scan */
  47882. +/* Performs leftward scanning starting from an unformatted node and its parent coordinate.
  47883. + This scan continues, advancing the parent coordinate, until either it encounters a
  47884. + formatted child or it finishes scanning this node.
  47885. +
  47886. + If unallocated, the entire extent must be dirty and in the same atom. (Actually, I'm
  47887. + not sure this last property (same atom) is enforced, but it should be the case since
  47888. + one atom must write the parent and the others must read the parent, thus fusing?). In
  47889. + any case, the code below asserts this case for unallocated extents. Unallocated
  47890. + extents are thus optimized because we can skip to the endpoint when scanning.
  47891. +
  47892. + It returns control to reiser4_scan_extent, which handles these terminating conditions,
  47893. + e.g., by loading the next twig.
  47894. +*/
  47895. +int reiser4_scan_extent(flush_scan * scan)
  47896. +{
  47897. + coord_t coord;
  47898. + jnode *neighbor;
  47899. + unsigned long scan_index, unit_index, unit_width, scan_max, scan_dist;
  47900. + reiser4_block_nr unit_start;
  47901. + __u64 oid;
  47902. + reiser4_key key;
  47903. + int ret = 0, allocated, incr;
  47904. + reiser4_tree *tree;
  47905. +
  47906. + if (!JF_ISSET(scan->node, JNODE_DIRTY)) {
  47907. + scan->stop = 1;
  47908. + return 0; /* Race with truncate, this node is already
  47909. + * truncated. */
  47910. + }
  47911. +
  47912. + coord_dup(&coord, &scan->parent_coord);
  47913. +
  47914. + assert("jmacd-1404", !reiser4_scan_finished(scan));
  47915. + assert("jmacd-1405", jnode_get_level(scan->node) == LEAF_LEVEL);
  47916. + assert("jmacd-1406", jnode_is_unformatted(scan->node));
  47917. +
  47918. + /* The scan_index variable corresponds to the current page index of the
  47919. + unformatted block scan position. */
  47920. + scan_index = index_jnode(scan->node);
  47921. +
  47922. + assert("jmacd-7889", item_is_extent(&coord));
  47923. +
  47924. + repeat:
  47925. + /* objectid of file */
  47926. + oid = get_key_objectid(item_key_by_coord(&coord, &key));
  47927. +
  47928. + allocated = !extent_is_unallocated(&coord);
  47929. + /* Get the values of this extent unit: */
  47930. + unit_index = extent_unit_index(&coord);
  47931. + unit_width = extent_unit_width(&coord);
  47932. + unit_start = extent_unit_start(&coord);
  47933. +
  47934. + assert("jmacd-7187", unit_width > 0);
  47935. + assert("jmacd-7188", scan_index >= unit_index);
  47936. + assert("jmacd-7189", scan_index <= unit_index + unit_width - 1);
  47937. +
  47938. + /* Depending on the scan direction, we set different maximum values for scan_index
  47939. + (scan_max) and the number of nodes that would be passed if the scan goes the
  47940. + entire way (scan_dist). Incr is an integer reflecting the incremental
  47941. + direction of scan_index. */
  47942. + if (reiser4_scanning_left(scan)) {
  47943. + scan_max = unit_index;
  47944. + scan_dist = scan_index - unit_index;
  47945. + incr = -1;
  47946. + } else {
  47947. + scan_max = unit_index + unit_width - 1;
  47948. + scan_dist = scan_max - unit_index;
  47949. + incr = +1;
  47950. + }
  47951. +
  47952. + tree = coord.node->zjnode.tree;
  47953. +
  47954. + /* If the extent is allocated we have to check each of its blocks. If the extent
  47955. + is unallocated we can skip to the scan_max. */
  47956. + if (allocated) {
  47957. + do {
  47958. + neighbor = jlookup(tree, oid, scan_index);
  47959. + if (neighbor == NULL)
  47960. + goto stop_same_parent;
  47961. +
  47962. + if (scan->node != neighbor
  47963. + && !reiser4_scan_goto(scan, neighbor)) {
  47964. + /* @neighbor was jput() by reiser4_scan_goto */
  47965. + goto stop_same_parent;
  47966. + }
  47967. +
  47968. + ret = scan_set_current(scan, neighbor, 1, &coord);
  47969. + if (ret != 0) {
  47970. + goto exit;
  47971. + }
  47972. +
  47973. + /* reference to @neighbor is stored in @scan, no need
  47974. + to jput(). */
  47975. + scan_index += incr;
  47976. +
  47977. + } while (incr + scan_max != scan_index);
  47978. +
  47979. + } else {
  47980. + /* Optimized case for unallocated extents, skip to the end. */
  47981. + neighbor = jlookup(tree, oid, scan_max /*index */ );
  47982. + if (neighbor == NULL) {
  47983. + /* Race with truncate */
  47984. + scan->stop = 1;
  47985. + ret = 0;
  47986. + goto exit;
  47987. + }
  47988. +
  47989. + assert("zam-1043",
  47990. + reiser4_blocknr_is_fake(jnode_get_block(neighbor)));
  47991. +
  47992. + ret = scan_set_current(scan, neighbor, scan_dist, &coord);
  47993. + if (ret != 0) {
  47994. + goto exit;
  47995. + }
  47996. + }
  47997. +
  47998. + if (coord_sideof_unit(&coord, scan->direction) == 0
  47999. + && item_is_extent(&coord)) {
  48000. + /* Continue as long as there are more extent units. */
  48001. +
  48002. + scan_index =
  48003. + extent_unit_index(&coord) +
  48004. + (reiser4_scanning_left(scan) ?
  48005. + extent_unit_width(&coord) - 1 : 0);
  48006. + goto repeat;
  48007. + }
  48008. +
  48009. + if (0) {
  48010. + stop_same_parent:
  48011. +
  48012. + /* If we are scanning left and we stop in the middle of an allocated
  48013. + extent, we know the preceder immediately.. */
  48014. + /* middle of extent is (scan_index - unit_index) != 0. */
  48015. + if (reiser4_scanning_left(scan) &&
  48016. + (scan_index - unit_index) != 0) {
  48017. + /* FIXME(B): Someone should step-through and verify that this preceder
  48018. + calculation is indeed correct. */
  48019. + /* @unit_start is starting block (number) of extent
  48020. + unit. Flush stopped at the @scan_index block from
  48021. + the beginning of the file, which is (scan_index -
  48022. + unit_index) block within extent.
  48023. + */
  48024. + if (unit_start) {
  48025. + /* skip preceder update when we are at hole */
  48026. + scan->preceder_blk =
  48027. + unit_start + scan_index - unit_index;
  48028. + check_preceder(scan->preceder_blk);
  48029. + }
  48030. + }
  48031. +
  48032. + /* In this case, we leave coord set to the parent of scan->node. */
  48033. + scan->stop = 1;
  48034. +
  48035. + } else {
  48036. + /* In this case, we are still scanning, coord is set to the next item which is
  48037. + either off-the-end of the node or not an extent. */
  48038. + assert("jmacd-8912", scan->stop == 0);
  48039. + assert("jmacd-7812",
  48040. + (coord_is_after_sideof_unit(&coord, scan->direction)
  48041. + || !item_is_extent(&coord)));
  48042. + }
  48043. +
  48044. + ret = 0;
  48045. + exit:
  48046. + return ret;
  48047. +}
  48048. +
  48049. +/**
  48050. + * When on flush time unallocated extent is to be replaced with allocated one
  48051. + * it may happen that one unallocated extent will have to be replaced with set
  48052. + * of allocated extents. In this case insert_into_item will be called which may
  48053. + * have to add new nodes into tree. Space for that is taken from inviolable
  48054. + * reserve (5%).
  48055. + */
  48056. +static reiser4_block_nr reserve_replace(void)
  48057. +{
  48058. + reiser4_block_nr grabbed, needed;
  48059. +
  48060. + grabbed = get_current_context()->grabbed_blocks;
  48061. + needed = estimate_one_insert_into_item(current_tree);
  48062. + check_me("vpf-340", !reiser4_grab_space_force(needed, BA_RESERVED));
  48063. + return grabbed;
  48064. +}
  48065. +
  48066. +static void free_replace_reserved(reiser4_block_nr grabbed)
  48067. +{
  48068. + reiser4_context *ctx;
  48069. +
  48070. + ctx = get_current_context();
  48071. + grabbed2free(ctx, get_super_private(ctx->super),
  48072. + ctx->grabbed_blocks - grabbed);
  48073. +}
  48074. +
  48075. +/* Block offset of first block addressed by unit */
  48076. +__u64 extent_unit_index(const coord_t * item)
  48077. +{
  48078. + reiser4_key key;
  48079. +
  48080. + assert("vs-648", coord_is_existing_unit(item));
  48081. + unit_key_by_coord(item, &key);
  48082. + return get_key_offset(&key) >> current_blocksize_bits;
  48083. +}
  48084. +
  48085. +/* AUDIT shouldn't return value be of reiser4_block_nr type?
  48086. + Josh's answer: who knows? Is a "number of blocks" the same type as "block offset"? */
  48087. +__u64 extent_unit_width(const coord_t * item)
  48088. +{
  48089. + assert("vs-649", coord_is_existing_unit(item));
  48090. + return width_by_coord(item);
  48091. +}
  48092. +
  48093. +/* Starting block location of this unit */
  48094. +static reiser4_block_nr extent_unit_start(const coord_t * item)
  48095. +{
  48096. + return extent_get_start(extent_by_coord(item));
  48097. +}
  48098. +
  48099. +/**
  48100. + * split_allocated_extent -
  48101. + * @coord:
  48102. + * @pos_in_unit:
  48103. + *
  48104. + * replace allocated extent with two allocated extents
  48105. + */
  48106. +int split_allocated_extent(coord_t *coord, reiser4_block_nr pos_in_unit)
  48107. +{
  48108. + int result;
  48109. + struct replace_handle *h;
  48110. + reiser4_extent *ext;
  48111. + reiser4_block_nr grabbed;
  48112. +
  48113. + ext = extent_by_coord(coord);
  48114. + assert("vs-1410", state_of_extent(ext) == ALLOCATED_EXTENT);
  48115. + assert("vs-1411", extent_get_width(ext) > pos_in_unit);
  48116. +
  48117. + h = kmalloc(sizeof(*h), reiser4_ctx_gfp_mask_get());
  48118. + if (h == NULL)
  48119. + return RETERR(-ENOMEM);
  48120. + h->coord = coord;
  48121. + h->lh = znode_lh(coord->node);
  48122. + h->pkey = &h->key;
  48123. + unit_key_by_coord(coord, h->pkey);
  48124. + set_key_offset(h->pkey,
  48125. + (get_key_offset(h->pkey) +
  48126. + pos_in_unit * current_blocksize));
  48127. + reiser4_set_extent(&h->overwrite, extent_get_start(ext),
  48128. + pos_in_unit);
  48129. + reiser4_set_extent(&h->new_extents[0],
  48130. + extent_get_start(ext) + pos_in_unit,
  48131. + extent_get_width(ext) - pos_in_unit);
  48132. + h->nr_new_extents = 1;
  48133. + h->flags = COPI_DONT_SHIFT_LEFT;
  48134. + h->paste_key = h->key;
  48135. +
  48136. + /* reserve space for extent unit paste, @grabbed is reserved before */
  48137. + grabbed = reserve_replace();
  48138. + result = reiser4_replace_extent(h, 0 /* leave @coord set to overwritten
  48139. + extent */);
  48140. + /* restore reserved */
  48141. + free_replace_reserved(grabbed);
  48142. + kfree(h);
  48143. + return result;
  48144. +}
  48145. +
  48146. +/* replace extent @ext by extent @replace. Try to merge @replace with previous extent of the item (if there is
  48147. + one). Return 1 if it succeeded, 0 otherwise. */
  48148. +static int try_to_merge_with_left(coord_t *coord, reiser4_extent *ext,
  48149. + reiser4_extent *replace)
  48150. +{
  48151. + assert("vs-1415", extent_by_coord(coord) == ext);
  48152. +
  48153. + if (coord->unit_pos == 0
  48154. + || state_of_extent(ext - 1) != ALLOCATED_EXTENT)
  48155. + /* previous unit either does not exist or is not an allocated extent */
  48156. + return 0;
  48157. + if (extent_get_start(ext - 1) + extent_get_width(ext - 1) !=
  48158. + extent_get_start(replace))
  48159. + return 0;
  48160. +
  48161. + /* we can glue, widen previous unit */
  48162. + extent_set_width(ext - 1,
  48163. + extent_get_width(ext - 1) + extent_get_width(replace));
  48164. +
  48165. + if (extent_get_width(ext) != extent_get_width(replace)) {
  48166. + /* make current extent narrower */
  48167. + if (state_of_extent(ext) == ALLOCATED_EXTENT)
  48168. + extent_set_start(ext,
  48169. + extent_get_start(ext) +
  48170. + extent_get_width(replace));
  48171. + extent_set_width(ext,
  48172. + extent_get_width(ext) -
  48173. + extent_get_width(replace));
  48174. + } else {
  48175. + /* current extent completely glued with its left neighbor, remove it */
  48176. + coord_t from, to;
  48177. +
  48178. + coord_dup(&from, coord);
  48179. + from.unit_pos = nr_units_extent(coord) - 1;
  48180. + coord_dup(&to, &from);
  48181. +
  48182. + /* currently a cut from an extent item can remove units either from the beginning or from the end. Move the space which got
  48183. + freed after unit removal to the end of the item */
  48184. + memmove(ext, ext + 1,
  48185. + (from.unit_pos -
  48186. + coord->unit_pos) * sizeof(reiser4_extent));
  48187. + /* wipe part of item which is going to be cut, so that node_check will not be confused */
  48188. + cut_node_content(&from, &to, NULL, NULL, NULL);
  48189. + }
  48190. + znode_make_dirty(coord->node);
  48191. + /* move coord back */
  48192. + coord->unit_pos--;
  48193. + return 1;
  48194. +}
  48195. +
  48196. +/**
  48197. + * convert_extent - replace an extent with two extents
  48198. + * @coord: coordinate of extent to be replaced
  48199. + * @replace: extent to overwrite the one @coord is set to
  48200. + *
  48201. + * Overwrites extent @coord is set to and paste one extent unit after
  48202. + * overwritten one if @replace is shorter than initial extent
  48203. + */
  48204. +int convert_extent(coord_t *coord, reiser4_extent *replace)
  48205. +{
  48206. + int result;
  48207. + struct replace_handle *h;
  48208. + reiser4_extent *ext;
  48209. + reiser4_block_nr start, width, new_width;
  48210. + reiser4_block_nr grabbed;
  48211. + extent_state state;
  48212. +
  48213. + ext = extent_by_coord(coord);
  48214. + state = state_of_extent(ext);
  48215. + start = extent_get_start(ext);
  48216. + width = extent_get_width(ext);
  48217. + new_width = extent_get_width(replace);
  48218. +
  48219. + assert("vs-1458", (state == UNALLOCATED_EXTENT ||
  48220. + state == ALLOCATED_EXTENT));
  48221. + assert("vs-1459", width >= new_width);
  48222. +
  48223. + if (try_to_merge_with_left(coord, ext, replace)) {
  48224. + /* merged @replace with left neighbor. Current unit is either
  48225. + removed or narrowed */
  48226. + return 0;
  48227. + }
  48228. +
  48229. + if (width == new_width) {
  48230. + /* replace current extent with @replace */
  48231. + *ext = *replace;
  48232. + znode_make_dirty(coord->node);
  48233. + return 0;
  48234. + }
  48235. +
  48236. + h = kmalloc(sizeof(*h), reiser4_ctx_gfp_mask_get());
  48237. + if (h == NULL)
  48238. + return RETERR(-ENOMEM);
  48239. + h->coord = coord;
  48240. + h->lh = znode_lh(coord->node);
  48241. + h->pkey = &h->key;
  48242. + unit_key_by_coord(coord, h->pkey);
  48243. + set_key_offset(h->pkey,
  48244. + (get_key_offset(h->pkey) + new_width * current_blocksize));
  48245. + h->overwrite = *replace;
  48246. +
  48247. + /* replace @ext with @replace and padding extent */
  48248. + reiser4_set_extent(&h->new_extents[0],
  48249. + (state == ALLOCATED_EXTENT) ?
  48250. + (start + new_width) :
  48251. + UNALLOCATED_EXTENT_START,
  48252. + width - new_width);
  48253. + h->nr_new_extents = 1;
  48254. + h->flags = COPI_DONT_SHIFT_LEFT;
  48255. + h->paste_key = h->key;
  48256. +
  48257. + /* reserve space for extent unit paste, @grabbed is reserved before */
  48258. + grabbed = reserve_replace();
  48259. + result = reiser4_replace_extent(h, 0 /* leave @coord set to overwritten
  48260. + extent */);
  48261. +
  48262. + /* restore reserved */
  48263. + free_replace_reserved(grabbed);
  48264. + kfree(h);
  48265. + return result;
  48266. +}
  48267. +
  48268. +/**
  48269. + * assign_real_blocknrs
  48270. + * @flush_pos:
  48271. + * @oid: objectid of file jnodes to assign block number to belongs to
  48272. + * @index: first jnode on the range
  48273. + * @count: number of jnodes to assign block numbers to
  48274. + * @first: start of allocated block range
  48275. + *
  48276. + * Assigns block numbers to each of @count jnodes. Index of first jnode is
  48277. + * @index. Jnodes get lookuped with jlookup.
  48278. + */
  48279. +void assign_real_blocknrs(flush_pos_t *flush_pos, oid_t oid,
  48280. + unsigned long index, reiser4_block_nr count,
  48281. + reiser4_block_nr first)
  48282. +{
  48283. + unsigned long i;
  48284. + reiser4_tree *tree;
  48285. + txn_atom *atom;
  48286. + int nr;
  48287. +
  48288. + atom = atom_locked_by_fq(flush_pos->fq);
  48289. + assert("vs-1468", atom);
  48290. + BUG_ON(atom == NULL);
  48291. +
  48292. + nr = 0;
  48293. + tree = current_tree;
  48294. + for (i = 0; i < count; ++i, ++index) {
  48295. + jnode *node;
  48296. +
  48297. + node = jlookup(tree, oid, index);
  48298. + assert("", node != NULL);
  48299. + BUG_ON(node == NULL);
  48300. +
  48301. + spin_lock_jnode(node);
  48302. + assert("", !jnode_is_flushprepped(node));
  48303. + assert("vs-1475", node->atom == atom);
  48304. + assert("vs-1476", atomic_read(&node->x_count) > 0);
  48305. +
  48306. + JF_CLR(node, JNODE_FLUSH_RESERVED);
  48307. + jnode_set_block(node, &first);
  48308. + unformatted_make_reloc(node, flush_pos->fq);
  48309. + ON_DEBUG(count_jnode(node->atom, node, NODE_LIST(node),
  48310. + FQ_LIST, 0));
  48311. + spin_unlock_jnode(node);
  48312. + first++;
  48313. +
  48314. + atomic_dec(&node->x_count);
  48315. + nr ++;
  48316. + }
  48317. +
  48318. + spin_unlock_atom(atom);
  48319. + return;
  48320. +}
  48321. +
  48322. +/**
  48323. + * allocated_extent_slum_size
  48324. + * @flush_pos:
  48325. + * @oid:
  48326. + * @index:
  48327. + * @count:
  48328. + *
  48329. + *
  48330. + */
  48331. +int allocated_extent_slum_size(flush_pos_t *flush_pos, oid_t oid,
  48332. + unsigned long index, unsigned long count)
  48333. +{
  48334. + unsigned long i;
  48335. + reiser4_tree *tree;
  48336. + txn_atom *atom;
  48337. + int nr;
  48338. +
  48339. + atom = atom_locked_by_fq(reiser4_pos_fq(flush_pos));
  48340. + assert("vs-1468", atom);
  48341. +
  48342. + nr = 0;
  48343. + tree = current_tree;
  48344. + for (i = 0; i < count; ++i, ++index) {
  48345. + jnode *node;
  48346. +
  48347. + node = jlookup(tree, oid, index);
  48348. + if (!node)
  48349. + break;
  48350. +
  48351. + if (jnode_check_flushprepped(node)) {
  48352. + atomic_dec(&node->x_count);
  48353. + break;
  48354. + }
  48355. +
  48356. + if (node->atom != atom) {
  48357. + /*
  48358. + * this is possible on overwrite: extent_write may
  48359. + * capture several unformatted nodes without capturing
  48360. + * any formatted nodes.
  48361. + */
  48362. + atomic_dec(&node->x_count);
  48363. + break;
  48364. + }
  48365. +
  48366. + assert("vs-1476", atomic_read(&node->x_count) > 1);
  48367. + atomic_dec(&node->x_count);
  48368. + nr ++;
  48369. + }
  48370. +
  48371. + spin_unlock_atom(atom);
  48372. + return nr;
  48373. +}
  48374. +
  48375. +/* if @key is glueable to the item @coord is set to */
  48376. +static int must_insert(const coord_t *coord, const reiser4_key *key)
  48377. +{
  48378. + reiser4_key last;
  48379. +
  48380. + if (item_id_by_coord(coord) == EXTENT_POINTER_ID
  48381. + && keyeq(append_key_extent(coord, &last), key))
  48382. + return 0;
  48383. + return 1;
  48384. +}
  48385. +
  48386. +/**
  48387. + * copy extent @copy to the end of @node.
  48388. + * It may have to either insert new item after the last one,
  48389. + * or append last item, or modify last unit of last item to have
  48390. + * greater width
  48391. + */
  48392. +int put_unit_to_end(znode *node,
  48393. + const reiser4_key *key, reiser4_extent *copy_ext)
  48394. +{
  48395. + int result;
  48396. + coord_t coord;
  48397. + cop_insert_flag flags;
  48398. + reiser4_extent *last_ext;
  48399. + reiser4_item_data data;
  48400. +
  48401. + /* set coord after last unit in an item */
  48402. + coord_init_last_unit(&coord, node);
  48403. + coord.between = AFTER_UNIT;
  48404. +
  48405. + flags =
  48406. + COPI_DONT_SHIFT_LEFT | COPI_DONT_SHIFT_RIGHT | COPI_DONT_ALLOCATE;
  48407. + if (must_insert(&coord, key)) {
  48408. + result =
  48409. + insert_by_coord(&coord, init_new_extent(&data, copy_ext, 1),
  48410. + key, NULL /*lh */ , flags);
  48411. +
  48412. + } else {
  48413. + /* try to glue with last unit */
  48414. + last_ext = extent_by_coord(&coord);
  48415. + if (state_of_extent(last_ext) &&
  48416. + extent_get_start(last_ext) + extent_get_width(last_ext) ==
  48417. + extent_get_start(copy_ext)) {
  48418. + /* widen last unit of node */
  48419. + extent_set_width(last_ext,
  48420. + extent_get_width(last_ext) +
  48421. + extent_get_width(copy_ext));
  48422. + znode_make_dirty(node);
  48423. + return 0;
  48424. + }
  48425. +
  48426. + /* FIXME: put an assertion here that we can not merge last unit in @node and new unit */
  48427. + result =
  48428. + insert_into_item(&coord, NULL /*lh */ , key,
  48429. + init_new_extent(&data, copy_ext, 1),
  48430. + flags);
  48431. + }
  48432. +
  48433. + assert("vs-438", result == 0 || result == -E_NODE_FULL);
  48434. + return result;
  48435. +}
  48436. +
  48437. +int key_by_offset_extent(struct inode *inode, loff_t off, reiser4_key * key)
  48438. +{
  48439. + return key_by_inode_and_offset_common(inode, off, key);
  48440. +}
  48441. +
  48442. +/*
  48443. + * Local variables:
  48444. + * c-indentation-style: "K&R"
  48445. + * mode-name: "LC"
  48446. + * c-basic-offset: 8
  48447. + * tab-width: 8
  48448. + * fill-column: 79
  48449. + * scroll-step: 1
  48450. + * End:
  48451. + */
  48452. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/item/extent.h linux-4.14.2/fs/reiser4/plugin/item/extent.h
  48453. --- linux-4.14.2.orig/fs/reiser4/plugin/item/extent.h 1970-01-01 01:00:00.000000000 +0100
  48454. +++ linux-4.14.2/fs/reiser4/plugin/item/extent.h 2017-11-26 22:13:09.000000000 +0100
  48455. @@ -0,0 +1,231 @@
  48456. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  48457. +
  48458. +#ifndef __REISER4_EXTENT_H__
  48459. +#define __REISER4_EXTENT_H__
  48460. +
  48461. +/* on disk extent */
  48462. +typedef struct {
  48463. + reiser4_dblock_nr start;
  48464. + reiser4_dblock_nr width;
  48465. +} reiser4_extent;
  48466. +
  48467. +struct extent_stat {
  48468. + int unallocated_units;
  48469. + int unallocated_blocks;
  48470. + int allocated_units;
  48471. + int allocated_blocks;
  48472. + int hole_units;
  48473. + int hole_blocks;
  48474. +};
  48475. +
  48476. +/* extents in an extent item can be either holes, or unallocated or allocated
  48477. + extents */
  48478. +typedef enum {
  48479. + HOLE_EXTENT,
  48480. + UNALLOCATED_EXTENT,
  48481. + ALLOCATED_EXTENT
  48482. +} extent_state;
  48483. +
  48484. +#define HOLE_EXTENT_START 0
  48485. +#define UNALLOCATED_EXTENT_START 1
  48486. +#define UNALLOCATED_EXTENT_START2 2
  48487. +
  48488. +struct extent_coord_extension {
  48489. + reiser4_block_nr pos_in_unit;
  48490. + reiser4_block_nr width; /* width of current unit */
  48491. + pos_in_node_t nr_units; /* number of units */
  48492. + int ext_offset; /* offset from the beginning of zdata() */
  48493. + unsigned long expected_page;
  48494. +#if REISER4_DEBUG
  48495. + reiser4_extent extent;
  48496. +#endif
  48497. +};
  48498. +
  48499. +/* macros to set/get fields of on-disk extent */
  48500. +static inline reiser4_block_nr extent_get_start(const reiser4_extent * ext)
  48501. +{
  48502. + return le64_to_cpu(ext->start);
  48503. +}
  48504. +
  48505. +static inline reiser4_block_nr extent_get_width(const reiser4_extent * ext)
  48506. +{
  48507. + return le64_to_cpu(ext->width);
  48508. +}
  48509. +
  48510. +extern __u64 reiser4_current_block_count(void);
  48511. +
  48512. +static inline void
  48513. +extent_set_start(reiser4_extent * ext, reiser4_block_nr start)
  48514. +{
  48515. + cassert(sizeof(ext->start) == 8);
  48516. + assert("nikita-2510",
  48517. + ergo(start > 1, start < reiser4_current_block_count()));
  48518. + put_unaligned(cpu_to_le64(start), &ext->start);
  48519. +}
  48520. +
  48521. +static inline void
  48522. +extent_set_width(reiser4_extent * ext, reiser4_block_nr width)
  48523. +{
  48524. + cassert(sizeof(ext->width) == 8);
  48525. + assert("", width > 0);
  48526. + put_unaligned(cpu_to_le64(width), &ext->width);
  48527. + assert("nikita-2511",
  48528. + ergo(extent_get_start(ext) > 1,
  48529. + extent_get_start(ext) + width <=
  48530. + reiser4_current_block_count()));
  48531. +}
  48532. +
  48533. +#define extent_item(coord) \
  48534. +({ \
  48535. + assert("nikita-3143", item_is_extent(coord)); \
  48536. + ((reiser4_extent *)item_body_by_coord (coord)); \
  48537. +})
  48538. +
  48539. +#define extent_by_coord(coord) \
  48540. +({ \
  48541. + assert("nikita-3144", item_is_extent(coord)); \
  48542. + (extent_item (coord) + (coord)->unit_pos); \
  48543. +})
  48544. +
  48545. +#define width_by_coord(coord) \
  48546. +({ \
  48547. + assert("nikita-3145", item_is_extent(coord)); \
  48548. + extent_get_width (extent_by_coord(coord)); \
  48549. +})
  48550. +
  48551. +struct carry_cut_data;
  48552. +struct carry_kill_data;
  48553. +
  48554. +/* plugin->u.item.b.* */
  48555. +reiser4_key *max_key_inside_extent(const coord_t *, reiser4_key *);
  48556. +int can_contain_key_extent(const coord_t * coord, const reiser4_key * key,
  48557. + const reiser4_item_data *);
  48558. +int mergeable_extent(const coord_t * p1, const coord_t * p2);
  48559. +pos_in_node_t nr_units_extent(const coord_t *);
  48560. +lookup_result lookup_extent(const reiser4_key *, lookup_bias, coord_t *);
  48561. +void init_coord_extent(coord_t *);
  48562. +int init_extent(coord_t *, reiser4_item_data *);
  48563. +int paste_extent(coord_t *, reiser4_item_data *, carry_plugin_info *);
  48564. +int can_shift_extent(unsigned free_space,
  48565. + coord_t * source, znode * target, shift_direction,
  48566. + unsigned *size, unsigned want);
  48567. +void copy_units_extent(coord_t * target, coord_t * source, unsigned from,
  48568. + unsigned count, shift_direction where_is_free_space,
  48569. + unsigned free_space);
  48570. +int kill_hook_extent(const coord_t *, pos_in_node_t from, pos_in_node_t count,
  48571. + struct carry_kill_data *);
  48572. +int create_hook_extent(const coord_t * coord, void *arg);
  48573. +int cut_units_extent(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
  48574. + struct carry_cut_data *, reiser4_key * smallest_removed,
  48575. + reiser4_key * new_first);
  48576. +int kill_units_extent(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
  48577. + struct carry_kill_data *, reiser4_key * smallest_removed,
  48578. + reiser4_key * new_first);
  48579. +reiser4_key *unit_key_extent(const coord_t *, reiser4_key *);
  48580. +reiser4_key *max_unit_key_extent(const coord_t *, reiser4_key *);
  48581. +void print_extent(const char *, coord_t *);
  48582. +int utmost_child_extent(const coord_t * coord, sideof side, jnode ** child);
  48583. +int utmost_child_real_block_extent(const coord_t * coord, sideof side,
  48584. + reiser4_block_nr * block);
  48585. +void item_stat_extent(const coord_t * coord, void *vp);
  48586. +int reiser4_check_extent(const coord_t * coord, const char **error);
  48587. +
  48588. +/* plugin->u.item.s.file.* */
  48589. +ssize_t reiser4_write_extent(struct file *, struct inode * inode,
  48590. + const char __user *, size_t, loff_t *);
  48591. +int reiser4_read_extent(struct file *, flow_t *, hint_t *);
  48592. +int reiser4_readpage_extent(void *, struct page *);
  48593. +int reiser4_do_readpage_extent(reiser4_extent*, reiser4_block_nr, struct page*);
  48594. +reiser4_key *append_key_extent(const coord_t *, reiser4_key *);
  48595. +void init_coord_extension_extent(uf_coord_t *, loff_t offset);
  48596. +int get_block_address_extent(const coord_t *, sector_t block,
  48597. + sector_t * result);
  48598. +
  48599. +/* these are used in flush.c
  48600. + FIXME-VS: should they be somewhere in item_plugin? */
  48601. +int allocate_extent_item_in_place(coord_t *, lock_handle *, flush_pos_t * pos);
  48602. +int allocate_and_copy_extent(znode * left, coord_t * right, flush_pos_t * pos,
  48603. + reiser4_key * stop_key);
  48604. +
  48605. +int extent_is_unallocated(const coord_t * item); /* True if this extent is unallocated (i.e., not a hole, not allocated). */
  48606. +__u64 extent_unit_index(const coord_t * item); /* Block offset of this unit. */
  48607. +__u64 extent_unit_width(const coord_t * item); /* Number of blocks in this unit. */
  48608. +
  48609. +/* plugin->u.item.f. */
  48610. +int reiser4_scan_extent(flush_scan * scan);
  48611. +extern int key_by_offset_extent(struct inode *, loff_t, reiser4_key *);
  48612. +
  48613. +reiser4_item_data *init_new_extent(reiser4_item_data * data, void *ext_unit,
  48614. + int nr_extents);
  48615. +reiser4_block_nr reiser4_extent_size(const coord_t * coord, pos_in_node_t nr);
  48616. +extent_state state_of_extent(reiser4_extent * ext);
  48617. +void reiser4_set_extent(reiser4_extent *, reiser4_block_nr start,
  48618. + reiser4_block_nr width);
  48619. +int reiser4_update_extent(struct inode *, jnode *, loff_t pos,
  48620. + int *plugged_hole);
  48621. +
  48622. +#include "../../coord.h"
  48623. +#include "../../lock.h"
  48624. +#include "../../tap.h"
  48625. +
  48626. +struct replace_handle {
  48627. + /* these are to be set before calling reiser4_replace_extent */
  48628. + coord_t *coord;
  48629. + lock_handle *lh;
  48630. + reiser4_key key;
  48631. + reiser4_key *pkey;
  48632. + reiser4_extent overwrite;
  48633. + reiser4_extent new_extents[2];
  48634. + int nr_new_extents;
  48635. + unsigned flags;
  48636. +
  48637. + /* these are used by reiser4_replace_extent */
  48638. + reiser4_item_data item;
  48639. + coord_t coord_after;
  48640. + lock_handle lh_after;
  48641. + tap_t watch;
  48642. + reiser4_key paste_key;
  48643. +#if REISER4_DEBUG
  48644. + reiser4_extent orig_ext;
  48645. + reiser4_key tmp;
  48646. +#endif
  48647. +};
  48648. +
  48649. +/* this structure is kmalloced before calling make_extent to avoid excessive
  48650. + stack consumption on plug_hole->reiser4_replace_extent */
  48651. +struct make_extent_handle {
  48652. + uf_coord_t *uf_coord;
  48653. + reiser4_block_nr blocknr;
  48654. + int created;
  48655. + struct inode *inode;
  48656. + union {
  48657. + struct {
  48658. + } append;
  48659. + struct replace_handle replace;
  48660. + } u;
  48661. +};
  48662. +
  48663. +int reiser4_replace_extent(struct replace_handle *,
  48664. + int return_inserted_position);
  48665. +lock_handle *znode_lh(znode *);
  48666. +
  48667. +/* the reiser4 repacker support */
  48668. +struct repacker_cursor;
  48669. +extern int process_extent_backward_for_repacking(tap_t *,
  48670. + struct repacker_cursor *);
  48671. +extern int mark_extent_for_repacking(tap_t *, int);
  48672. +
  48673. +#define coord_by_uf_coord(uf_coord) (&((uf_coord)->coord))
  48674. +#define ext_coord_by_uf_coord(uf_coord) (&((uf_coord)->extension.extent))
  48675. +
  48676. +/* __REISER4_EXTENT_H__ */
  48677. +#endif
  48678. +/*
  48679. + Local variables:
  48680. + c-indentation-style: "K&R"
  48681. + mode-name: "LC"
  48682. + c-basic-offset: 8
  48683. + tab-width: 8
  48684. + fill-column: 120
  48685. + End:
  48686. +*/
  48687. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/item/extent_item_ops.c linux-4.14.2/fs/reiser4/plugin/item/extent_item_ops.c
  48688. --- linux-4.14.2.orig/fs/reiser4/plugin/item/extent_item_ops.c 1970-01-01 01:00:00.000000000 +0100
  48689. +++ linux-4.14.2/fs/reiser4/plugin/item/extent_item_ops.c 2017-11-26 22:13:09.000000000 +0100
  48690. @@ -0,0 +1,887 @@
  48691. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  48692. +
  48693. +#include "item.h"
  48694. +#include "../../inode.h"
  48695. +#include "../../tree_walk.h" /* check_sibling_list() */
  48696. +#include "../../page_cache.h"
  48697. +#include "../../carry.h"
  48698. +
  48699. +/* item_plugin->b.max_key_inside */
  48700. +reiser4_key *max_key_inside_extent(const coord_t * coord, reiser4_key * key)
  48701. +{
  48702. + item_key_by_coord(coord, key);
  48703. + set_key_offset(key, get_key_offset(reiser4_max_key()));
  48704. + return key;
  48705. +}
  48706. +
  48707. +/* item_plugin->b.can_contain_key
  48708. + this checks whether @key of @data is matching to position set by @coord */
  48709. +int
  48710. +can_contain_key_extent(const coord_t * coord, const reiser4_key * key,
  48711. + const reiser4_item_data * data)
  48712. +{
  48713. + reiser4_key item_key;
  48714. +
  48715. + if (item_plugin_by_coord(coord) != data->iplug)
  48716. + return 0;
  48717. +
  48718. + item_key_by_coord(coord, &item_key);
  48719. + if (get_key_locality(key) != get_key_locality(&item_key) ||
  48720. + get_key_objectid(key) != get_key_objectid(&item_key) ||
  48721. + get_key_ordering(key) != get_key_ordering(&item_key))
  48722. + return 0;
  48723. +
  48724. + return 1;
  48725. +}
  48726. +
  48727. +/* item_plugin->b.mergeable
  48728. + first item is of extent type */
  48729. +/* Audited by: green(2002.06.13) */
  48730. +int mergeable_extent(const coord_t * p1, const coord_t * p2)
  48731. +{
  48732. + reiser4_key key1, key2;
  48733. +
  48734. + assert("vs-299", item_id_by_coord(p1) == EXTENT_POINTER_ID);
  48735. + /* FIXME-VS: Which is it? Assert or return 0 */
  48736. + if (item_id_by_coord(p2) != EXTENT_POINTER_ID) {
  48737. + return 0;
  48738. + }
  48739. +
  48740. + item_key_by_coord(p1, &key1);
  48741. + item_key_by_coord(p2, &key2);
  48742. + if (get_key_locality(&key1) != get_key_locality(&key2) ||
  48743. + get_key_objectid(&key1) != get_key_objectid(&key2) ||
  48744. + get_key_ordering(&key1) != get_key_ordering(&key2) ||
  48745. + get_key_type(&key1) != get_key_type(&key2))
  48746. + return 0;
  48747. + if (get_key_offset(&key1) +
  48748. + reiser4_extent_size(p1, nr_units_extent(p1)) !=
  48749. + get_key_offset(&key2))
  48750. + return 0;
  48751. + return 1;
  48752. +}
  48753. +
  48754. +/* item_plugin->b.nr_units */
  48755. +pos_in_node_t nr_units_extent(const coord_t * coord)
  48756. +{
  48757. + /* length of extent item has to be multiple of extent size */
  48758. + assert("vs-1424",
  48759. + (item_length_by_coord(coord) % sizeof(reiser4_extent)) == 0);
  48760. + return item_length_by_coord(coord) / sizeof(reiser4_extent);
  48761. +}
  48762. +
  48763. +/* item_plugin->b.lookup */
  48764. +lookup_result
  48765. +lookup_extent(const reiser4_key * key, lookup_bias bias UNUSED_ARG,
  48766. + coord_t * coord)
  48767. +{ /* znode and item_pos are
  48768. + set to an extent item to
  48769. + look through */
  48770. + reiser4_key item_key;
  48771. + reiser4_block_nr lookuped, offset;
  48772. + unsigned i, nr_units;
  48773. + reiser4_extent *ext;
  48774. + unsigned blocksize;
  48775. + unsigned char blocksize_bits;
  48776. +
  48777. + item_key_by_coord(coord, &item_key);
  48778. + offset = get_key_offset(&item_key);
  48779. +
  48780. + /* key we are looking for must be greater than key of item @coord */
  48781. + assert("vs-414", keygt(key, &item_key));
  48782. +
  48783. + assert("umka-99945",
  48784. + !keygt(key, max_key_inside_extent(coord, &item_key)));
  48785. +
  48786. + ext = extent_item(coord);
  48787. + assert("vs-1350", (char *)ext == (zdata(coord->node) + coord->offset));
  48788. +
  48789. + blocksize = current_blocksize;
  48790. + blocksize_bits = current_blocksize_bits;
  48791. +
  48792. + /* offset we are looking for */
  48793. + lookuped = get_key_offset(key);
  48794. +
  48795. + nr_units = nr_units_extent(coord);
  48796. + /* go through all extents until the one which address given offset */
  48797. + for (i = 0; i < nr_units; i++, ext++) {
  48798. + offset += (extent_get_width(ext) << blocksize_bits);
  48799. + if (offset > lookuped) {
  48800. + /* desired byte is somewhere in this extent */
  48801. + coord->unit_pos = i;
  48802. + coord->between = AT_UNIT;
  48803. + return CBK_COORD_FOUND;
  48804. + }
  48805. + }
  48806. +
  48807. + /* set coord after last unit */
  48808. + coord->unit_pos = nr_units - 1;
  48809. + coord->between = AFTER_UNIT;
  48810. + return CBK_COORD_FOUND;
  48811. +}
  48812. +
  48813. +/* item_plugin->b.paste
  48814. + item @coord is set to has been appended with @data->length of free
  48815. + space. data->data contains data to be pasted into the item in position
  48816. + @coord->in_item.unit_pos. It must fit into that free space.
  48817. + @coord must be set between units.
  48818. +*/
  48819. +int
  48820. +paste_extent(coord_t * coord, reiser4_item_data * data,
  48821. + carry_plugin_info * info UNUSED_ARG)
  48822. +{
  48823. + unsigned old_nr_units;
  48824. + reiser4_extent *ext;
  48825. + int item_length;
  48826. +
  48827. + ext = extent_item(coord);
  48828. + item_length = item_length_by_coord(coord);
  48829. + old_nr_units = (item_length - data->length) / sizeof(reiser4_extent);
  48830. +
  48831. + /* this is also used to copy extent into newly created item, so
  48832. + old_nr_units could be 0 */
  48833. + assert("vs-260", item_length >= data->length);
  48834. +
  48835. + /* make sure that coord is set properly */
  48836. + assert("vs-35",
  48837. + ((!coord_is_existing_unit(coord))
  48838. + || (!old_nr_units && !coord->unit_pos)));
  48839. +
  48840. + /* first unit to be moved */
  48841. + switch (coord->between) {
  48842. + case AFTER_UNIT:
  48843. + coord->unit_pos++;
  48844. + case BEFORE_UNIT:
  48845. + coord->between = AT_UNIT;
  48846. + break;
  48847. + case AT_UNIT:
  48848. + assert("vs-331", !old_nr_units && !coord->unit_pos);
  48849. + break;
  48850. + default:
  48851. + impossible("vs-330", "coord is set improperly");
  48852. + }
  48853. +
  48854. + /* prepare space for new units */
  48855. + memmove(ext + coord->unit_pos + data->length / sizeof(reiser4_extent),
  48856. + ext + coord->unit_pos,
  48857. + (old_nr_units - coord->unit_pos) * sizeof(reiser4_extent));
  48858. +
  48859. + /* copy new data from kernel space */
  48860. + assert("vs-556", data->user == 0);
  48861. + memcpy(ext + coord->unit_pos, data->data, (unsigned)data->length);
  48862. +
  48863. + /* after paste @coord is set to first of pasted units */
  48864. + assert("vs-332", coord_is_existing_unit(coord));
  48865. + assert("vs-333",
  48866. + !memcmp(data->data, extent_by_coord(coord),
  48867. + (unsigned)data->length));
  48868. + return 0;
  48869. +}
  48870. +
  48871. +/* item_plugin->b.can_shift */
  48872. +int
  48873. +can_shift_extent(unsigned free_space, coord_t * source,
  48874. + znode * target UNUSED_ARG, shift_direction pend UNUSED_ARG,
  48875. + unsigned *size, unsigned want)
  48876. +{
  48877. + *size = item_length_by_coord(source);
  48878. + if (*size > free_space)
  48879. + /* never split a unit of extent item */
  48880. + *size = free_space - free_space % sizeof(reiser4_extent);
  48881. +
  48882. + /* we can shift *size bytes, calculate how many do we want to shift */
  48883. + if (*size > want * sizeof(reiser4_extent))
  48884. + *size = want * sizeof(reiser4_extent);
  48885. +
  48886. + if (*size % sizeof(reiser4_extent) != 0)
  48887. + impossible("vs-119", "Wrong extent size: %i %zd", *size,
  48888. + sizeof(reiser4_extent));
  48889. + return *size / sizeof(reiser4_extent);
  48890. +
  48891. +}
  48892. +
  48893. +/* item_plugin->b.copy_units */
  48894. +void
  48895. +copy_units_extent(coord_t * target, coord_t * source,
  48896. + unsigned from, unsigned count,
  48897. + shift_direction where_is_free_space, unsigned free_space)
  48898. +{
  48899. + char *from_ext, *to_ext;
  48900. +
  48901. + assert("vs-217", free_space == count * sizeof(reiser4_extent));
  48902. +
  48903. + from_ext = item_body_by_coord(source);
  48904. + to_ext = item_body_by_coord(target);
  48905. +
  48906. + if (where_is_free_space == SHIFT_LEFT) {
  48907. + assert("vs-215", from == 0);
  48908. +
  48909. + /* At this moment, item length was already updated in the item
  48910. + header by shifting code, hence nr_units_extent() will
  48911. + return "new" number of units---one we obtain after copying
  48912. + units.
  48913. + */
  48914. + to_ext +=
  48915. + (nr_units_extent(target) - count) * sizeof(reiser4_extent);
  48916. + } else {
  48917. + reiser4_key key;
  48918. + coord_t coord;
  48919. +
  48920. + assert("vs-216",
  48921. + from + count == coord_last_unit_pos(source) + 1);
  48922. +
  48923. + from_ext += item_length_by_coord(source) - free_space;
  48924. +
  48925. + /* new units are inserted before first unit in an item,
  48926. + therefore, we have to update item key */
  48927. + coord = *source;
  48928. + coord.unit_pos = from;
  48929. + unit_key_extent(&coord, &key);
  48930. +
  48931. + node_plugin_by_node(target->node)->update_item_key(target, &key,
  48932. + NULL /*info */);
  48933. + }
  48934. +
  48935. + memcpy(to_ext, from_ext, free_space);
  48936. +}
  48937. +
  48938. +/* item_plugin->b.create_hook
  48939. + @arg is znode of leaf node for which we need to update right delimiting key */
  48940. +int create_hook_extent(const coord_t * coord, void *arg)
  48941. +{
  48942. + coord_t *child_coord;
  48943. + znode *node;
  48944. + reiser4_key key;
  48945. + reiser4_tree *tree;
  48946. +
  48947. + if (!arg)
  48948. + return 0;
  48949. +
  48950. + child_coord = arg;
  48951. + tree = znode_get_tree(coord->node);
  48952. +
  48953. + assert("nikita-3246", znode_get_level(child_coord->node) == LEAF_LEVEL);
  48954. +
  48955. + write_lock_tree(tree);
  48956. + write_lock_dk(tree);
  48957. + /* find a node on the left level for which right delimiting key has to
  48958. + be updated */
  48959. + if (coord_wrt(child_coord) == COORD_ON_THE_LEFT) {
  48960. + assert("vs-411", znode_is_left_connected(child_coord->node));
  48961. + node = child_coord->node->left;
  48962. + } else {
  48963. + assert("vs-412", coord_wrt(child_coord) == COORD_ON_THE_RIGHT);
  48964. + node = child_coord->node;
  48965. + assert("nikita-3314", node != NULL);
  48966. + }
  48967. +
  48968. + if (node != NULL) {
  48969. + znode_set_rd_key(node, item_key_by_coord(coord, &key));
  48970. +
  48971. + assert("nikita-3282", check_sibling_list(node));
  48972. + /* break sibling links */
  48973. + if (ZF_ISSET(node, JNODE_RIGHT_CONNECTED) && node->right) {
  48974. + ON_DEBUG(node->right->left_version =
  48975. + atomic_inc_return(&delim_key_version);
  48976. + node->right_version =
  48977. + atomic_inc_return(&delim_key_version););
  48978. +
  48979. + node->right->left = NULL;
  48980. + node->right = NULL;
  48981. + }
  48982. + }
  48983. + write_unlock_dk(tree);
  48984. + write_unlock_tree(tree);
  48985. + return 0;
  48986. +}
  48987. +
  48988. +#define ITEM_TAIL_KILLED 0
  48989. +#define ITEM_HEAD_KILLED 1
  48990. +#define ITEM_KILLED 2
  48991. +
  48992. +/* item_plugin->b.kill_hook
  48993. + this is called when @count units starting from @from-th one are going to be removed
  48994. + */
  48995. +int
  48996. +kill_hook_extent(const coord_t * coord, pos_in_node_t from, pos_in_node_t count,
  48997. + struct carry_kill_data *kdata)
  48998. +{
  48999. + reiser4_extent *ext;
  49000. + reiser4_block_nr start, length;
  49001. + const reiser4_key *pfrom_key, *pto_key;
  49002. + struct inode *inode;
  49003. + reiser4_tree *tree;
  49004. + pgoff_t from_off, to_off, offset, skip;
  49005. + int retval;
  49006. +
  49007. + /* these are located in memory kmalloc-ed by kill_node_content */
  49008. + reiser4_key *min_item_key, *max_item_key, *from_key, *to_key, *key;
  49009. + coord_t *dup, *next;
  49010. +
  49011. + assert("zam-811", znode_is_write_locked(coord->node));
  49012. + assert("nikita-3315", kdata != NULL);
  49013. + assert("vs-34", kdata->buf != NULL);
  49014. +
  49015. + /* map structures to kdata->buf */
  49016. + min_item_key = (reiser4_key *) (kdata->buf);
  49017. + max_item_key = min_item_key + 1;
  49018. + from_key = max_item_key + 1;
  49019. + to_key = from_key + 1;
  49020. + key = to_key + 1;
  49021. + dup = (coord_t *) (key + 1);
  49022. + next = dup + 1;
  49023. +
  49024. + item_key_by_coord(coord, min_item_key);
  49025. + max_item_key_by_coord(coord, max_item_key);
  49026. +
  49027. + if (kdata->params.from_key) {
  49028. + pfrom_key = kdata->params.from_key;
  49029. + pto_key = kdata->params.to_key;
  49030. + } else {
  49031. + assert("vs-1549", from == coord->unit_pos);
  49032. + unit_key_by_coord(coord, from_key);
  49033. + pfrom_key = from_key;
  49034. +
  49035. + coord_dup(dup, coord);
  49036. + dup->unit_pos = from + count - 1;
  49037. + max_unit_key_by_coord(dup, to_key);
  49038. + pto_key = to_key;
  49039. + }
  49040. +
  49041. + if (!keylt(pto_key, max_item_key)) {
  49042. + if (!keygt(pfrom_key, min_item_key)) {
  49043. + znode *left, *right;
  49044. +
  49045. + /* item is to be removed completely */
  49046. + assert("nikita-3316", kdata->left != NULL
  49047. + && kdata->right != NULL);
  49048. +
  49049. + left = kdata->left->node;
  49050. + right = kdata->right->node;
  49051. +
  49052. + tree = current_tree;
  49053. + /* we have to do two things:
  49054. + *
  49055. + * 1. link left and right formatted neighbors of
  49056. + * extent being removed, and
  49057. + *
  49058. + * 2. update their delimiting keys.
  49059. + *
  49060. + * atomicity of these operations is protected by
  49061. + * taking dk-lock and tree-lock.
  49062. + */
  49063. + /* if neighbors of item being removed are znodes -
  49064. + * link them */
  49065. + write_lock_tree(tree);
  49066. + write_lock_dk(tree);
  49067. + link_left_and_right(left, right);
  49068. + if (left) {
  49069. + /* update right delimiting key of left
  49070. + * neighbor of extent item */
  49071. + /*coord_t next;
  49072. + reiser4_key key; */
  49073. +
  49074. + coord_dup(next, coord);
  49075. +
  49076. + if (coord_next_item(next))
  49077. + *key = *znode_get_rd_key(coord->node);
  49078. + else
  49079. + item_key_by_coord(next, key);
  49080. + znode_set_rd_key(left, key);
  49081. + }
  49082. + write_unlock_dk(tree);
  49083. + write_unlock_tree(tree);
  49084. +
  49085. + from_off =
  49086. + get_key_offset(min_item_key) >> PAGE_SHIFT;
  49087. + to_off =
  49088. + (get_key_offset(max_item_key) +
  49089. + 1) >> PAGE_SHIFT;
  49090. + retval = ITEM_KILLED;
  49091. + } else {
  49092. + /* tail of item is to be removed */
  49093. + from_off =
  49094. + (get_key_offset(pfrom_key) + PAGE_SIZE -
  49095. + 1) >> PAGE_SHIFT;
  49096. + to_off =
  49097. + (get_key_offset(max_item_key) +
  49098. + 1) >> PAGE_SHIFT;
  49099. + retval = ITEM_TAIL_KILLED;
  49100. + }
  49101. + } else {
  49102. + /* head of item is to be removed */
  49103. + assert("vs-1571", keyeq(pfrom_key, min_item_key));
  49104. + assert("vs-1572",
  49105. + (get_key_offset(pfrom_key) & (PAGE_SIZE - 1)) ==
  49106. + 0);
  49107. + assert("vs-1573",
  49108. + ((get_key_offset(pto_key) + 1) & (PAGE_SIZE -
  49109. + 1)) == 0);
  49110. +
  49111. + if (kdata->left->node) {
  49112. + /* update right delimiting key of left neighbor of extent item */
  49113. + /*reiser4_key key; */
  49114. +
  49115. + *key = *pto_key;
  49116. + set_key_offset(key, get_key_offset(pto_key) + 1);
  49117. +
  49118. + write_lock_dk(current_tree);
  49119. + znode_set_rd_key(kdata->left->node, key);
  49120. + write_unlock_dk(current_tree);
  49121. + }
  49122. +
  49123. + from_off = get_key_offset(pfrom_key) >> PAGE_SHIFT;
  49124. + to_off = (get_key_offset(pto_key) + 1) >> PAGE_SHIFT;
  49125. + retval = ITEM_HEAD_KILLED;
  49126. + }
  49127. +
  49128. + inode = kdata->inode;
  49129. + assert("vs-1545", inode != NULL);
  49130. + if (inode != NULL)
  49131. + /* take care of pages and jnodes corresponding to part of item being killed */
  49132. + reiser4_invalidate_pages(inode->i_mapping, from_off,
  49133. + to_off - from_off,
  49134. + kdata->params.truncate);
  49135. +
  49136. + ext = extent_item(coord) + from;
  49137. + offset =
  49138. + (get_key_offset(min_item_key) +
  49139. + reiser4_extent_size(coord, from)) >> PAGE_SHIFT;
  49140. +
  49141. + assert("vs-1551", from_off >= offset);
  49142. + assert("vs-1552", from_off - offset <= extent_get_width(ext));
  49143. + skip = from_off - offset;
  49144. + offset = from_off;
  49145. +
  49146. + while (offset < to_off) {
  49147. + length = extent_get_width(ext) - skip;
  49148. + if (state_of_extent(ext) == HOLE_EXTENT) {
  49149. + skip = 0;
  49150. + offset += length;
  49151. + ext++;
  49152. + continue;
  49153. + }
  49154. +
  49155. + if (offset + length > to_off) {
  49156. + length = to_off - offset;
  49157. + }
  49158. +
  49159. + inode_sub_blocks(inode, length);
  49160. +
  49161. + if (state_of_extent(ext) == UNALLOCATED_EXTENT) {
  49162. + /* some jnodes corresponding to this unallocated extent */
  49163. + fake_allocated2free(length, 0 /* unformatted */ );
  49164. +
  49165. + skip = 0;
  49166. + offset += length;
  49167. + ext++;
  49168. + continue;
  49169. + }
  49170. +
  49171. + assert("vs-1218", state_of_extent(ext) == ALLOCATED_EXTENT);
  49172. +
  49173. + if (length != 0) {
  49174. + start = extent_get_start(ext) + skip;
  49175. +
  49176. + /* BA_DEFER bit parameter is turned on because blocks which get freed are not safe to be freed
  49177. + immediately */
  49178. + reiser4_dealloc_blocks(&start, &length,
  49179. + 0 /* not used */ ,
  49180. + BA_DEFER
  49181. + /* unformatted with defer */ );
  49182. + }
  49183. + skip = 0;
  49184. + offset += length;
  49185. + ext++;
  49186. + }
  49187. + return retval;
  49188. +}
  49189. +
  49190. +/* item_plugin->b.kill_units */
  49191. +int
  49192. +kill_units_extent(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
  49193. + struct carry_kill_data *kdata, reiser4_key * smallest_removed,
  49194. + reiser4_key * new_first)
  49195. +{
  49196. + reiser4_extent *ext;
  49197. + reiser4_key item_key;
  49198. + pos_in_node_t count;
  49199. + reiser4_key from_key, to_key;
  49200. + const reiser4_key *pfrom_key, *pto_key;
  49201. + loff_t off;
  49202. + int result;
  49203. +
  49204. + assert("vs-1541",
  49205. + ((kdata->params.from_key == NULL && kdata->params.to_key == NULL)
  49206. + || (kdata->params.from_key != NULL
  49207. + && kdata->params.to_key != NULL)));
  49208. +
  49209. + if (kdata->params.from_key) {
  49210. + pfrom_key = kdata->params.from_key;
  49211. + pto_key = kdata->params.to_key;
  49212. + } else {
  49213. + coord_t dup;
  49214. +
  49215. + /* calculate key range of kill */
  49216. + assert("vs-1549", from == coord->unit_pos);
  49217. + unit_key_by_coord(coord, &from_key);
  49218. + pfrom_key = &from_key;
  49219. +
  49220. + coord_dup(&dup, coord);
  49221. + dup.unit_pos = to;
  49222. + max_unit_key_by_coord(&dup, &to_key);
  49223. + pto_key = &to_key;
  49224. + }
  49225. +
  49226. + item_key_by_coord(coord, &item_key);
  49227. +
  49228. +#if REISER4_DEBUG
  49229. + {
  49230. + reiser4_key max_item_key;
  49231. +
  49232. + max_item_key_by_coord(coord, &max_item_key);
  49233. +
  49234. + if (new_first) {
  49235. + /* head of item is to be cut */
  49236. + assert("vs-1542", keyeq(pfrom_key, &item_key));
  49237. + assert("vs-1538", keylt(pto_key, &max_item_key));
  49238. + } else {
  49239. + /* tail of item is to be cut */
  49240. + assert("vs-1540", keygt(pfrom_key, &item_key));
  49241. + assert("vs-1543", !keylt(pto_key, &max_item_key));
  49242. + }
  49243. + }
  49244. +#endif
  49245. +
  49246. + if (smallest_removed)
  49247. + *smallest_removed = *pfrom_key;
  49248. +
  49249. + if (new_first) {
  49250. + /* item head is cut. Item key will change. This new key is calculated here */
  49251. + assert("vs-1556",
  49252. + (get_key_offset(pto_key) & (PAGE_SIZE - 1)) ==
  49253. + (PAGE_SIZE - 1));
  49254. + *new_first = *pto_key;
  49255. + set_key_offset(new_first, get_key_offset(new_first) + 1);
  49256. + }
  49257. +
  49258. + count = to - from + 1;
  49259. + result = kill_hook_extent(coord, from, count, kdata);
  49260. + if (result == ITEM_TAIL_KILLED) {
  49261. + assert("vs-1553",
  49262. + get_key_offset(pfrom_key) >=
  49263. + get_key_offset(&item_key) +
  49264. + reiser4_extent_size(coord, from));
  49265. + off =
  49266. + get_key_offset(pfrom_key) -
  49267. + (get_key_offset(&item_key) +
  49268. + reiser4_extent_size(coord, from));
  49269. + if (off) {
  49270. + /* unit @from is to be cut partially. Its width decreases */
  49271. + ext = extent_item(coord) + from;
  49272. + extent_set_width(ext,
  49273. + (off + PAGE_SIZE -
  49274. + 1) >> PAGE_SHIFT);
  49275. + count--;
  49276. + }
  49277. + } else {
  49278. + __u64 max_to_offset;
  49279. + __u64 rest;
  49280. +
  49281. + assert("vs-1575", result == ITEM_HEAD_KILLED);
  49282. + assert("", from == 0);
  49283. + assert("",
  49284. + ((get_key_offset(pto_key) + 1) & (PAGE_SIZE -
  49285. + 1)) == 0);
  49286. + assert("",
  49287. + get_key_offset(pto_key) + 1 >
  49288. + get_key_offset(&item_key) +
  49289. + reiser4_extent_size(coord, to));
  49290. + max_to_offset =
  49291. + get_key_offset(&item_key) +
  49292. + reiser4_extent_size(coord, to + 1) - 1;
  49293. + assert("", get_key_offset(pto_key) <= max_to_offset);
  49294. +
  49295. + rest =
  49296. + (max_to_offset -
  49297. + get_key_offset(pto_key)) >> PAGE_SHIFT;
  49298. + if (rest) {
  49299. + /* unit @to is to be cut partially */
  49300. + ext = extent_item(coord) + to;
  49301. +
  49302. + assert("", extent_get_width(ext) > rest);
  49303. +
  49304. + if (state_of_extent(ext) == ALLOCATED_EXTENT)
  49305. + extent_set_start(ext,
  49306. + extent_get_start(ext) +
  49307. + (extent_get_width(ext) -
  49308. + rest));
  49309. +
  49310. + extent_set_width(ext, rest);
  49311. + count--;
  49312. + }
  49313. + }
  49314. + return count * sizeof(reiser4_extent);
  49315. +}
  49316. +
  49317. +/* item_plugin->b.cut_units
  49318. + this is too similar to kill_units_extent */
  49319. +int
  49320. +cut_units_extent(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
  49321. + struct carry_cut_data *cdata, reiser4_key * smallest_removed,
  49322. + reiser4_key * new_first)
  49323. +{
  49324. + reiser4_extent *ext;
  49325. + reiser4_key item_key;
  49326. + pos_in_node_t count;
  49327. + reiser4_key from_key, to_key;
  49328. + const reiser4_key *pfrom_key, *pto_key;
  49329. + loff_t off;
  49330. +
  49331. + assert("vs-1541",
  49332. + ((cdata->params.from_key == NULL && cdata->params.to_key == NULL)
  49333. + || (cdata->params.from_key != NULL
  49334. + && cdata->params.to_key != NULL)));
  49335. +
  49336. + if (cdata->params.from_key) {
  49337. + pfrom_key = cdata->params.from_key;
  49338. + pto_key = cdata->params.to_key;
  49339. + } else {
  49340. + coord_t dup;
  49341. +
  49342. + /* calculate key range of kill */
  49343. + coord_dup(&dup, coord);
  49344. + dup.unit_pos = from;
  49345. + unit_key_by_coord(&dup, &from_key);
  49346. +
  49347. + dup.unit_pos = to;
  49348. + max_unit_key_by_coord(&dup, &to_key);
  49349. +
  49350. + pfrom_key = &from_key;
  49351. + pto_key = &to_key;
  49352. + }
  49353. +
  49354. + assert("vs-1555",
  49355. + (get_key_offset(pfrom_key) & (PAGE_SIZE - 1)) == 0);
  49356. + assert("vs-1556",
  49357. + (get_key_offset(pto_key) & (PAGE_SIZE - 1)) ==
  49358. + (PAGE_SIZE - 1));
  49359. +
  49360. + item_key_by_coord(coord, &item_key);
  49361. +
  49362. +#if REISER4_DEBUG
  49363. + {
  49364. + reiser4_key max_item_key;
  49365. +
  49366. + assert("vs-1584",
  49367. + get_key_locality(pfrom_key) ==
  49368. + get_key_locality(&item_key));
  49369. + assert("vs-1585",
  49370. + get_key_type(pfrom_key) == get_key_type(&item_key));
  49371. + assert("vs-1586",
  49372. + get_key_objectid(pfrom_key) ==
  49373. + get_key_objectid(&item_key));
  49374. + assert("vs-1587",
  49375. + get_key_ordering(pfrom_key) ==
  49376. + get_key_ordering(&item_key));
  49377. +
  49378. + max_item_key_by_coord(coord, &max_item_key);
  49379. +
  49380. + if (new_first != NULL) {
  49381. + /* head of item is to be cut */
  49382. + assert("vs-1542", keyeq(pfrom_key, &item_key));
  49383. + assert("vs-1538", keylt(pto_key, &max_item_key));
  49384. + } else {
  49385. + /* tail of item is to be cut */
  49386. + assert("vs-1540", keygt(pfrom_key, &item_key));
  49387. + assert("vs-1543", keyeq(pto_key, &max_item_key));
  49388. + }
  49389. + }
  49390. +#endif
  49391. +
  49392. + if (smallest_removed)
  49393. + *smallest_removed = *pfrom_key;
  49394. +
  49395. + if (new_first) {
  49396. + /* item head is cut. Item key will change. This new key is calculated here */
  49397. + *new_first = *pto_key;
  49398. + set_key_offset(new_first, get_key_offset(new_first) + 1);
  49399. + }
  49400. +
  49401. + count = to - from + 1;
  49402. +
  49403. + assert("vs-1553",
  49404. + get_key_offset(pfrom_key) >=
  49405. + get_key_offset(&item_key) + reiser4_extent_size(coord, from));
  49406. + off =
  49407. + get_key_offset(pfrom_key) - (get_key_offset(&item_key) +
  49408. + reiser4_extent_size(coord, from));
  49409. + if (off) {
  49410. + /* tail of unit @from is to be cut partially. Its width decreases */
  49411. + assert("vs-1582", new_first == NULL);
  49412. + ext = extent_item(coord) + from;
  49413. + extent_set_width(ext, off >> PAGE_SHIFT);
  49414. + count--;
  49415. + }
  49416. +
  49417. + assert("vs-1554",
  49418. + get_key_offset(pto_key) <=
  49419. + get_key_offset(&item_key) +
  49420. + reiser4_extent_size(coord, to + 1) - 1);
  49421. + off =
  49422. + (get_key_offset(&item_key) +
  49423. + reiser4_extent_size(coord, to + 1) - 1) -
  49424. + get_key_offset(pto_key);
  49425. + if (off) {
  49426. + /* @to_key is smaller than max key of unit @to. Unit @to will not be removed. It gets start increased
  49427. + and width decreased. */
  49428. + assert("vs-1583", (off & (PAGE_SIZE - 1)) == 0);
  49429. + ext = extent_item(coord) + to;
  49430. + if (state_of_extent(ext) == ALLOCATED_EXTENT)
  49431. + extent_set_start(ext,
  49432. + extent_get_start(ext) +
  49433. + (extent_get_width(ext) -
  49434. + (off >> PAGE_SHIFT)));
  49435. +
  49436. + extent_set_width(ext, (off >> PAGE_SHIFT));
  49437. + count--;
  49438. + }
  49439. + return count * sizeof(reiser4_extent);
  49440. +}
  49441. +
  49442. +/* item_plugin->b.unit_key */
  49443. +reiser4_key *unit_key_extent(const coord_t * coord, reiser4_key * key)
  49444. +{
  49445. + assert("vs-300", coord_is_existing_unit(coord));
  49446. +
  49447. + item_key_by_coord(coord, key);
  49448. + set_key_offset(key,
  49449. + (get_key_offset(key) +
  49450. + reiser4_extent_size(coord, coord->unit_pos)));
  49451. +
  49452. + return key;
  49453. +}
  49454. +
  49455. +/* item_plugin->b.max_unit_key */
  49456. +reiser4_key *max_unit_key_extent(const coord_t * coord, reiser4_key * key)
  49457. +{
  49458. + assert("vs-300", coord_is_existing_unit(coord));
  49459. +
  49460. + item_key_by_coord(coord, key);
  49461. + set_key_offset(key,
  49462. + (get_key_offset(key) +
  49463. + reiser4_extent_size(coord, coord->unit_pos + 1) - 1));
  49464. + return key;
  49465. +}
  49466. +
  49467. +/* item_plugin->b.estimate
  49468. + item_plugin->b.item_data_by_flow */
  49469. +
  49470. +#if REISER4_DEBUG
  49471. +
  49472. +/* item_plugin->b.check
  49473. + used for debugging, every item should have here the most complete
  49474. + possible check of the consistency of the item that the inventor can
  49475. + construct
  49476. +*/
  49477. +int reiser4_check_extent(const coord_t * coord /* coord of item to check */,
  49478. + const char **error /* where to store error message */)
  49479. +{
  49480. + reiser4_extent *ext, *first;
  49481. + unsigned i, j;
  49482. + reiser4_block_nr start, width, blk_cnt;
  49483. + unsigned num_units;
  49484. + reiser4_tree *tree;
  49485. + oid_t oid;
  49486. + reiser4_key key;
  49487. + coord_t scan;
  49488. +
  49489. + assert("vs-933", REISER4_DEBUG);
  49490. +
  49491. + if (znode_get_level(coord->node) != TWIG_LEVEL) {
  49492. + *error = "Extent on the wrong level";
  49493. + return -1;
  49494. + }
  49495. + if (item_length_by_coord(coord) % sizeof(reiser4_extent) != 0) {
  49496. + *error = "Wrong item size";
  49497. + return -1;
  49498. + }
  49499. + ext = first = extent_item(coord);
  49500. + blk_cnt = reiser4_block_count(reiser4_get_current_sb());
  49501. + num_units = coord_num_units(coord);
  49502. + tree = znode_get_tree(coord->node);
  49503. + item_key_by_coord(coord, &key);
  49504. + oid = get_key_objectid(&key);
  49505. + coord_dup(&scan, coord);
  49506. +
  49507. + for (i = 0; i < num_units; ++i, ++ext) {
  49508. + __u64 index;
  49509. +
  49510. + scan.unit_pos = i;
  49511. + index = extent_unit_index(&scan);
  49512. +
  49513. +#if 0
  49514. + /* check that all jnodes are present for the unallocated
  49515. + * extent */
  49516. + if (state_of_extent(ext) == UNALLOCATED_EXTENT) {
  49517. + for (j = 0; j < extent_get_width(ext); j++) {
  49518. + jnode *node;
  49519. +
  49520. + node = jlookup(tree, oid, index + j);
  49521. + if (node == NULL) {
  49522. + print_coord("scan", &scan, 0);
  49523. + *error = "Jnode missing";
  49524. + return -1;
  49525. + }
  49526. + jput(node);
  49527. + }
  49528. + }
  49529. +#endif
  49530. +
  49531. + start = extent_get_start(ext);
  49532. + if (start < 2)
  49533. + continue;
  49534. + /* extent is allocated one */
  49535. + width = extent_get_width(ext);
  49536. + if (start >= blk_cnt) {
  49537. + *error = "Start too large";
  49538. + return -1;
  49539. + }
  49540. + if (start + width > blk_cnt) {
  49541. + *error = "End too large";
  49542. + return -1;
  49543. + }
  49544. + /* make sure that this extent does not overlap with other
  49545. + allocated extents */
  49546. + for (j = 0; j < i; j++) {
  49547. + if (state_of_extent(first + j) != ALLOCATED_EXTENT)
  49548. + continue;
  49549. + if (!
  49550. + ((extent_get_start(ext) >=
  49551. + extent_get_start(first + j) +
  49552. + extent_get_width(first + j))
  49553. + || (extent_get_start(ext) +
  49554. + extent_get_width(ext) <=
  49555. + extent_get_start(first + j)))) {
  49556. + *error = "Extent overlaps with others";
  49557. + return -1;
  49558. + }
  49559. + }
  49560. +
  49561. + }
  49562. +
  49563. + return 0;
  49564. +}
  49565. +
  49566. +#endif /* REISER4_DEBUG */
  49567. +
  49568. +/*
  49569. + Local variables:
  49570. + c-indentation-style: "K&R"
  49571. + mode-name: "LC"
  49572. + c-basic-offset: 8
  49573. + tab-width: 8
  49574. + fill-column: 120
  49575. + scroll-step: 1
  49576. + End:
  49577. +*/
  49578. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/item/internal.c linux-4.14.2/fs/reiser4/plugin/item/internal.c
  49579. --- linux-4.14.2.orig/fs/reiser4/plugin/item/internal.c 1970-01-01 01:00:00.000000000 +0100
  49580. +++ linux-4.14.2/fs/reiser4/plugin/item/internal.c 2017-11-26 22:13:09.000000000 +0100
  49581. @@ -0,0 +1,404 @@
  49582. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  49583. +
  49584. +/* Implementation of internal-item plugin methods. */
  49585. +
  49586. +#include "../../forward.h"
  49587. +#include "../../debug.h"
  49588. +#include "../../dformat.h"
  49589. +#include "../../key.h"
  49590. +#include "../../coord.h"
  49591. +#include "internal.h"
  49592. +#include "item.h"
  49593. +#include "../node/node.h"
  49594. +#include "../plugin.h"
  49595. +#include "../../jnode.h"
  49596. +#include "../../znode.h"
  49597. +#include "../../tree_walk.h"
  49598. +#include "../../tree_mod.h"
  49599. +#include "../../tree.h"
  49600. +#include "../../super.h"
  49601. +#include "../../block_alloc.h"
  49602. +
  49603. +/* see internal.h for explanation */
  49604. +
  49605. +/* plugin->u.item.b.mergeable */
  49606. +int mergeable_internal(const coord_t * p1 UNUSED_ARG /* first item */ ,
  49607. + const coord_t * p2 UNUSED_ARG /* second item */ )
  49608. +{
  49609. + /* internal items are not mergeable */
  49610. + return 0;
  49611. +}
  49612. +
  49613. +/* ->lookup() method for internal items */
  49614. +lookup_result lookup_internal(const reiser4_key * key /* key to look up */ ,
  49615. + lookup_bias bias UNUSED_ARG /* lookup bias */ ,
  49616. + coord_t * coord /* coord of item */ )
  49617. +{
  49618. + reiser4_key ukey;
  49619. +
  49620. + switch (keycmp(unit_key_by_coord(coord, &ukey), key)) {
  49621. + default:
  49622. + impossible("", "keycmp()?!");
  49623. + case LESS_THAN:
  49624. + /* FIXME-VS: AFTER_ITEM used to be here. But with new coord
  49625. + item plugin can not be taken using coord set this way */
  49626. + assert("vs-681", coord->unit_pos == 0);
  49627. + coord->between = AFTER_UNIT;
  49628. + case EQUAL_TO:
  49629. + return CBK_COORD_FOUND;
  49630. + case GREATER_THAN:
  49631. + return CBK_COORD_NOTFOUND;
  49632. + }
  49633. +}
  49634. +
  49635. +/* return body of internal item at @coord */
  49636. +static internal_item_layout *internal_at(const coord_t * coord /* coord of
  49637. + * item */ )
  49638. +{
  49639. + assert("nikita-607", coord != NULL);
  49640. + assert("nikita-1650",
  49641. + item_plugin_by_coord(coord) ==
  49642. + item_plugin_by_id(NODE_POINTER_ID));
  49643. + return (internal_item_layout *) item_body_by_coord(coord);
  49644. +}
  49645. +
  49646. +void reiser4_update_internal(const coord_t * coord,
  49647. + const reiser4_block_nr * blocknr)
  49648. +{
  49649. + internal_item_layout *item = internal_at(coord);
  49650. + assert("nikita-2959", reiser4_blocknr_is_sane(blocknr));
  49651. +
  49652. + put_unaligned(cpu_to_le64(*blocknr), &item->pointer);
  49653. +}
  49654. +
  49655. +/* return child block number stored in the internal item at @coord */
  49656. +static reiser4_block_nr pointer_at(const coord_t * coord /* coord of item */ )
  49657. +{
  49658. + assert("nikita-608", coord != NULL);
  49659. + return le64_to_cpu(get_unaligned(&internal_at(coord)->pointer));
  49660. +}
  49661. +
  49662. +/* get znode pointed to by internal @item */
  49663. +static znode *znode_at(const coord_t * item /* coord of item */ ,
  49664. + znode * parent /* parent node */ )
  49665. +{
  49666. + return child_znode(item, parent, 1, 0);
  49667. +}
  49668. +
  49669. +/* store pointer from internal item into "block". Implementation of
  49670. + ->down_link() method */
  49671. +void down_link_internal(const coord_t * coord /* coord of item */ ,
  49672. + const reiser4_key * key UNUSED_ARG /* key to get
  49673. + * pointer for */ ,
  49674. + reiser4_block_nr * block /* resulting block number */ )
  49675. +{
  49676. + ON_DEBUG(reiser4_key item_key);
  49677. +
  49678. + assert("nikita-609", coord != NULL);
  49679. + assert("nikita-611", block != NULL);
  49680. + assert("nikita-612", (key == NULL) ||
  49681. + /* twig horrors */
  49682. + (znode_get_level(coord->node) == TWIG_LEVEL)
  49683. + || keyle(item_key_by_coord(coord, &item_key), key));
  49684. +
  49685. + *block = pointer_at(coord);
  49686. + assert("nikita-2960", reiser4_blocknr_is_sane(block));
  49687. +}
  49688. +
  49689. +/* Get the child's block number, or 0 if the block is unallocated. */
  49690. +int
  49691. +utmost_child_real_block_internal(const coord_t * coord, sideof side UNUSED_ARG,
  49692. + reiser4_block_nr * block)
  49693. +{
  49694. + assert("jmacd-2059", coord != NULL);
  49695. +
  49696. + *block = pointer_at(coord);
  49697. + assert("nikita-2961", reiser4_blocknr_is_sane(block));
  49698. +
  49699. + if (reiser4_blocknr_is_fake(block)) {
  49700. + *block = 0;
  49701. + }
  49702. +
  49703. + return 0;
  49704. +}
  49705. +
  49706. +/* Return the child. */
  49707. +int
  49708. +utmost_child_internal(const coord_t * coord, sideof side UNUSED_ARG,
  49709. + jnode ** childp)
  49710. +{
  49711. + reiser4_block_nr block = pointer_at(coord);
  49712. + znode *child;
  49713. +
  49714. + assert("jmacd-2059", childp != NULL);
  49715. + assert("nikita-2962", reiser4_blocknr_is_sane(&block));
  49716. +
  49717. + child = zlook(znode_get_tree(coord->node), &block);
  49718. +
  49719. + if (IS_ERR(child)) {
  49720. + return PTR_ERR(child);
  49721. + }
  49722. +
  49723. + *childp = ZJNODE(child);
  49724. +
  49725. + return 0;
  49726. +}
  49727. +
  49728. +#if REISER4_DEBUG
  49729. +
  49730. +static void check_link(znode * left, znode * right)
  49731. +{
  49732. + znode *scan;
  49733. +
  49734. + for (scan = left; scan != right; scan = scan->right) {
  49735. + if (ZF_ISSET(scan, JNODE_RIP))
  49736. + break;
  49737. + if (znode_is_right_connected(scan) && scan->right != NULL) {
  49738. + if (ZF_ISSET(scan->right, JNODE_RIP))
  49739. + break;
  49740. + assert("nikita-3285",
  49741. + znode_is_left_connected(scan->right));
  49742. + assert("nikita-3265",
  49743. + ergo(scan != left,
  49744. + ZF_ISSET(scan, JNODE_HEARD_BANSHEE)));
  49745. + assert("nikita-3284", scan->right->left == scan);
  49746. + } else
  49747. + break;
  49748. + }
  49749. +}
  49750. +
  49751. +int check__internal(const coord_t * coord, const char **error)
  49752. +{
  49753. + reiser4_block_nr blk;
  49754. + znode *child;
  49755. + coord_t cpy;
  49756. +
  49757. + blk = pointer_at(coord);
  49758. + if (!reiser4_blocknr_is_sane(&blk)) {
  49759. + *error = "Invalid pointer";
  49760. + return -1;
  49761. + }
  49762. + coord_dup(&cpy, coord);
  49763. + child = znode_at(&cpy, cpy.node);
  49764. + if (child != NULL) {
  49765. + znode *left_child;
  49766. + znode *right_child;
  49767. +
  49768. + left_child = right_child = NULL;
  49769. +
  49770. + assert("nikita-3256", znode_invariant(child));
  49771. + if (coord_prev_item(&cpy) == 0 && item_is_internal(&cpy)) {
  49772. + left_child = znode_at(&cpy, cpy.node);
  49773. + if (left_child != NULL) {
  49774. + read_lock_tree(znode_get_tree(child));
  49775. + check_link(left_child, child);
  49776. + read_unlock_tree(znode_get_tree(child));
  49777. + zput(left_child);
  49778. + }
  49779. + }
  49780. + coord_dup(&cpy, coord);
  49781. + if (coord_next_item(&cpy) == 0 && item_is_internal(&cpy)) {
  49782. + right_child = znode_at(&cpy, cpy.node);
  49783. + if (right_child != NULL) {
  49784. + read_lock_tree(znode_get_tree(child));
  49785. + check_link(child, right_child);
  49786. + read_unlock_tree(znode_get_tree(child));
  49787. + zput(right_child);
  49788. + }
  49789. + }
  49790. + zput(child);
  49791. + }
  49792. + return 0;
  49793. +}
  49794. +
  49795. +#endif /* REISER4_DEBUG */
  49796. +
  49797. +/* return true only if this item really points to "block" */
  49798. +/* Audited by: green(2002.06.14) */
  49799. +int has_pointer_to_internal(const coord_t * coord /* coord of item */ ,
  49800. + const reiser4_block_nr * block /* block number to
  49801. + * check */ )
  49802. +{
  49803. + assert("nikita-613", coord != NULL);
  49804. + assert("nikita-614", block != NULL);
  49805. +
  49806. + return pointer_at(coord) == *block;
  49807. +}
  49808. +
  49809. +/* hook called by ->create_item() method of node plugin after new internal
  49810. + item was just created.
  49811. +
  49812. + This is point where pointer to new node is inserted into tree. Initialize
  49813. + parent pointer in child znode, insert child into sibling list and slum.
  49814. +
  49815. +*/
  49816. +int create_hook_internal(const coord_t * item /* coord of item */ ,
  49817. + void *arg /* child's left neighbor, if any */ )
  49818. +{
  49819. + znode *child;
  49820. + __u64 child_ptr;
  49821. +
  49822. + assert("nikita-1252", item != NULL);
  49823. + assert("nikita-1253", item->node != NULL);
  49824. + assert("nikita-1181", znode_get_level(item->node) > LEAF_LEVEL);
  49825. + assert("nikita-1450", item->unit_pos == 0);
  49826. +
  49827. + /*
  49828. + * preparing to item insertion build_child_ptr_data sets pointer to
  49829. + * data to be inserted to jnode's blocknr which is in cpu byte
  49830. + * order. Node's create_item simply copied those data. As result we
  49831. + * have child pointer in cpu's byte order. Convert content of internal
  49832. + * item to little endian byte order.
  49833. + */
  49834. + child_ptr = get_unaligned((__u64 *)item_body_by_coord(item));
  49835. + reiser4_update_internal(item, &child_ptr);
  49836. +
  49837. + child = znode_at(item, item->node);
  49838. + if (child != NULL && !IS_ERR(child)) {
  49839. + znode *left;
  49840. + int result = 0;
  49841. + reiser4_tree *tree;
  49842. +
  49843. + left = arg;
  49844. + tree = znode_get_tree(item->node);
  49845. + write_lock_tree(tree);
  49846. + write_lock_dk(tree);
  49847. + assert("nikita-1400", (child->in_parent.node == NULL)
  49848. + || (znode_above_root(child->in_parent.node)));
  49849. + ++item->node->c_count;
  49850. + coord_to_parent_coord(item, &child->in_parent);
  49851. + sibling_list_insert_nolock(child, left);
  49852. +
  49853. + assert("nikita-3297", ZF_ISSET(child, JNODE_ORPHAN));
  49854. + ZF_CLR(child, JNODE_ORPHAN);
  49855. +
  49856. + if ((left != NULL) && !keyeq(znode_get_rd_key(left),
  49857. + znode_get_rd_key(child))) {
  49858. + znode_set_rd_key(child, znode_get_rd_key(left));
  49859. + }
  49860. + write_unlock_dk(tree);
  49861. + write_unlock_tree(tree);
  49862. + zput(child);
  49863. + return result;
  49864. + } else {
  49865. + if (child == NULL)
  49866. + child = ERR_PTR(-EIO);
  49867. + return PTR_ERR(child);
  49868. + }
  49869. +}
  49870. +
  49871. +/* hook called by ->cut_and_kill() method of node plugin just before internal
  49872. + item is removed.
  49873. +
  49874. + This is point where empty node is removed from the tree. Clear parent
  49875. + pointer in child, and mark node for pending deletion.
  49876. +
  49877. + Node will be actually deleted later and in several installments:
  49878. +
  49879. + . when last lock on this node will be released, node will be removed from
  49880. + the sibling list and its lock will be invalidated
  49881. +
  49882. + . when last reference to this node will be dropped, bitmap will be updated
  49883. + and node will be actually removed from the memory.
  49884. +
  49885. +*/
  49886. +int kill_hook_internal(const coord_t * item /* coord of item */ ,
  49887. + pos_in_node_t from UNUSED_ARG /* start unit */ ,
  49888. + pos_in_node_t count UNUSED_ARG /* stop unit */ ,
  49889. + struct carry_kill_data *p UNUSED_ARG)
  49890. +{
  49891. + znode *child;
  49892. + int result = 0;
  49893. +
  49894. + assert("nikita-1222", item != NULL);
  49895. + assert("nikita-1224", from == 0);
  49896. + assert("nikita-1225", count == 1);
  49897. +
  49898. + child = znode_at(item, item->node);
  49899. + if (child == NULL)
  49900. + return 0;
  49901. + if (IS_ERR(child))
  49902. + return PTR_ERR(child);
  49903. + result = zload(child);
  49904. + if (result) {
  49905. + zput(child);
  49906. + return result;
  49907. + }
  49908. + if (node_is_empty(child)) {
  49909. + reiser4_tree *tree;
  49910. +
  49911. + assert("nikita-1397", znode_is_write_locked(child));
  49912. + assert("nikita-1398", child->c_count == 0);
  49913. + assert("nikita-2546", ZF_ISSET(child, JNODE_HEARD_BANSHEE));
  49914. +
  49915. + tree = znode_get_tree(item->node);
  49916. + write_lock_tree(tree);
  49917. + init_parent_coord(&child->in_parent, NULL);
  49918. + --item->node->c_count;
  49919. + write_unlock_tree(tree);
  49920. + } else {
  49921. + warning("nikita-1223",
  49922. + "Cowardly refuse to remove link to non-empty node");
  49923. + result = RETERR(-EIO);
  49924. + }
  49925. + zrelse(child);
  49926. + zput(child);
  49927. + return result;
  49928. +}
  49929. +
  49930. +/* hook called by ->shift() node plugin method when internal item was just
  49931. + moved from one node to another.
  49932. +
  49933. + Update parent pointer in child and c_counts in old and new parent
  49934. +
  49935. +*/
  49936. +int shift_hook_internal(const coord_t * item /* coord of item */ ,
  49937. + unsigned from UNUSED_ARG /* start unit */ ,
  49938. + unsigned count UNUSED_ARG /* stop unit */ ,
  49939. + znode * old_node /* old parent */ )
  49940. +{
  49941. + znode *child;
  49942. + znode *new_node;
  49943. + reiser4_tree *tree;
  49944. +
  49945. + assert("nikita-1276", item != NULL);
  49946. + assert("nikita-1277", from == 0);
  49947. + assert("nikita-1278", count == 1);
  49948. + assert("nikita-1451", item->unit_pos == 0);
  49949. +
  49950. + new_node = item->node;
  49951. + assert("nikita-2132", new_node != old_node);
  49952. + tree = znode_get_tree(item->node);
  49953. + child = child_znode(item, old_node, 1, 0);
  49954. + if (child == NULL)
  49955. + return 0;
  49956. + if (!IS_ERR(child)) {
  49957. + write_lock_tree(tree);
  49958. + ++new_node->c_count;
  49959. + assert("nikita-1395", znode_parent(child) == old_node);
  49960. + assert("nikita-1396", old_node->c_count > 0);
  49961. + coord_to_parent_coord(item, &child->in_parent);
  49962. + assert("nikita-1781", znode_parent(child) == new_node);
  49963. + assert("nikita-1782",
  49964. + check_tree_pointer(item, child) == NS_FOUND);
  49965. + --old_node->c_count;
  49966. + write_unlock_tree(tree);
  49967. + zput(child);
  49968. + return 0;
  49969. + } else
  49970. + return PTR_ERR(child);
  49971. +}
  49972. +
  49973. +/* plugin->u.item.b.max_key_inside - not defined */
  49974. +
  49975. +/* plugin->u.item.b.nr_units - item.c:single_unit */
  49976. +
  49977. +/* Make Linus happy.
  49978. + Local variables:
  49979. + c-indentation-style: "K&R"
  49980. + mode-name: "LC"
  49981. + c-basic-offset: 8
  49982. + tab-width: 8
  49983. + fill-column: 120
  49984. + End:
  49985. +*/
  49986. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/item/internal.h linux-4.14.2/fs/reiser4/plugin/item/internal.h
  49987. --- linux-4.14.2.orig/fs/reiser4/plugin/item/internal.h 1970-01-01 01:00:00.000000000 +0100
  49988. +++ linux-4.14.2/fs/reiser4/plugin/item/internal.h 2017-11-26 22:13:09.000000000 +0100
  49989. @@ -0,0 +1,57 @@
  49990. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  49991. +/* Internal item contains down-link to the child of the internal/twig
  49992. + node in a tree. It is internal items that are actually used during
  49993. + tree traversal. */
  49994. +
  49995. +#if !defined( __FS_REISER4_PLUGIN_ITEM_INTERNAL_H__ )
  49996. +#define __FS_REISER4_PLUGIN_ITEM_INTERNAL_H__
  49997. +
  49998. +#include "../../forward.h"
  49999. +#include "../../dformat.h"
  50000. +
  50001. +/* on-disk layout of internal item */
  50002. +typedef struct internal_item_layout {
  50003. + /* 0 */ reiser4_dblock_nr pointer;
  50004. + /* 4 */
  50005. +} internal_item_layout;
  50006. +
  50007. +struct cut_list;
  50008. +
  50009. +int mergeable_internal(const coord_t * p1, const coord_t * p2);
  50010. +lookup_result lookup_internal(const reiser4_key * key, lookup_bias bias,
  50011. + coord_t * coord);
  50012. +/* store pointer from internal item into "block". Implementation of
  50013. + ->down_link() method */
  50014. +extern void down_link_internal(const coord_t * coord, const reiser4_key * key,
  50015. + reiser4_block_nr * block);
  50016. +extern int has_pointer_to_internal(const coord_t * coord,
  50017. + const reiser4_block_nr * block);
  50018. +extern int create_hook_internal(const coord_t * item, void *arg);
  50019. +extern int kill_hook_internal(const coord_t * item, pos_in_node_t from,
  50020. + pos_in_node_t count, struct carry_kill_data *);
  50021. +extern int shift_hook_internal(const coord_t * item, unsigned from,
  50022. + unsigned count, znode * old_node);
  50023. +extern void reiser4_print_internal(const char *prefix, coord_t * coord);
  50024. +
  50025. +extern int utmost_child_internal(const coord_t * coord, sideof side,
  50026. + jnode ** child);
  50027. +int utmost_child_real_block_internal(const coord_t * coord, sideof side,
  50028. + reiser4_block_nr * block);
  50029. +
  50030. +extern void reiser4_update_internal(const coord_t * coord,
  50031. + const reiser4_block_nr * blocknr);
  50032. +/* FIXME: reiserfs has check_internal */
  50033. +extern int check__internal(const coord_t * coord, const char **error);
  50034. +
  50035. +/* __FS_REISER4_PLUGIN_ITEM_INTERNAL_H__ */
  50036. +#endif
  50037. +
  50038. +/* Make Linus happy.
  50039. + Local variables:
  50040. + c-indentation-style: "K&R"
  50041. + mode-name: "LC"
  50042. + c-basic-offset: 8
  50043. + tab-width: 8
  50044. + fill-column: 120
  50045. + End:
  50046. +*/
  50047. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/item/item.c linux-4.14.2/fs/reiser4/plugin/item/item.c
  50048. --- linux-4.14.2.orig/fs/reiser4/plugin/item/item.c 1970-01-01 01:00:00.000000000 +0100
  50049. +++ linux-4.14.2/fs/reiser4/plugin/item/item.c 2017-11-26 22:13:09.000000000 +0100
  50050. @@ -0,0 +1,719 @@
  50051. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  50052. +
  50053. +/* definition of item plugins. */
  50054. +
  50055. +#include "../../forward.h"
  50056. +#include "../../debug.h"
  50057. +#include "../../key.h"
  50058. +#include "../../coord.h"
  50059. +#include "../plugin_header.h"
  50060. +#include "sde.h"
  50061. +#include "internal.h"
  50062. +#include "item.h"
  50063. +#include "static_stat.h"
  50064. +#include "../plugin.h"
  50065. +#include "../../znode.h"
  50066. +#include "../../tree.h"
  50067. +#include "../../context.h"
  50068. +#include "ctail.h"
  50069. +
  50070. +/* return pointer to item body */
  50071. +void item_body_by_coord_hard(coord_t * coord /* coord to query */ )
  50072. +{
  50073. + assert("nikita-324", coord != NULL);
  50074. + assert("nikita-325", coord->node != NULL);
  50075. + assert("nikita-326", znode_is_loaded(coord->node));
  50076. + assert("nikita-3200", coord->offset == INVALID_OFFSET);
  50077. +
  50078. + coord->offset =
  50079. + node_plugin_by_node(coord->node)->item_by_coord(coord) -
  50080. + zdata(coord->node);
  50081. + ON_DEBUG(coord->body_v = coord->node->times_locked);
  50082. +}
  50083. +
  50084. +void *item_body_by_coord_easy(const coord_t * coord /* coord to query */ )
  50085. +{
  50086. + return zdata(coord->node) + coord->offset;
  50087. +}
  50088. +
  50089. +#if REISER4_DEBUG
  50090. +
  50091. +int item_body_is_valid(const coord_t * coord)
  50092. +{
  50093. + return
  50094. + coord->offset ==
  50095. + node_plugin_by_node(coord->node)->item_by_coord(coord) -
  50096. + zdata(coord->node);
  50097. +}
  50098. +
  50099. +#endif
  50100. +
  50101. +/* return length of item at @coord */
  50102. +pos_in_node_t item_length_by_coord(const coord_t * coord /* coord to query */ )
  50103. +{
  50104. + int len;
  50105. +
  50106. + assert("nikita-327", coord != NULL);
  50107. + assert("nikita-328", coord->node != NULL);
  50108. + assert("nikita-329", znode_is_loaded(coord->node));
  50109. +
  50110. + len = node_plugin_by_node(coord->node)->length_by_coord(coord);
  50111. + return len;
  50112. +}
  50113. +
  50114. +void obtain_item_plugin(const coord_t * coord)
  50115. +{
  50116. + assert("nikita-330", coord != NULL);
  50117. + assert("nikita-331", coord->node != NULL);
  50118. + assert("nikita-332", znode_is_loaded(coord->node));
  50119. +
  50120. + coord_set_iplug((coord_t *) coord,
  50121. + node_plugin_by_node(coord->node)->
  50122. + plugin_by_coord(coord));
  50123. + assert("nikita-2479",
  50124. + coord_iplug(coord) ==
  50125. + node_plugin_by_node(coord->node)->plugin_by_coord(coord));
  50126. +}
  50127. +
  50128. +/* return id of item */
  50129. +/* Audited by: green(2002.06.15) */
  50130. +item_id item_id_by_coord(const coord_t * coord /* coord to query */ )
  50131. +{
  50132. + assert("vs-539", coord != NULL);
  50133. + assert("vs-538", coord->node != NULL);
  50134. + assert("vs-537", znode_is_loaded(coord->node));
  50135. + assert("vs-536", item_plugin_by_coord(coord) != NULL);
  50136. + assert("vs-540",
  50137. + item_id_by_plugin(item_plugin_by_coord(coord)) < LAST_ITEM_ID);
  50138. +
  50139. + return item_id_by_plugin(item_plugin_by_coord(coord));
  50140. +}
  50141. +
  50142. +/* return key of item at @coord */
  50143. +/* Audited by: green(2002.06.15) */
  50144. +reiser4_key *item_key_by_coord(const coord_t * coord /* coord to query */ ,
  50145. + reiser4_key * key /* result */ )
  50146. +{
  50147. + assert("nikita-338", coord != NULL);
  50148. + assert("nikita-339", coord->node != NULL);
  50149. + assert("nikita-340", znode_is_loaded(coord->node));
  50150. +
  50151. + return node_plugin_by_node(coord->node)->key_at(coord, key);
  50152. +}
  50153. +
  50154. +/* this returns max key in the item */
  50155. +reiser4_key *max_item_key_by_coord(const coord_t * coord /* coord to query */ ,
  50156. + reiser4_key * key /* result */ )
  50157. +{
  50158. + coord_t last;
  50159. +
  50160. + assert("nikita-338", coord != NULL);
  50161. + assert("nikita-339", coord->node != NULL);
  50162. + assert("nikita-340", znode_is_loaded(coord->node));
  50163. +
  50164. + /* make coord pointing to last item's unit */
  50165. + coord_dup(&last, coord);
  50166. + last.unit_pos = coord_num_units(&last) - 1;
  50167. + assert("vs-1560", coord_is_existing_unit(&last));
  50168. +
  50169. + max_unit_key_by_coord(&last, key);
  50170. + return key;
  50171. +}
  50172. +
  50173. +/* return key of unit at @coord */
  50174. +reiser4_key *unit_key_by_coord(const coord_t * coord /* coord to query */ ,
  50175. + reiser4_key * key /* result */ )
  50176. +{
  50177. + assert("nikita-772", coord != NULL);
  50178. + assert("nikita-774", coord->node != NULL);
  50179. + assert("nikita-775", znode_is_loaded(coord->node));
  50180. +
  50181. + if (item_plugin_by_coord(coord)->b.unit_key != NULL)
  50182. + return item_plugin_by_coord(coord)->b.unit_key(coord, key);
  50183. + else
  50184. + return item_key_by_coord(coord, key);
  50185. +}
  50186. +
  50187. +/* return the biggest key contained the unit @coord */
  50188. +reiser4_key *max_unit_key_by_coord(const coord_t * coord /* coord to query */ ,
  50189. + reiser4_key * key /* result */ )
  50190. +{
  50191. + assert("nikita-772", coord != NULL);
  50192. + assert("nikita-774", coord->node != NULL);
  50193. + assert("nikita-775", znode_is_loaded(coord->node));
  50194. +
  50195. + if (item_plugin_by_coord(coord)->b.max_unit_key != NULL)
  50196. + return item_plugin_by_coord(coord)->b.max_unit_key(coord, key);
  50197. + else
  50198. + return unit_key_by_coord(coord, key);
  50199. +}
  50200. +
  50201. +/* ->max_key_inside() method for items consisting of exactly one key (like
  50202. + stat-data) */
  50203. +static reiser4_key *max_key_inside_single_key(const coord_t *
  50204. + coord /* coord of item */ ,
  50205. + reiser4_key *
  50206. + result /* resulting key */ )
  50207. +{
  50208. + assert("nikita-604", coord != NULL);
  50209. +
  50210. + /* coord -> key is starting key of this item and it has to be already
  50211. + filled in */
  50212. + return unit_key_by_coord(coord, result);
  50213. +}
  50214. +
  50215. +/* ->nr_units() method for items consisting of exactly one unit always */
  50216. +pos_in_node_t
  50217. +nr_units_single_unit(const coord_t * coord UNUSED_ARG /* coord of item */ )
  50218. +{
  50219. + return 1;
  50220. +}
  50221. +
  50222. +static int
  50223. +paste_no_paste(coord_t * coord UNUSED_ARG,
  50224. + reiser4_item_data * data UNUSED_ARG,
  50225. + carry_plugin_info * info UNUSED_ARG)
  50226. +{
  50227. + return 0;
  50228. +}
  50229. +
  50230. +/* default ->fast_paste() method */
  50231. +static int
  50232. +agree_to_fast_op(const coord_t * coord UNUSED_ARG /* coord of item */ )
  50233. +{
  50234. + return 1;
  50235. +}
  50236. +
  50237. +int item_can_contain_key(const coord_t * item /* coord of item */ ,
  50238. + const reiser4_key * key /* key to check */ ,
  50239. + const reiser4_item_data * data /* parameters of item
  50240. + * being created */ )
  50241. +{
  50242. + item_plugin *iplug;
  50243. + reiser4_key min_key_in_item;
  50244. + reiser4_key max_key_in_item;
  50245. +
  50246. + assert("nikita-1658", item != NULL);
  50247. + assert("nikita-1659", key != NULL);
  50248. +
  50249. + iplug = item_plugin_by_coord(item);
  50250. + if (iplug->b.can_contain_key != NULL)
  50251. + return iplug->b.can_contain_key(item, key, data);
  50252. + else {
  50253. + assert("nikita-1681", iplug->b.max_key_inside != NULL);
  50254. + item_key_by_coord(item, &min_key_in_item);
  50255. + iplug->b.max_key_inside(item, &max_key_in_item);
  50256. +
  50257. + /* can contain key if
  50258. + min_key_in_item <= key &&
  50259. + key <= max_key_in_item
  50260. + */
  50261. + return keyle(&min_key_in_item, key)
  50262. + && keyle(key, &max_key_in_item);
  50263. + }
  50264. +}
  50265. +
  50266. +/* mergeable method for non mergeable items */
  50267. +static int
  50268. +not_mergeable(const coord_t * i1 UNUSED_ARG, const coord_t * i2 UNUSED_ARG)
  50269. +{
  50270. + return 0;
  50271. +}
  50272. +
  50273. +/* return 0 if @item1 and @item2 are not mergeable, !0 - otherwise */
  50274. +int are_items_mergeable(const coord_t * i1 /* coord of first item */ ,
  50275. + const coord_t * i2 /* coord of second item */ )
  50276. +{
  50277. + item_plugin *iplug;
  50278. + reiser4_key k1;
  50279. + reiser4_key k2;
  50280. +
  50281. + assert("nikita-1336", i1 != NULL);
  50282. + assert("nikita-1337", i2 != NULL);
  50283. +
  50284. + iplug = item_plugin_by_coord(i1);
  50285. + assert("nikita-1338", iplug != NULL);
  50286. +
  50287. + /* NOTE-NIKITA are_items_mergeable() is also called by assertions in
  50288. + shifting code when nodes are in "suspended" state. */
  50289. + assert("nikita-1663",
  50290. + keyle(item_key_by_coord(i1, &k1), item_key_by_coord(i2, &k2)));
  50291. +
  50292. + if (iplug->b.mergeable != NULL) {
  50293. + return iplug->b.mergeable(i1, i2);
  50294. + } else if (iplug->b.max_key_inside != NULL) {
  50295. + iplug->b.max_key_inside(i1, &k1);
  50296. + item_key_by_coord(i2, &k2);
  50297. +
  50298. + /* mergeable if ->max_key_inside() >= key of i2; */
  50299. + return keyge(iplug->b.max_key_inside(i1, &k1),
  50300. + item_key_by_coord(i2, &k2));
  50301. + } else {
  50302. + item_key_by_coord(i1, &k1);
  50303. + item_key_by_coord(i2, &k2);
  50304. +
  50305. + return
  50306. + (get_key_locality(&k1) == get_key_locality(&k2)) &&
  50307. + (get_key_objectid(&k1) == get_key_objectid(&k2))
  50308. + && (iplug == item_plugin_by_coord(i2));
  50309. + }
  50310. +}
  50311. +
  50312. +int item_is_extent(const coord_t * item)
  50313. +{
  50314. + assert("vs-482", coord_is_existing_item(item));
  50315. + return item_id_by_coord(item) == EXTENT_POINTER_ID;
  50316. +}
  50317. +
  50318. +int item_is_tail(const coord_t * item)
  50319. +{
  50320. + assert("vs-482", coord_is_existing_item(item));
  50321. + return item_id_by_coord(item) == FORMATTING_ID;
  50322. +}
  50323. +
  50324. +#if REISER4_DEBUG
  50325. +
  50326. +int item_is_statdata(const coord_t * item)
  50327. +{
  50328. + assert("vs-516", coord_is_existing_item(item));
  50329. + return plugin_of_group(item_plugin_by_coord(item), STAT_DATA_ITEM_TYPE);
  50330. +}
  50331. +
  50332. +int item_is_ctail(const coord_t * item)
  50333. +{
  50334. + assert("edward-xx", coord_is_existing_item(item));
  50335. + return item_id_by_coord(item) == CTAIL_ID;
  50336. +}
  50337. +
  50338. +#endif /* REISER4_DEBUG */
  50339. +
  50340. +static int change_item(struct inode *inode,
  50341. + reiser4_plugin * plugin,
  50342. + pset_member memb)
  50343. +{
  50344. + /* cannot change constituent item (sd, or dir_item) */
  50345. + return RETERR(-EINVAL);
  50346. +}
  50347. +
  50348. +static reiser4_plugin_ops item_plugin_ops = {
  50349. + .init = NULL,
  50350. + .load = NULL,
  50351. + .save_len = NULL,
  50352. + .save = NULL,
  50353. + .change = change_item
  50354. +};
  50355. +
  50356. +item_plugin item_plugins[LAST_ITEM_ID] = {
  50357. + [STATIC_STAT_DATA_ID] = {
  50358. + .h = {
  50359. + .type_id = REISER4_ITEM_PLUGIN_TYPE,
  50360. + .id = STATIC_STAT_DATA_ID,
  50361. + .groups = (1 << STAT_DATA_ITEM_TYPE),
  50362. + .pops = &item_plugin_ops,
  50363. + .label = "sd",
  50364. + .desc = "stat-data",
  50365. + .linkage = {NULL, NULL}
  50366. + },
  50367. + .b = {
  50368. + .max_key_inside = max_key_inside_single_key,
  50369. + .can_contain_key = NULL,
  50370. + .mergeable = not_mergeable,
  50371. + .nr_units = nr_units_single_unit,
  50372. + .lookup = NULL,
  50373. + .init = NULL,
  50374. + .paste = paste_no_paste,
  50375. + .fast_paste = NULL,
  50376. + .can_shift = NULL,
  50377. + .copy_units = NULL,
  50378. + .create_hook = NULL,
  50379. + .kill_hook = NULL,
  50380. + .shift_hook = NULL,
  50381. + .cut_units = NULL,
  50382. + .kill_units = NULL,
  50383. + .unit_key = NULL,
  50384. + .max_unit_key = NULL,
  50385. + .estimate = NULL,
  50386. + .item_data_by_flow = NULL,
  50387. +#if REISER4_DEBUG
  50388. + .check = NULL
  50389. +#endif
  50390. + },
  50391. + .f = {
  50392. + .utmost_child = NULL,
  50393. + .utmost_child_real_block = NULL,
  50394. + .update = NULL,
  50395. + .scan = NULL,
  50396. + .convert = NULL
  50397. + },
  50398. + .s = {
  50399. + .sd = {
  50400. + .init_inode = init_inode_static_sd,
  50401. + .save_len = save_len_static_sd,
  50402. + .save = save_static_sd
  50403. + }
  50404. + }
  50405. + },
  50406. + [SIMPLE_DIR_ENTRY_ID] = {
  50407. + .h = {
  50408. + .type_id = REISER4_ITEM_PLUGIN_TYPE,
  50409. + .id = SIMPLE_DIR_ENTRY_ID,
  50410. + .groups = (1 << DIR_ENTRY_ITEM_TYPE),
  50411. + .pops = &item_plugin_ops,
  50412. + .label = "de",
  50413. + .desc = "directory entry",
  50414. + .linkage = {NULL, NULL}
  50415. + },
  50416. + .b = {
  50417. + .max_key_inside = max_key_inside_single_key,
  50418. + .can_contain_key = NULL,
  50419. + .mergeable = NULL,
  50420. + .nr_units = nr_units_single_unit,
  50421. + .lookup = NULL,
  50422. + .init = NULL,
  50423. + .paste = NULL,
  50424. + .fast_paste = NULL,
  50425. + .can_shift = NULL,
  50426. + .copy_units = NULL,
  50427. + .create_hook = NULL,
  50428. + .kill_hook = NULL,
  50429. + .shift_hook = NULL,
  50430. + .cut_units = NULL,
  50431. + .kill_units = NULL,
  50432. + .unit_key = NULL,
  50433. + .max_unit_key = NULL,
  50434. + .estimate = NULL,
  50435. + .item_data_by_flow = NULL,
  50436. +#if REISER4_DEBUG
  50437. + .check = NULL
  50438. +#endif
  50439. + },
  50440. + .f = {
  50441. + .utmost_child = NULL,
  50442. + .utmost_child_real_block = NULL,
  50443. + .update = NULL,
  50444. + .scan = NULL,
  50445. + .convert = NULL
  50446. + },
  50447. + .s = {
  50448. + .dir = {
  50449. + .extract_key = extract_key_de,
  50450. + .update_key = update_key_de,
  50451. + .extract_name = extract_name_de,
  50452. + .extract_file_type = extract_file_type_de,
  50453. + .add_entry = add_entry_de,
  50454. + .rem_entry = rem_entry_de,
  50455. + .max_name_len = max_name_len_de
  50456. + }
  50457. + }
  50458. + },
  50459. + [COMPOUND_DIR_ID] = {
  50460. + .h = {
  50461. + .type_id = REISER4_ITEM_PLUGIN_TYPE,
  50462. + .id = COMPOUND_DIR_ID,
  50463. + .groups = (1 << DIR_ENTRY_ITEM_TYPE),
  50464. + .pops = &item_plugin_ops,
  50465. + .label = "cde",
  50466. + .desc = "compressed directory entry",
  50467. + .linkage = {NULL, NULL}
  50468. + },
  50469. + .b = {
  50470. + .max_key_inside = max_key_inside_cde,
  50471. + .can_contain_key = can_contain_key_cde,
  50472. + .mergeable = mergeable_cde,
  50473. + .nr_units = nr_units_cde,
  50474. + .lookup = lookup_cde,
  50475. + .init = init_cde,
  50476. + .paste = paste_cde,
  50477. + .fast_paste = agree_to_fast_op,
  50478. + .can_shift = can_shift_cde,
  50479. + .copy_units = copy_units_cde,
  50480. + .create_hook = NULL,
  50481. + .kill_hook = NULL,
  50482. + .shift_hook = NULL,
  50483. + .cut_units = cut_units_cde,
  50484. + .kill_units = kill_units_cde,
  50485. + .unit_key = unit_key_cde,
  50486. + .max_unit_key = unit_key_cde,
  50487. + .estimate = estimate_cde,
  50488. + .item_data_by_flow = NULL,
  50489. +#if REISER4_DEBUG
  50490. + .check = reiser4_check_cde
  50491. +#endif
  50492. + },
  50493. + .f = {
  50494. + .utmost_child = NULL,
  50495. + .utmost_child_real_block = NULL,
  50496. + .update = NULL,
  50497. + .scan = NULL,
  50498. + .convert = NULL
  50499. + },
  50500. + .s = {
  50501. + .dir = {
  50502. + .extract_key = extract_key_cde,
  50503. + .update_key = update_key_cde,
  50504. + .extract_name = extract_name_cde,
  50505. + .extract_file_type = extract_file_type_de,
  50506. + .add_entry = add_entry_cde,
  50507. + .rem_entry = rem_entry_cde,
  50508. + .max_name_len = max_name_len_cde
  50509. + }
  50510. + }
  50511. + },
  50512. + [NODE_POINTER_ID] = {
  50513. + .h = {
  50514. + .type_id = REISER4_ITEM_PLUGIN_TYPE,
  50515. + .id = NODE_POINTER_ID,
  50516. + .groups = (1 << INTERNAL_ITEM_TYPE),
  50517. + .pops = NULL,
  50518. + .label = "internal",
  50519. + .desc = "internal item",
  50520. + .linkage = {NULL, NULL}
  50521. + },
  50522. + .b = {
  50523. + .max_key_inside = NULL,
  50524. + .can_contain_key = NULL,
  50525. + .mergeable = mergeable_internal,
  50526. + .nr_units = nr_units_single_unit,
  50527. + .lookup = lookup_internal,
  50528. + .init = NULL,
  50529. + .paste = NULL,
  50530. + .fast_paste = NULL,
  50531. + .can_shift = NULL,
  50532. + .copy_units = NULL,
  50533. + .create_hook = create_hook_internal,
  50534. + .kill_hook = kill_hook_internal,
  50535. + .shift_hook = shift_hook_internal,
  50536. + .cut_units = NULL,
  50537. + .kill_units = NULL,
  50538. + .unit_key = NULL,
  50539. + .max_unit_key = NULL,
  50540. + .estimate = NULL,
  50541. + .item_data_by_flow = NULL,
  50542. +#if REISER4_DEBUG
  50543. + .check = check__internal
  50544. +#endif
  50545. + },
  50546. + .f = {
  50547. + .utmost_child = utmost_child_internal,
  50548. + .utmost_child_real_block =
  50549. + utmost_child_real_block_internal,
  50550. + .update = reiser4_update_internal,
  50551. + .scan = NULL,
  50552. + .convert = NULL
  50553. + },
  50554. + .s = {
  50555. + .internal = {
  50556. + .down_link = down_link_internal,
  50557. + .has_pointer_to = has_pointer_to_internal
  50558. + }
  50559. + }
  50560. + },
  50561. + [EXTENT_POINTER_ID] = {
  50562. + .h = {
  50563. + .type_id = REISER4_ITEM_PLUGIN_TYPE,
  50564. + .id = EXTENT_POINTER_ID,
  50565. + .groups = (1 << UNIX_FILE_METADATA_ITEM_TYPE),
  50566. + .pops = NULL,
  50567. + .label = "extent",
  50568. + .desc = "extent item",
  50569. + .linkage = {NULL, NULL}
  50570. + },
  50571. + .b = {
  50572. + .max_key_inside = max_key_inside_extent,
  50573. + .can_contain_key = can_contain_key_extent,
  50574. + .mergeable = mergeable_extent,
  50575. + .nr_units = nr_units_extent,
  50576. + .lookup = lookup_extent,
  50577. + .init = NULL,
  50578. + .paste = paste_extent,
  50579. + .fast_paste = agree_to_fast_op,
  50580. + .can_shift = can_shift_extent,
  50581. + .create_hook = create_hook_extent,
  50582. + .copy_units = copy_units_extent,
  50583. + .kill_hook = kill_hook_extent,
  50584. + .shift_hook = NULL,
  50585. + .cut_units = cut_units_extent,
  50586. + .kill_units = kill_units_extent,
  50587. + .unit_key = unit_key_extent,
  50588. + .max_unit_key = max_unit_key_extent,
  50589. + .estimate = NULL,
  50590. + .item_data_by_flow = NULL,
  50591. +#if REISER4_DEBUG
  50592. + .check = reiser4_check_extent
  50593. +#endif
  50594. + },
  50595. + .f = {
  50596. + .utmost_child = utmost_child_extent,
  50597. + .utmost_child_real_block =
  50598. + utmost_child_real_block_extent,
  50599. + .update = NULL,
  50600. + .scan = reiser4_scan_extent,
  50601. + .convert = NULL,
  50602. + .key_by_offset = key_by_offset_extent
  50603. + },
  50604. + .s = {
  50605. + .file = {
  50606. + .write = reiser4_write_extent,
  50607. + .read = reiser4_read_extent,
  50608. + .readpage = reiser4_readpage_extent,
  50609. + .get_block = get_block_address_extent,
  50610. + .append_key = append_key_extent,
  50611. + .init_coord_extension =
  50612. + init_coord_extension_extent
  50613. + }
  50614. + }
  50615. + },
  50616. + [FORMATTING_ID] = {
  50617. + .h = {
  50618. + .type_id = REISER4_ITEM_PLUGIN_TYPE,
  50619. + .id = FORMATTING_ID,
  50620. + .groups = (1 << UNIX_FILE_METADATA_ITEM_TYPE),
  50621. + .pops = NULL,
  50622. + .label = "body",
  50623. + .desc = "body (or tail?) item",
  50624. + .linkage = {NULL, NULL}
  50625. + },
  50626. + .b = {
  50627. + .max_key_inside = max_key_inside_tail,
  50628. + .can_contain_key = can_contain_key_tail,
  50629. + .mergeable = mergeable_tail,
  50630. + .nr_units = nr_units_tail,
  50631. + .lookup = lookup_tail,
  50632. + .init = NULL,
  50633. + .paste = paste_tail,
  50634. + .fast_paste = agree_to_fast_op,
  50635. + .can_shift = can_shift_tail,
  50636. + .create_hook = NULL,
  50637. + .copy_units = copy_units_tail,
  50638. + .kill_hook = kill_hook_tail,
  50639. + .shift_hook = NULL,
  50640. + .cut_units = cut_units_tail,
  50641. + .kill_units = kill_units_tail,
  50642. + .unit_key = unit_key_tail,
  50643. + .max_unit_key = unit_key_tail,
  50644. + .estimate = NULL,
  50645. + .item_data_by_flow = NULL,
  50646. +#if REISER4_DEBUG
  50647. + .check = NULL
  50648. +#endif
  50649. + },
  50650. + .f = {
  50651. + .utmost_child = NULL,
  50652. + .utmost_child_real_block = NULL,
  50653. + .update = NULL,
  50654. + .scan = NULL,
  50655. + .convert = NULL
  50656. + },
  50657. + .s = {
  50658. + .file = {
  50659. + .write = reiser4_write_tail,
  50660. + .read = reiser4_read_tail,
  50661. + .readpage = readpage_tail,
  50662. + .get_block = get_block_address_tail,
  50663. + .append_key = append_key_tail,
  50664. + .init_coord_extension =
  50665. + init_coord_extension_tail
  50666. + }
  50667. + }
  50668. + },
  50669. + [CTAIL_ID] = {
  50670. + .h = {
  50671. + .type_id = REISER4_ITEM_PLUGIN_TYPE,
  50672. + .id = CTAIL_ID,
  50673. + .groups = (1 << UNIX_FILE_METADATA_ITEM_TYPE),
  50674. + .pops = NULL,
  50675. + .label = "ctail",
  50676. + .desc = "cryptcompress tail item",
  50677. + .linkage = {NULL, NULL}
  50678. + },
  50679. + .b = {
  50680. + .max_key_inside = max_key_inside_tail,
  50681. + .can_contain_key = can_contain_key_ctail,
  50682. + .mergeable = mergeable_ctail,
  50683. + .nr_units = nr_units_ctail,
  50684. + .lookup = NULL,
  50685. + .init = init_ctail,
  50686. + .paste = paste_ctail,
  50687. + .fast_paste = agree_to_fast_op,
  50688. + .can_shift = can_shift_ctail,
  50689. + .create_hook = create_hook_ctail,
  50690. + .copy_units = copy_units_ctail,
  50691. + .kill_hook = kill_hook_ctail,
  50692. + .shift_hook = shift_hook_ctail,
  50693. + .cut_units = cut_units_ctail,
  50694. + .kill_units = kill_units_ctail,
  50695. + .unit_key = unit_key_tail,
  50696. + .max_unit_key = unit_key_tail,
  50697. + .estimate = estimate_ctail,
  50698. + .item_data_by_flow = NULL,
  50699. +#if REISER4_DEBUG
  50700. + .check = check_ctail
  50701. +#endif
  50702. + },
  50703. + .f = {
  50704. + .utmost_child = utmost_child_ctail,
  50705. + /* FIXME-EDWARD: write this */
  50706. + .utmost_child_real_block = NULL,
  50707. + .update = NULL,
  50708. + .scan = scan_ctail,
  50709. + .convert = convert_ctail
  50710. + },
  50711. + .s = {
  50712. + .file = {
  50713. + .write = NULL,
  50714. + .read = read_ctail,
  50715. + .readpage = readpage_ctail,
  50716. + .get_block = get_block_address_tail,
  50717. + .append_key = append_key_ctail,
  50718. + .init_coord_extension =
  50719. + init_coord_extension_tail
  50720. + }
  50721. + }
  50722. + },
  50723. + [BLACK_BOX_ID] = {
  50724. + .h = {
  50725. + .type_id = REISER4_ITEM_PLUGIN_TYPE,
  50726. + .id = BLACK_BOX_ID,
  50727. + .groups = (1 << OTHER_ITEM_TYPE),
  50728. + .pops = NULL,
  50729. + .label = "blackbox",
  50730. + .desc = "black box item",
  50731. + .linkage = {NULL, NULL}
  50732. + },
  50733. + .b = {
  50734. + .max_key_inside = NULL,
  50735. + .can_contain_key = NULL,
  50736. + .mergeable = not_mergeable,
  50737. + .nr_units = nr_units_single_unit,
  49738. + /* no need for ->lookup method */
  50739. + .lookup = NULL,
  50740. + .init = NULL,
  50741. + .paste = NULL,
  50742. + .fast_paste = NULL,
  50743. + .can_shift = NULL,
  50744. + .copy_units = NULL,
  50745. + .create_hook = NULL,
  50746. + .kill_hook = NULL,
  50747. + .shift_hook = NULL,
  50748. + .cut_units = NULL,
  50749. + .kill_units = NULL,
  50750. + .unit_key = NULL,
  50751. + .max_unit_key = NULL,
  50752. + .estimate = NULL,
  50753. + .item_data_by_flow = NULL,
  50754. +#if REISER4_DEBUG
  50755. + .check = NULL
  50756. +#endif
  50757. + }
  50758. + }
  50759. +};
  50760. +
  50761. +/* Make Linus happy.
  50762. + Local variables:
  50763. + c-indentation-style: "K&R"
  50764. + mode-name: "LC"
  50765. + c-basic-offset: 8
  50766. + tab-width: 8
  50767. + fill-column: 120
  50768. + End:
  50769. +*/
  50770. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/item/item.h linux-4.14.2/fs/reiser4/plugin/item/item.h
  50771. --- linux-4.14.2.orig/fs/reiser4/plugin/item/item.h 1970-01-01 01:00:00.000000000 +0100
  50772. +++ linux-4.14.2/fs/reiser4/plugin/item/item.h 2017-11-26 22:13:09.000000000 +0100
  50773. @@ -0,0 +1,398 @@
  50774. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  50775. +
  50776. +/* first read balance.c comments before reading this */
  50777. +
  50778. +/* An item_plugin implements all of the operations required for
  50779. + balancing that are item specific. */
  50780. +
  50781. +/* an item plugin also implements other operations that are specific to that
  50782. + item. These go into the item specific operations portion of the item
  50783. + handler, and all of the item specific portions of the item handler are put
  50784. + into a union. */
  50785. +
  50786. +#if !defined( __REISER4_ITEM_H__ )
  50787. +#define __REISER4_ITEM_H__
  50788. +
  50789. +#include "../../forward.h"
  50790. +#include "../plugin_header.h"
  50791. +#include "../../dformat.h"
  50792. +#include "../../seal.h"
  50793. +#include "../../plugin/file/file.h"
  50794. +
  50795. +#include <linux/fs.h> /* for struct file, struct inode */
  50796. +#include <linux/mm.h> /* for struct page */
  50797. +#include <linux/dcache.h> /* for struct dentry */
  50798. +
  50799. +typedef enum {
  50800. + STAT_DATA_ITEM_TYPE,
  50801. + DIR_ENTRY_ITEM_TYPE,
  50802. + INTERNAL_ITEM_TYPE,
  50803. + UNIX_FILE_METADATA_ITEM_TYPE,
  50804. + OTHER_ITEM_TYPE
  50805. +} item_type_id;
  50806. +
  50807. +/* this is the part of each item plugin that all items are expected to
  50808. + support or at least explicitly fail to support by setting the
  50809. + pointer to null. */
  50810. +struct balance_ops {
  50811. + /* operations called by balancing
  50812. +
  50813. + It is interesting to consider that some of these item
  50814. + operations could be given sources or targets that are not
  50815. + really items in nodes. This could be ok/useful.
  50816. +
  50817. + */
  50818. + /* maximal key that can _possibly_ be occupied by this item
  50819. +
  50820. + When inserting, and node ->lookup() method (called by
  50821. + coord_by_key()) reaches an item after binary search,
  50822. + the ->max_key_inside() item plugin method is used to determine
  50823. + whether new item should pasted into existing item
  50824. + (new_key<=max_key_inside()) or new item has to be created
  50825. + (new_key>max_key_inside()).
  50826. +
  50827. + For items that occupy exactly one key (like stat-data)
  50828. + this method should return this key. For items that can
  50829. + grow indefinitely (extent, directory item) this should
  50830. + return reiser4_max_key().
  50831. +
  50832. + For example extent with the key
  50833. +
  50834. + (LOCALITY,4,OBJID,STARTING-OFFSET), and length BLK blocks,
  50835. +
  50836. + ->max_key_inside is (LOCALITY,4,OBJID,0xffffffffffffffff), and
  50837. + */
  50838. + reiser4_key *(*max_key_inside) (const coord_t *, reiser4_key *);
  50839. +
  50840. + /* true if item @coord can merge data at @key. */
  50841. + int (*can_contain_key) (const coord_t *, const reiser4_key *,
  50842. + const reiser4_item_data *);
  50843. + /* mergeable() - check items for mergeability
  50844. +
  50845. + Optional method. Returns true if two items can be merged.
  50846. +
  50847. + */
  50848. + int (*mergeable) (const coord_t *, const coord_t *);
  50849. +
  50850. + /* number of atomic things in an item.
  50851. + NOTE FOR CONTRIBUTORS: use a generic method
  50852. + nr_units_single_unit() for solid (atomic) items, as
  50853. + tree operations use it as a criterion of solidness
  50854. + (see is_solid_item macro) */
  50855. + pos_in_node_t(*nr_units) (const coord_t *);
  50856. +
  50857. + /* search within item for a unit within the item, and return a
  50858. + pointer to it. This can be used to calculate how many
  50859. + bytes to shrink an item if you use pointer arithmetic and
  50860. + compare to the start of the item body if the item's data
  50861. + are continuous in the node. If the item's data are not
  50862. + continuous in the node, all sorts of other things are maybe
  50863. + going to break as well. */
  50864. + lookup_result(*lookup) (const reiser4_key *, lookup_bias, coord_t *);
  50865. + /* method called by node_plugin->create_item() to initialise new
  50866. + item */
  50867. + int (*init) (coord_t * target, coord_t * from,
  50868. + reiser4_item_data * data);
  50869. + /* method called (e.g., by reiser4_resize_item()) to place new data
  50870. + into item when it grows */
  50871. + int (*paste) (coord_t *, reiser4_item_data *, carry_plugin_info *);
  50872. + /* return true if paste into @coord is allowed to skip
  50873. + carry. That is, if such paste would require any changes
  50874. + at the parent level
  50875. + */
  50876. + int (*fast_paste) (const coord_t *);
  50877. + /* how many but not more than @want units of @source can be
  50878. + shifted into @target node. If pend == append - we try to
  50879. + append last item of @target by first units of @source. If
  50880. + pend == prepend - we try to "prepend" first item in @target
  50881. + by last units of @source. @target node has @free_space
  50882. + bytes of free space. Total size of those units are returned
  50883. + via @size.
  50884. +
  50885. + @target is not NULL if shifting to the mergeable item and
  50886. + NULL if a new item will be created during shifting.
  50887. + */
  50888. + int (*can_shift) (unsigned free_space, coord_t *,
  50889. + znode *, shift_direction, unsigned *size,
  50890. + unsigned want);
  50891. +
  50892. + /* starting off @from-th unit of item @source append or
  50893. + prepend @count units to @target. @target has been already
  50894. + expanded by @free_space bytes. That must be exactly what is
  50895. + needed for those items in @target. If @where_is_free_space
  50896. + == SHIFT_LEFT - free space is at the end of @target item,
  50897. + otherwise - it is in the beginning of it. */
  50898. + void (*copy_units) (coord_t *, coord_t *,
  50899. + unsigned from, unsigned count,
  50900. + shift_direction where_is_free_space,
  50901. + unsigned free_space);
  50902. +
  50903. + int (*create_hook) (const coord_t *, void *);
  50904. + /* do whatever is necessary to do when @count units starting
  50905. + from @from-th one are removed from the tree */
  50906. + /* FIXME-VS: this is used to be here for, in particular,
  50907. + extents and items of internal type to free blocks they point
  50908. + to at the same time with removing items from a
  50909. + tree. Problems start, however, when dealloc_block fails due
  50910. + to some reason. Item gets removed, but blocks it pointed to
  50911. + are not freed. It is not clear how to fix this for items of
  50912. + internal type because a need to remove internal item may
  50913. + appear in the middle of balancing, and there is no way to
  50914. + undo changes made. OTOH, if space allocator involves
  50915. + balancing to perform dealloc_block - this will probably
  50916. + break balancing due to deadlock issues
  50917. + */
  50918. + int (*kill_hook) (const coord_t *, pos_in_node_t from,
  50919. + pos_in_node_t count, struct carry_kill_data *);
  50920. + int (*shift_hook) (const coord_t *, unsigned from, unsigned count,
  50921. + znode * _node);
  50922. +
  50923. + /* unit @*from contains @from_key. unit @*to contains @to_key. Cut all keys between @from_key and @to_key
  50924. + including boundaries. When units are cut from item beginning - move space which gets freed to head of
  50925. + item. When units are cut from item end - move freed space to item end. When units are cut from the middle of
  50926. + item - move freed space to item head. Return amount of space which got freed. Save smallest removed key in
  50927. + @smallest_removed if it is not 0. Save new first item key in @new_first_key if it is not 0
  50928. + */
  50929. + int (*cut_units) (coord_t *, pos_in_node_t from, pos_in_node_t to,
  50930. + struct carry_cut_data *,
  50931. + reiser4_key * smallest_removed,
  50932. + reiser4_key * new_first_key);
  50933. +
  50934. + /* like cut_units, except that these units are removed from the
  50935. + tree, not only from a node */
  50936. + int (*kill_units) (coord_t *, pos_in_node_t from, pos_in_node_t to,
  50937. + struct carry_kill_data *,
  50938. + reiser4_key * smallest_removed,
  50939. + reiser4_key * new_first);
  50940. +
  50941. + /* if @key_of_coord == 1 - key of coord is returned, otherwise -
  50942. + key of unit is returned. If @coord is not set to certain
  50943. + unit - ERR_PTR(-ENOENT) is returned */
  50944. + reiser4_key *(*unit_key) (const coord_t *, reiser4_key *);
  50945. + reiser4_key *(*max_unit_key) (const coord_t *, reiser4_key *);
  50946. + /* estimate how much space is needed for paste @data into item at
  50947. + @coord. if @coord==0 - estimate insertion, otherwise - estimate
  50948. + pasting
  50949. + */
  50950. + int (*estimate) (const coord_t *, const reiser4_item_data *);
  50951. +
  50952. + /* converts flow @f to item data. @coord == 0 on insert */
  50953. + int (*item_data_by_flow) (const coord_t *, const flow_t *,
  50954. + reiser4_item_data *);
  50955. +
  50956. + /*void (*show) (struct seq_file *, coord_t *); */
  50957. +
  50958. +#if REISER4_DEBUG
  50959. + /* used for debugging, every item should have here the most
  50960. + complete possible check of the consistency of the item that
  50961. + the inventor can construct */
  50962. + int (*check) (const coord_t *, const char **error);
  50963. +#endif
  50964. +
  50965. +};
  50966. +
  50967. +struct flush_ops {
  50968. + /* return the right or left child of @coord, only if it is in memory */
  50969. + int (*utmost_child) (const coord_t *, sideof side, jnode ** child);
  50970. +
  50971. + /* return whether the right or left child of @coord has a non-fake
  50972. + block number. */
  50973. + int (*utmost_child_real_block) (const coord_t *, sideof side,
  50974. + reiser4_block_nr *);
  50975. + /* relocate child at @coord to the @block */
  50976. + void (*update) (const coord_t *, const reiser4_block_nr *);
  50977. + /* count unformatted nodes per item for leaf relocation policy, etc. */
  50978. + int (*scan) (flush_scan * scan);
  50979. + /* convert item by flush */
  50980. + int (*convert) (flush_pos_t * pos);
  50981. + /* backward mapping from jnode offset to a key. */
  50982. + int (*key_by_offset) (struct inode *, loff_t, reiser4_key *);
  50983. +};
  50984. +
  50985. +/* operations specific to the directory item */
  50986. +struct dir_entry_iops {
  50987. + /* extract stat-data key from directory entry at @coord and place it
  50988. + into @key. */
  50989. + int (*extract_key) (const coord_t *, reiser4_key * key);
  50990. + /* update object key in item. */
  50991. + int (*update_key) (const coord_t *, const reiser4_key *, lock_handle *);
  50992. + /* extract name from directory entry at @coord and return it */
  50993. + char *(*extract_name) (const coord_t *, char *buf);
  50994. + /* extract file type (DT_* stuff) from directory entry at @coord and
  50995. + return it */
  50996. + unsigned (*extract_file_type) (const coord_t *);
  50997. + int (*add_entry) (struct inode * dir,
  50998. + coord_t *, lock_handle *,
  50999. + const struct dentry * name,
  51000. + reiser4_dir_entry_desc * entry);
  51001. + int (*rem_entry) (struct inode * dir, const struct qstr * name,
  51002. + coord_t *, lock_handle *,
  51003. + reiser4_dir_entry_desc * entry);
  51004. + int (*max_name_len) (const struct inode * dir);
  51005. +};
  51006. +
  51007. +/* operations specific to items regular (unix) file metadata are built of */
  51008. +struct file_iops{
  51009. + ssize_t (*write) (struct file *, struct inode *,
  51010. + const char __user *, size_t, loff_t *pos);
  51011. + int (*read) (struct file *, flow_t *, hint_t *);
  51012. + int (*readpage) (void *, struct page *);
  51013. + int (*get_block) (const coord_t *, sector_t, sector_t *);
  51014. + /*
  51015. + * key of first byte which is not addressed by the item @coord is set
  51016. + * to.
  51017. + * For example, for extent item with the key
  51018. + *
  51019. + * (LOCALITY,4,OBJID,STARTING-OFFSET), and length BLK blocks,
  51020. + *
  51021. + * ->append_key is
  51022. + *
  51023. + * (LOCALITY,4,OBJID,STARTING-OFFSET + BLK * block_size)
  51024. + */
  51025. + reiser4_key *(*append_key) (const coord_t *, reiser4_key *);
  51026. +
  51027. + void (*init_coord_extension) (uf_coord_t *, loff_t);
  51028. +};
  51029. +
  51030. +/* operations specific to items of stat data type */
  51031. +struct sd_iops {
  51032. + int (*init_inode) (struct inode * inode, char *sd, int len);
  51033. + int (*save_len) (struct inode * inode);
  51034. + int (*save) (struct inode * inode, char **area);
  51035. +};
  51036. +
  51037. +/* operations specific to internal item */
  51038. +struct internal_iops{
  51039. + /* all tree traversal want to know from internal item is where
  51040. + to go next. */
  51041. + void (*down_link) (const coord_t * coord,
  51042. + const reiser4_key * key, reiser4_block_nr * block);
  51043. + /* check that given internal item contains given pointer. */
  51044. + int (*has_pointer_to) (const coord_t * coord,
  51045. + const reiser4_block_nr * block);
  51046. +};
  51047. +
  51048. +struct item_plugin {
  51049. + /* generic fields */
  51050. + plugin_header h;
  51051. + /* methods common for all item types */
  51052. + struct balance_ops b; /* balance operations */
  51053. + struct flush_ops f; /* flush operates with items via this methods */
  51054. +
  51055. + /* methods specific to particular type of item */
  51056. + union {
  51057. + struct dir_entry_iops dir;
  51058. + struct file_iops file;
  51059. + struct sd_iops sd;
  51060. + struct internal_iops internal;
  51061. + } s;
  51062. +};
  51063. +
  51064. +#define is_solid_item(iplug) ((iplug)->b.nr_units == nr_units_single_unit)
  51065. +
  51066. +static inline item_id item_id_by_plugin(item_plugin * plugin)
  51067. +{
  51068. + return plugin->h.id;
  51069. +}
  51070. +
  51071. +static inline char get_iplugid(item_plugin * iplug)
  51072. +{
  51073. + assert("nikita-2838", iplug != NULL);
  51074. + assert("nikita-2839", iplug->h.id < 0xff);
  51075. + return (char)item_id_by_plugin(iplug);
  51076. +}
  51077. +
  51078. +extern unsigned long znode_times_locked(const znode * z);
  51079. +
  51080. +static inline void coord_set_iplug(coord_t * coord, item_plugin * iplug)
  51081. +{
  51082. + assert("nikita-2837", coord != NULL);
  51083. + assert("nikita-2838", iplug != NULL);
  51084. + coord->iplugid = get_iplugid(iplug);
  51085. + ON_DEBUG(coord->plug_v = znode_times_locked(coord->node));
  51086. +}
  51087. +
  51088. +static inline item_plugin *coord_iplug(const coord_t * coord)
  51089. +{
  51090. + assert("nikita-2833", coord != NULL);
  51091. + assert("nikita-2834", coord->iplugid != INVALID_PLUGID);
  51092. + assert("nikita-3549", coord->plug_v == znode_times_locked(coord->node));
  51093. + return (item_plugin *) plugin_by_id(REISER4_ITEM_PLUGIN_TYPE,
  51094. + coord->iplugid);
  51095. +}
  51096. +
  51097. +extern int item_can_contain_key(const coord_t * item, const reiser4_key * key,
  51098. + const reiser4_item_data *);
  51099. +extern int are_items_mergeable(const coord_t * i1, const coord_t * i2);
  51100. +extern int item_is_extent(const coord_t *);
  51101. +extern int item_is_tail(const coord_t *);
  51102. +extern int item_is_statdata(const coord_t * item);
  51103. +extern int item_is_ctail(const coord_t *);
  51104. +
  51105. +extern pos_in_node_t item_length_by_coord(const coord_t * coord);
  51106. +extern pos_in_node_t nr_units_single_unit(const coord_t * coord);
  51107. +extern item_id item_id_by_coord(const coord_t * coord /* coord to query */ );
  51108. +extern reiser4_key *item_key_by_coord(const coord_t * coord, reiser4_key * key);
  51109. +extern reiser4_key *max_item_key_by_coord(const coord_t *, reiser4_key *);
  51110. +extern reiser4_key *unit_key_by_coord(const coord_t * coord, reiser4_key * key);
  51111. +extern reiser4_key *max_unit_key_by_coord(const coord_t * coord,
  51112. + reiser4_key * key);
  51113. +extern void obtain_item_plugin(const coord_t * coord);
  51114. +
  51115. +#if defined(REISER4_DEBUG)
  51116. +extern int znode_is_loaded(const znode * node);
  51117. +#endif
  51118. +
  51119. +/* return plugin of item at @coord */
  51120. +static inline item_plugin *item_plugin_by_coord(const coord_t *
  51121. + coord /* coord to query */ )
  51122. +{
  51123. + assert("nikita-330", coord != NULL);
  51124. + assert("nikita-331", coord->node != NULL);
  51125. + assert("nikita-332", znode_is_loaded(coord->node));
  51126. +
  51127. + if (unlikely(!coord_is_iplug_set(coord)))
  51128. + obtain_item_plugin(coord);
  51129. + return coord_iplug(coord);
  51130. +}
  51131. +
  51132. +/* this returns true if item is of internal type */
  51133. +static inline int item_is_internal(const coord_t * item)
  51134. +{
  51135. + assert("vs-483", coord_is_existing_item(item));
  51136. + return plugin_of_group(item_plugin_by_coord(item), INTERNAL_ITEM_TYPE);
  51137. +}
  51138. +
  51139. +extern void item_body_by_coord_hard(coord_t * coord);
  51140. +extern void *item_body_by_coord_easy(const coord_t * coord);
  51141. +#if REISER4_DEBUG
  51142. +extern int item_body_is_valid(const coord_t * coord);
  51143. +#endif
  51144. +
  51145. +/* return pointer to item body */
  51146. +static inline void *item_body_by_coord(const coord_t *
  51147. + coord /* coord to query */ )
  51148. +{
  51149. + assert("nikita-324", coord != NULL);
  51150. + assert("nikita-325", coord->node != NULL);
  51151. + assert("nikita-326", znode_is_loaded(coord->node));
  51152. +
  51153. + if (coord->offset == INVALID_OFFSET)
  51154. + item_body_by_coord_hard((coord_t *) coord);
  51155. + assert("nikita-3201", item_body_is_valid(coord));
  51156. + assert("nikita-3550", coord->body_v == znode_times_locked(coord->node));
  51157. + return item_body_by_coord_easy(coord);
  51158. +}
  51159. +
  51160. +/* __REISER4_ITEM_H__ */
  51161. +#endif
  51162. +/* Make Linus happy.
  51163. + Local variables:
  51164. + c-indentation-style: "K&R"
  51165. + mode-name: "LC"
  51166. + c-basic-offset: 8
  51167. + tab-width: 8
  51168. + fill-column: 120
  51169. + scroll-step: 1
  51170. + End:
  51171. +*/
  51172. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/item/Makefile linux-4.14.2/fs/reiser4/plugin/item/Makefile
  51173. --- linux-4.14.2.orig/fs/reiser4/plugin/item/Makefile 1970-01-01 01:00:00.000000000 +0100
  51174. +++ linux-4.14.2/fs/reiser4/plugin/item/Makefile 2017-11-26 22:13:09.000000000 +0100
  51175. @@ -0,0 +1,18 @@
  51176. +obj-$(CONFIG_REISER4_FS) += item_plugins.o
  51177. +
  51178. +item_plugins-objs := \
  51179. + item.o \
  51180. + static_stat.o \
  51181. + sde.o \
  51182. + cde.o \
  51183. + blackbox.o \
  51184. + internal.o \
  51185. + tail.o \
  51186. + ctail.o \
  51187. + extent.o \
  51188. + extent_item_ops.o \
  51189. + extent_file_ops.o \
  51190. + extent_flush_ops.o
  51191. +
  51192. +
  51193. +
  51194. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/item/sde.c linux-4.14.2/fs/reiser4/plugin/item/sde.c
  51195. --- linux-4.14.2.orig/fs/reiser4/plugin/item/sde.c 1970-01-01 01:00:00.000000000 +0100
  51196. +++ linux-4.14.2/fs/reiser4/plugin/item/sde.c 2017-11-26 22:13:09.000000000 +0100
  51197. @@ -0,0 +1,186 @@
  51198. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  51199. +
  51200. +/* Directory entry implementation */
  51201. +#include "../../forward.h"
  51202. +#include "../../debug.h"
  51203. +#include "../../dformat.h"
  51204. +#include "../../kassign.h"
  51205. +#include "../../coord.h"
  51206. +#include "sde.h"
  51207. +#include "item.h"
  51208. +#include "../plugin.h"
  51209. +#include "../../znode.h"
  51210. +#include "../../carry.h"
  51211. +#include "../../tree.h"
  51212. +#include "../../inode.h"
  51213. +
  51214. +#include <linux/fs.h> /* for struct inode */
  51215. +#include <linux/dcache.h> /* for struct dentry */
  51216. +
  51217. +/* ->extract_key() method of simple directory item plugin. */
  51218. +int extract_key_de(const coord_t * coord /* coord of item */ ,
  51219. + reiser4_key * key /* resulting key */ )
  51220. +{
  51221. + directory_entry_format *dent;
  51222. +
  51223. + assert("nikita-1458", coord != NULL);
  51224. + assert("nikita-1459", key != NULL);
  51225. +
  51226. + dent = (directory_entry_format *) item_body_by_coord(coord);
  51227. + assert("nikita-1158", item_length_by_coord(coord) >= (int)sizeof *dent);
  51228. + return extract_key_from_id(&dent->id, key);
  51229. +}
  51230. +
  51231. +int
  51232. +update_key_de(const coord_t * coord, const reiser4_key * key,
  51233. + lock_handle * lh UNUSED_ARG)
  51234. +{
  51235. + directory_entry_format *dent;
  51236. + obj_key_id obj_id;
  51237. + int result;
  51238. +
  51239. + assert("nikita-2342", coord != NULL);
  51240. + assert("nikita-2343", key != NULL);
  51241. +
  51242. + dent = (directory_entry_format *) item_body_by_coord(coord);
  51243. + result = build_obj_key_id(key, &obj_id);
  51244. + if (result == 0) {
  51245. + dent->id = obj_id;
  51246. + znode_make_dirty(coord->node);
  51247. + }
  51248. + return 0;
  51249. +}
  51250. +
  51251. +char *extract_dent_name(const coord_t * coord, directory_entry_format * dent,
  51252. + char *buf)
  51253. +{
  51254. + reiser4_key key;
  51255. +
  51256. + unit_key_by_coord(coord, &key);
  51257. + if (get_key_type(&key) != KEY_FILE_NAME_MINOR)
  51258. + reiser4_print_address("oops", znode_get_block(coord->node));
  51259. + if (!is_longname_key(&key)) {
  51260. + if (is_dot_key(&key))
  51261. + return (char *)".";
  51262. + else
  51263. + return extract_name_from_key(&key, buf);
  51264. + } else
  51265. + return (char *)dent->name;
  51266. +}
  51267. +
  51268. +/* ->extract_name() method of simple directory item plugin. */
  51269. +char *extract_name_de(const coord_t * coord /* coord of item */ , char *buf)
  51270. +{
  51271. + directory_entry_format *dent;
  51272. +
  51273. + assert("nikita-1460", coord != NULL);
  51274. +
  51275. + dent = (directory_entry_format *) item_body_by_coord(coord);
  51276. + return extract_dent_name(coord, dent, buf);
  51277. +}
  51278. +
  51279. +/* ->extract_file_type() method of simple directory item plugin. */
  51280. +unsigned extract_file_type_de(const coord_t * coord UNUSED_ARG /* coord of
  51281. + * item */ )
  51282. +{
  51283. + assert("nikita-1764", coord != NULL);
  51284. + /* we don't store file type in the directory entry yet.
  51285. +
  51286. + But see comments at kassign.h:obj_key_id
  51287. + */
  51288. + return DT_UNKNOWN;
  51289. +}
  51290. +
  51291. +int add_entry_de(struct inode *dir /* directory of item */ ,
  51292. + coord_t * coord /* coord of item */ ,
  51293. + lock_handle * lh /* insertion lock handle */ ,
  51294. + const struct dentry *de /* name to add */ ,
  51295. + reiser4_dir_entry_desc * entry /* parameters of new directory
  51296. + * entry */ )
  51297. +{
  51298. + reiser4_item_data data;
  51299. + directory_entry_format *dent;
  51300. + int result;
  51301. + const char *name;
  51302. + int len;
  51303. + int longname;
  51304. +
  51305. + name = de->d_name.name;
  51306. + len = de->d_name.len;
  51307. + assert("nikita-1163", strlen(name) == len);
  51308. +
  51309. + longname = is_longname(name, len);
  51310. +
  51311. + data.length = sizeof *dent;
  51312. + if (longname)
  51313. + data.length += len + 1;
  51314. + data.data = NULL;
  51315. + data.user = 0;
  51316. + data.iplug = item_plugin_by_id(SIMPLE_DIR_ENTRY_ID);
  51317. +
  51318. + inode_add_bytes(dir, data.length);
  51319. +
  51320. + result = insert_by_coord(coord, &data, &entry->key, lh, 0 /*flags */ );
  51321. + if (result != 0)
  51322. + return result;
  51323. +
  51324. + dent = (directory_entry_format *) item_body_by_coord(coord);
  51325. + build_inode_key_id(entry->obj, &dent->id);
  51326. + if (longname) {
  51327. + memcpy(dent->name, name, len);
  51328. + put_unaligned(0, &dent->name[len]);
  51329. + }
  51330. + return 0;
  51331. +}
  51332. +
  51333. +int rem_entry_de(struct inode *dir /* directory of item */ ,
  51334. + const struct qstr *name UNUSED_ARG,
  51335. + coord_t * coord /* coord of item */ ,
  51336. + lock_handle * lh UNUSED_ARG /* lock handle for
  51337. + * removal */ ,
  51338. + reiser4_dir_entry_desc * entry UNUSED_ARG /* parameters of
  51339. + * directory entry
  51340. + * being removed */ )
  51341. +{
  51342. + coord_t shadow;
  51343. + int result;
  51344. + int length;
  51345. +
  51346. + length = item_length_by_coord(coord);
  51347. + if (inode_get_bytes(dir) < length) {
  51348. + warning("nikita-2627", "Dir is broke: %llu: %llu",
  51349. + (unsigned long long)get_inode_oid(dir),
  51350. + inode_get_bytes(dir));
  51351. +
  51352. + return RETERR(-EIO);
  51353. + }
  51354. +
  51355. + /* cut_node() is supposed to take pointers to _different_
  51356. + coords, because it will modify them without respect to
  51357. + possible aliasing. To work around this, create temporary copy
  51358. + of @coord.
  51359. + */
  51360. + coord_dup(&shadow, coord);
  51361. + result =
  51362. + kill_node_content(coord, &shadow, NULL, NULL, NULL, NULL, NULL, 0);
  51363. + if (result == 0) {
  51364. + inode_sub_bytes(dir, length);
  51365. + }
  51366. + return result;
  51367. +}
  51368. +
  51369. +int max_name_len_de(const struct inode *dir)
  51370. +{
  51371. + return reiser4_tree_by_inode(dir)->nplug->max_item_size() -
  51372. + sizeof(directory_entry_format) - 2;
  51373. +}
  51374. +
  51375. +/* Make Linus happy.
  51376. + Local variables:
  51377. + c-indentation-style: "K&R"
  51378. + mode-name: "LC"
  51379. + c-basic-offset: 8
  51380. + tab-width: 8
  51381. + fill-column: 120
  51382. + End:
  51383. +*/
  51384. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/item/sde.h linux-4.14.2/fs/reiser4/plugin/item/sde.h
  51385. --- linux-4.14.2.orig/fs/reiser4/plugin/item/sde.h 1970-01-01 01:00:00.000000000 +0100
  51386. +++ linux-4.14.2/fs/reiser4/plugin/item/sde.h 2017-11-26 22:13:09.000000000 +0100
  51387. @@ -0,0 +1,66 @@
  51388. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  51389. +
  51390. +/* Directory entry. */
  51391. +
  51392. +#if !defined( __FS_REISER4_PLUGIN_DIRECTORY_ENTRY_H__ )
  51393. +#define __FS_REISER4_PLUGIN_DIRECTORY_ENTRY_H__
  51394. +
  51395. +#include "../../forward.h"
  51396. +#include "../../dformat.h"
  51397. +#include "../../kassign.h"
  51398. +#include "../../key.h"
  51399. +
  51400. +#include <linux/fs.h>
  51401. +#include <linux/dcache.h> /* for struct dentry */
  51402. +
  51403. +typedef struct directory_entry_format {
  51404. + /* key of object stat-data. It's not necessary to store whole
  51405. + key here, because it's always key of stat-data, so minor
  51406. + packing locality and offset can be omitted here. But this
  51407. + relies on particular key allocation scheme for stat-data, so,
  51408. + for extensibility sake, whole key can be stored here.
  51409. +
  51410. + We store key as array of bytes, because we don't want 8-byte
  51411. + alignment of dir entries.
  51412. + */
  51413. + obj_key_id id;
  51414. + /* file name. Null terminated string. */
  51415. + d8 name[0];
  51416. +} directory_entry_format;
  51417. +
  51418. +void print_de(const char *prefix, coord_t * coord);
  51419. +int extract_key_de(const coord_t * coord, reiser4_key * key);
  51420. +int update_key_de(const coord_t * coord, const reiser4_key * key,
  51421. + lock_handle * lh);
  51422. +char *extract_name_de(const coord_t * coord, char *buf);
  51423. +unsigned extract_file_type_de(const coord_t * coord);
  51424. +int add_entry_de(struct inode *dir, coord_t * coord,
  51425. + lock_handle * lh, const struct dentry *name,
  51426. + reiser4_dir_entry_desc * entry);
  51427. +int rem_entry_de(struct inode *dir, const struct qstr *name, coord_t * coord,
  51428. + lock_handle * lh, reiser4_dir_entry_desc * entry);
  51429. +int max_name_len_de(const struct inode *dir);
  51430. +
  51431. +int de_rem_and_shrink(struct inode *dir, coord_t * coord, int length);
  51432. +
  51433. +char *extract_dent_name(const coord_t * coord,
  51434. + directory_entry_format * dent, char *buf);
  51435. +
  51436. +#if REISER4_LARGE_KEY
  51437. +#define DE_NAME_BUF_LEN (24)
  51438. +#else
  51439. +#define DE_NAME_BUF_LEN (16)
  51440. +#endif
  51441. +
  51442. +/* __FS_REISER4_PLUGIN_DIRECTORY_ENTRY_H__ */
  51443. +#endif
  51444. +
  51445. +/* Make Linus happy.
  51446. + Local variables:
  51447. + c-indentation-style: "K&R"
  51448. + mode-name: "LC"
  51449. + c-basic-offset: 8
  51450. + tab-width: 8
  51451. + fill-column: 120
  51452. + End:
  51453. +*/
  51454. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/item/static_stat.c linux-4.14.2/fs/reiser4/plugin/item/static_stat.c
  51455. --- linux-4.14.2.orig/fs/reiser4/plugin/item/static_stat.c 1970-01-01 01:00:00.000000000 +0100
  51456. +++ linux-4.14.2/fs/reiser4/plugin/item/static_stat.c 2017-11-26 22:13:09.000000000 +0100
  51457. @@ -0,0 +1,1114 @@
  51458. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  51459. +
  51460. +/* stat data manipulation. */
  51461. +
  51462. +#include "../../forward.h"
  51463. +#include "../../super.h"
  51464. +#include "../../vfs_ops.h"
  51465. +#include "../../inode.h"
  51466. +#include "../../debug.h"
  51467. +#include "../../dformat.h"
  51468. +#include "../object.h"
  51469. +#include "../plugin.h"
  51470. +#include "../plugin_header.h"
  51471. +#include "static_stat.h"
  51472. +#include "item.h"
  51473. +
  51474. +#include <linux/types.h>
  51475. +#include <linux/fs.h>
  51476. +
  51477. +/* see static_stat.h for explanation */
  51478. +
  51479. +/* helper function used while we are dumping/loading inode/plugin state
  51480. + to/from the stat-data. */
  51481. +
  51482. +static void move_on(int *length /* space remaining in stat-data */ ,
  51483. + char **area /* current coord in stat data */ ,
  51484. + int size_of /* how many bytes to move forward */ )
  51485. +{
  51486. + assert("nikita-615", length != NULL);
  51487. + assert("nikita-616", area != NULL);
  51488. +
  51489. + *length -= size_of;
  51490. + *area += size_of;
  51491. +
  51492. + assert("nikita-617", *length >= 0);
  51493. +}
  51494. +
  51495. +/* helper function used while loading inode/plugin state from stat-data.
  51496. + Complain if there is less space in stat-data than was expected.
  51497. + Can only happen on disk corruption. */
  51498. +static int not_enough_space(struct inode *inode /* object being processed */ ,
  51499. + const char *where /* error message */ )
  51500. +{
  51501. + assert("nikita-618", inode != NULL);
  51502. +
  51503. + warning("nikita-619", "Not enough space in %llu while loading %s",
  51504. + (unsigned long long)get_inode_oid(inode), where);
  51505. +
  51506. + return RETERR(-EINVAL);
  51507. +}
  51508. +
  51509. +/* helper function used while loading inode/plugin state from
  51510. + stat-data. Call it if invalid plugin id was found. */
  51511. +static int unknown_plugin(reiser4_plugin_id id /* invalid id */ ,
  51512. + struct inode *inode /* object being processed */ )
  51513. +{
  51514. + warning("nikita-620", "Unknown plugin %i in %llu",
  51515. + id, (unsigned long long)get_inode_oid(inode));
  51516. +
  51517. + return RETERR(-EINVAL);
  51518. +}
  51519. +
  51520. +/* this is installed as ->init_inode() method of
  51521. + item_plugins[ STATIC_STAT_DATA_IT ] (fs/reiser4/plugin/item/item.c).
  51522. + Copies data from on-disk stat-data format into inode.
  51523. + Handles stat-data extensions. */
  51524. +/* was sd_load */
  51525. +int init_inode_static_sd(struct inode *inode /* object being processed */ ,
  51526. + char *sd /* stat-data body */ ,
  51527. + int len /* length of stat-data */ )
  51528. +{
  51529. + int result;
  51530. + int bit;
  51531. + int chunk;
  51532. + __u16 mask;
  51533. + __u64 bigmask;
  51534. + reiser4_stat_data_base *sd_base;
  51535. + reiser4_inode *state;
  51536. +
  51537. + assert("nikita-625", inode != NULL);
  51538. + assert("nikita-626", sd != NULL);
  51539. +
  51540. + result = 0;
  51541. + sd_base = (reiser4_stat_data_base *) sd;
  51542. + state = reiser4_inode_data(inode);
  51543. + mask = le16_to_cpu(get_unaligned(&sd_base->extmask));
  51544. + bigmask = mask;
  51545. + reiser4_inode_set_flag(inode, REISER4_SDLEN_KNOWN);
  51546. +
  51547. + move_on(&len, &sd, sizeof *sd_base);
  51548. + for (bit = 0, chunk = 0;
  51549. + mask != 0 || bit <= LAST_IMPORTANT_SD_EXTENSION;
  51550. + ++bit, mask >>= 1) {
  51551. + if (((bit + 1) % 16) != 0) {
  51552. + /* handle extension */
  51553. + sd_ext_plugin *sdplug;
  51554. +
  51555. + if (bit >= LAST_SD_EXTENSION) {
  51556. + warning("vpf-1904",
  51557. + "No such extension %i in inode %llu",
  51558. + bit,
  51559. + (unsigned long long)
  51560. + get_inode_oid(inode));
  51561. +
  51562. + result = RETERR(-EINVAL);
  51563. + break;
  51564. + }
  51565. +
  51566. + sdplug = sd_ext_plugin_by_id(bit);
  51567. + if (sdplug == NULL) {
  51568. + warning("nikita-627",
  51569. + "No such extension %i in inode %llu",
  51570. + bit,
  51571. + (unsigned long long)
  51572. + get_inode_oid(inode));
  51573. +
  51574. + result = RETERR(-EINVAL);
  51575. + break;
  51576. + }
  51577. + if (mask & 1) {
  51578. + assert("nikita-628", sdplug->present);
  51579. + /* alignment is not supported in node layout
  51580. + plugin yet.
  51581. + result = align( inode, &len, &sd,
  51582. + sdplug -> alignment );
  51583. + if( result != 0 )
  51584. + return result; */
  51585. + result = sdplug->present(inode, &sd, &len);
  51586. + } else if (sdplug->absent != NULL)
  51587. + result = sdplug->absent(inode);
  51588. + if (result)
  51589. + break;
  51590. + /* else, we are looking at the last bit in 16-bit
  51591. + portion of bitmask */
  51592. + } else if (mask & 1) {
  51593. + /* next portion of bitmask */
  51594. + if (len < (int)sizeof(d16)) {
  51595. + warning("nikita-629",
  51596. + "No space for bitmap in inode %llu",
  51597. + (unsigned long long)
  51598. + get_inode_oid(inode));
  51599. +
  51600. + result = RETERR(-EINVAL);
  51601. + break;
  51602. + }
  51603. + mask = le16_to_cpu(get_unaligned((d16 *)sd));
  51604. + bigmask <<= 16;
  51605. + bigmask |= mask;
  51606. + move_on(&len, &sd, sizeof(d16));
  51607. + ++chunk;
  51608. + if (chunk == 3) {
  51609. + if (!(mask & 0x8000)) {
  51610. + /* clear last bit */
  51611. + mask &= ~0x8000;
  51612. + continue;
  51613. + }
  51614. + /* too much */
  51615. + warning("nikita-630",
  51616. + "Too many extensions in %llu",
  51617. + (unsigned long long)
  51618. + get_inode_oid(inode));
  51619. +
  51620. + result = RETERR(-EINVAL);
  51621. + break;
  51622. + }
  51623. + } else
  51624. + /* bitmask exhausted */
  51625. + break;
  51626. + }
  51627. + state->extmask = bigmask;
  51628. + /* common initialisations */
  51629. + if (len - (bit / 16 * sizeof(d16)) > 0) {
  51630. + /* alignment in save_len_static_sd() is taken into account
  51631. + -edward */
  51632. + warning("nikita-631", "unused space in inode %llu",
  51633. + (unsigned long long)get_inode_oid(inode));
  51634. + }
  51635. +
  51636. + return result;
  51637. +}
  51638. +
/* estimates size of stat-data required to store inode.
   Installed as ->save_len() method of
   item_plugins[ STATIC_STAT_DATA_IT ] (fs/reiser4/plugin/item/item.c). */
/* was sd_len */
int save_len_static_sd(struct inode *inode /* object being processed */ )
{
	unsigned int result;
	__u64 mask;
	int bit;

	assert("nikita-632", inode != NULL);

	/* fixed-size stat-data base is always present */
	result = sizeof(reiser4_stat_data_base);
	mask = reiser4_inode_data(inode)->extmask;
	/* add the on-disk length of every extension enabled in the mask */
	for (bit = 0; mask != 0; ++bit, mask >>= 1) {
		if (mask & 1) {
			sd_ext_plugin *sdplug;

			sdplug = sd_ext_plugin_by_id(bit);
			assert("nikita-633", sdplug != NULL);
			/*
			   no aligment support
			   result +=
			   reiser4_round_up(result, sdplug -> alignment) -
			   result;
			 */
			result += sdplug->save_len(inode);
		}
	}
	/* every full 16-bit chunk of the mask takes an extra continuation
	   word on disk (see save_static_sd()); @bit is the number of mask
	   bits consumed by the loop above */
	result += bit / 16 * sizeof(d16);
	return result;
}
  51671. +
  51672. +/* saves inode into stat-data.
  51673. + Installed as ->save() method of
  51674. + item_plugins[ STATIC_STAT_DATA_IT ] (fs/reiser4/plugin/item/item.c). */
  51675. +/* was sd_save */
  51676. +int save_static_sd(struct inode *inode /* object being processed */ ,
  51677. + char **area /* where to save stat-data */ )
  51678. +{
  51679. + int result;
  51680. + __u64 emask;
  51681. + int bit;
  51682. + unsigned int len;
  51683. + reiser4_stat_data_base *sd_base;
  51684. +
  51685. + assert("nikita-634", inode != NULL);
  51686. + assert("nikita-635", area != NULL);
  51687. +
  51688. + result = 0;
  51689. + emask = reiser4_inode_data(inode)->extmask;
  51690. + sd_base = (reiser4_stat_data_base *) * area;
  51691. + put_unaligned(cpu_to_le16((__u16)(emask & 0xffff)), &sd_base->extmask);
  51692. + /*cputod16((unsigned)(emask & 0xffff), &sd_base->extmask);*/
  51693. +
  51694. + *area += sizeof *sd_base;
  51695. + len = 0xffffffffu;
  51696. + for (bit = 0; emask != 0; ++bit, emask >>= 1) {
  51697. + if (emask & 1) {
  51698. + if ((bit + 1) % 16 != 0) {
  51699. + sd_ext_plugin *sdplug;
  51700. + sdplug = sd_ext_plugin_by_id(bit);
  51701. + assert("nikita-636", sdplug != NULL);
  51702. + /* no alignment support yet
  51703. + align( inode, &len, area,
  51704. + sdplug -> alignment ); */
  51705. + result = sdplug->save(inode, area);
  51706. + if (result)
  51707. + break;
  51708. + } else {
  51709. + put_unaligned(cpu_to_le16((__u16)(emask & 0xffff)),
  51710. + (d16 *)(*area));
  51711. + /*cputod16((unsigned)(emask & 0xffff),
  51712. + (d16 *) * area);*/
  51713. + *area += sizeof(d16);
  51714. + }
  51715. + }
  51716. + }
  51717. + return result;
  51718. +}
  51719. +
  51720. +/* stat-data extension handling functions. */
  51721. +
/* ->present() of LIGHT_WEIGHT_STAT extension: load mode, link count and
   size from the on-disk light-weight stat-data into the inode. */
static int present_lw_sd(struct inode *inode /* object being processed */ ,
			 char **area /* position in stat-data */ ,
			 int *len /* remaining length */ )
{
	if (*len >= (int)sizeof(reiser4_light_weight_stat)) {
		reiser4_light_weight_stat *sd_lw;

		sd_lw = (reiser4_light_weight_stat *) * area;

		inode->i_mode = le16_to_cpu(get_unaligned(&sd_lw->mode));
		set_nlink(inode, le32_to_cpu(get_unaligned(&sd_lw->nlink)));
		inode->i_size = le64_to_cpu(get_unaligned(&sd_lw->size));
		/* S_IFREG|S_IFIFO in the type bits is an in-band marker
		   (written by save_lw_sd()) for a regular file caught in
		   the middle of conversion; strip the marker and remember
		   the partially-converted state in the inode flags */
		if ((inode->i_mode & S_IFMT) == (S_IFREG | S_IFIFO)) {
			inode->i_mode &= ~S_IFIFO;
			warning("", "partially converted file is encountered");
			reiser4_inode_set_flag(inode, REISER4_PART_MIXED);
		}
		move_on(len, area, sizeof *sd_lw);
		return 0;
	} else
		return not_enough_space(inode, "lw sd");
}
  51744. +
  51745. +static int save_len_lw_sd(struct inode *inode UNUSED_ARG /* object being
  51746. + * processed */ )
  51747. +{
  51748. + return sizeof(reiser4_light_weight_stat);
  51749. +}
  51750. +
  51751. +static int save_lw_sd(struct inode *inode /* object being processed */ ,
  51752. + char **area /* position in stat-data */ )
  51753. +{
  51754. + reiser4_light_weight_stat *sd;
  51755. + mode_t delta;
  51756. +
  51757. + assert("nikita-2705", inode != NULL);
  51758. + assert("nikita-2706", area != NULL);
  51759. + assert("nikita-2707", *area != NULL);
  51760. +
  51761. + sd = (reiser4_light_weight_stat *) * area;
  51762. +
  51763. + delta = (reiser4_inode_get_flag(inode,
  51764. + REISER4_PART_MIXED) ? S_IFIFO : 0);
  51765. + put_unaligned(cpu_to_le16(inode->i_mode | delta), &sd->mode);
  51766. + put_unaligned(cpu_to_le32(inode->i_nlink), &sd->nlink);
  51767. + put_unaligned(cpu_to_le64((__u64) inode->i_size), &sd->size);
  51768. + *area += sizeof *sd;
  51769. + return 0;
  51770. +}
  51771. +
/* ->present() of UNIX_STAT extension: load ownership, second-resolution
   timestamps and rdev/byte-count from the unix stat-data into the inode. */
static int present_unix_sd(struct inode *inode /* object being processed */ ,
			   char **area /* position in stat-data */ ,
			   int *len /* remaining length */ )
{
	assert("nikita-637", inode != NULL);
	assert("nikita-638", area != NULL);
	assert("nikita-639", *area != NULL);
	assert("nikita-640", len != NULL);
	assert("nikita-641", *len > 0);

	if (*len >= (int)sizeof(reiser4_unix_stat)) {
		reiser4_unix_stat *sd;

		sd = (reiser4_unix_stat *) * area;

		i_uid_write(inode, le32_to_cpu(get_unaligned(&sd->uid)));
		i_gid_write(inode, le32_to_cpu(get_unaligned(&sd->gid)));
		inode->i_atime.tv_sec = le32_to_cpu(get_unaligned(&sd->atime));
		inode->i_mtime.tv_sec = le32_to_cpu(get_unaligned(&sd->mtime));
		inode->i_ctime.tv_sec = le32_to_cpu(get_unaligned(&sd->ctime));
		/* the trailing 64-bit field is a union: device number for
		   device nodes, byte count for everything else */
		if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode))
			inode->i_rdev = le64_to_cpu(get_unaligned(&sd->u.rdev));
		else
			inode_set_bytes(inode, (loff_t) le64_to_cpu(get_unaligned(&sd->u.bytes)));
		move_on(len, area, sizeof *sd);
		return 0;
	} else
		return not_enough_space(inode, "unix sd");
}
  51801. +
  51802. +static int absent_unix_sd(struct inode *inode /* object being processed */ )
  51803. +{
  51804. + i_uid_write(inode, get_super_private(inode->i_sb)->default_uid);
  51805. + i_gid_write(inode, get_super_private(inode->i_sb)->default_gid);
  51806. + inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
  51807. + inode_set_bytes(inode, inode->i_size);
  51808. + /* mark inode as lightweight, so that caller (lookup_common) will
  51809. + complete initialisation by copying [ug]id from a parent. */
  51810. + reiser4_inode_set_flag(inode, REISER4_LIGHT_WEIGHT);
  51811. + return 0;
  51812. +}
  51813. +
  51814. +/* Audited by: green(2002.06.14) */
  51815. +static int save_len_unix_sd(struct inode *inode UNUSED_ARG /* object being
  51816. + * processed */ )
  51817. +{
  51818. + return sizeof(reiser4_unix_stat);
  51819. +}
  51820. +
  51821. +static int save_unix_sd(struct inode *inode /* object being processed */ ,
  51822. + char **area /* position in stat-data */ )
  51823. +{
  51824. + reiser4_unix_stat *sd;
  51825. +
  51826. + assert("nikita-642", inode != NULL);
  51827. + assert("nikita-643", area != NULL);
  51828. + assert("nikita-644", *area != NULL);
  51829. +
  51830. + sd = (reiser4_unix_stat *) * area;
  51831. + put_unaligned(cpu_to_le32(i_uid_read(inode)), &sd->uid);
  51832. + put_unaligned(cpu_to_le32(i_gid_read(inode)), &sd->gid);
  51833. + put_unaligned(cpu_to_le32((__u32) inode->i_atime.tv_sec), &sd->atime);
  51834. + put_unaligned(cpu_to_le32((__u32) inode->i_ctime.tv_sec), &sd->ctime);
  51835. + put_unaligned(cpu_to_le32((__u32) inode->i_mtime.tv_sec), &sd->mtime);
  51836. + if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode))
  51837. + put_unaligned(cpu_to_le64(inode->i_rdev), &sd->u.rdev);
  51838. + else
  51839. + put_unaligned(cpu_to_le64((__u64) inode_get_bytes(inode)), &sd->u.bytes);
  51840. + *area += sizeof *sd;
  51841. + return 0;
  51842. +}
  51843. +
/* ->present() of LARGE_TIMES_STAT extension: load nanosecond parts of the
   timestamps (the second parts come from the UNIX_STAT extension). */
static int
present_large_times_sd(struct inode *inode /* object being processed */ ,
		       char **area /* position in stat-data */ ,
		       int *len /* remaining length */ )
{
	if (*len >= (int)sizeof(reiser4_large_times_stat)) {
		reiser4_large_times_stat *sd_lt;

		sd_lt = (reiser4_large_times_stat *) * area;

		inode->i_atime.tv_nsec = le32_to_cpu(get_unaligned(&sd_lt->atime));
		inode->i_mtime.tv_nsec = le32_to_cpu(get_unaligned(&sd_lt->mtime));
		inode->i_ctime.tv_nsec = le32_to_cpu(get_unaligned(&sd_lt->ctime));

		move_on(len, area, sizeof *sd_lt);
		return 0;
	} else
		return not_enough_space(inode, "large times sd");
}
  51863. +
  51864. +static int
  51865. +save_len_large_times_sd(struct inode *inode UNUSED_ARG
  51866. + /* object being processed */ )
  51867. +{
  51868. + return sizeof(reiser4_large_times_stat);
  51869. +}
  51870. +
  51871. +static int
  51872. +save_large_times_sd(struct inode *inode /* object being processed */ ,
  51873. + char **area /* position in stat-data */ )
  51874. +{
  51875. + reiser4_large_times_stat *sd;
  51876. +
  51877. + assert("nikita-2817", inode != NULL);
  51878. + assert("nikita-2818", area != NULL);
  51879. + assert("nikita-2819", *area != NULL);
  51880. +
  51881. + sd = (reiser4_large_times_stat *) * area;
  51882. +
  51883. + put_unaligned(cpu_to_le32((__u32) inode->i_atime.tv_nsec), &sd->atime);
  51884. + put_unaligned(cpu_to_le32((__u32) inode->i_ctime.tv_nsec), &sd->ctime);
  51885. + put_unaligned(cpu_to_le32((__u32) inode->i_mtime.tv_nsec), &sd->mtime);
  51886. +
  51887. + *area += sizeof *sd;
  51888. + return 0;
  51889. +}
  51890. +
  51891. +/* symlink stat data extension */
  51892. +
/* allocate memory for symlink target and attach it to inode->i_private.
   Keeps a NUL-terminated private copy of @target (@len bytes, terminator
   not counted). Returns 0 or -ENOMEM. */
static int
symlink_target_to_inode(struct inode *inode, const char *target, int len)
{
	assert("vs-845", inode->i_private == NULL);
	assert("vs-846", !reiser4_inode_get_flag(inode,
						 REISER4_GENERIC_PTR_USED));
	/* FIXME-VS: this is prone to deadlock. Not more than other similar
	   places, though */
	inode->i_private = kmalloc((size_t) len + 1,
				   reiser4_ctx_gfp_mask_get());
	if (!inode->i_private)
		return RETERR(-ENOMEM);

	memcpy((char *)(inode->i_private), target, (size_t) len);
	((char *)(inode->i_private))[len] = 0;
	/* flag tells save_symlink_sd()/destructors that i_private holds an
	   allocated target string */
	reiser4_inode_set_flag(inode, REISER4_GENERIC_PTR_USED);
	return 0;
}
  51912. +
  51913. +/* this is called on read_inode. There is nothing to do actually, but some
  51914. + sanity checks */
  51915. +static int present_symlink_sd(struct inode *inode, char **area, int *len)
  51916. +{
  51917. + int result;
  51918. + int length;
  51919. + reiser4_symlink_stat *sd;
  51920. +
  51921. + length = (int)inode->i_size;
  51922. + /*
  51923. + * *len is number of bytes in stat data item from *area to the end of
  51924. + * item. It must be not less than size of symlink + 1 for ending 0
  51925. + */
  51926. + if (length > *len)
  51927. + return not_enough_space(inode, "symlink");
  51928. +
  51929. + if (*(*area + length) != 0) {
  51930. + warning("vs-840", "Symlink is not zero terminated");
  51931. + return RETERR(-EIO);
  51932. + }
  51933. +
  51934. + sd = (reiser4_symlink_stat *) * area;
  51935. + result = symlink_target_to_inode(inode, sd->body, length);
  51936. +
  51937. + move_on(len, area, length + 1);
  51938. + return result;
  51939. +}
  51940. +
  51941. +static int save_len_symlink_sd(struct inode *inode)
  51942. +{
  51943. + return inode->i_size + 1;
  51944. +}
  51945. +
/* this is called on create and update stat data. Do nothing on update but
   update @area */
static int save_symlink_sd(struct inode *inode, char **area)
{
	int result;
	int length;
	reiser4_symlink_stat *sd;

	length = (int)inode->i_size;
	/* inode->i_size must be set already */
	assert("vs-841", length);

	result = 0;
	sd = (reiser4_symlink_stat *) * area;
	if (!reiser4_inode_get_flag(inode, REISER4_GENERIC_PTR_USED)) {
		/* first save (create): i_private still holds the raw target
		   string supplied by the symlink creation path */
		const char *target;

		target = (const char *)(inode->i_private);
		inode->i_private = NULL;

		/* re-attach an owned, NUL-terminated copy to i_private;
		   NOTE(review): ownership of the original @target buffer
		   appears to stay with the caller — confirm against the
		   symlink creation path */
		result = symlink_target_to_inode(inode, target, length);

		/* copy symlink to stat data */
		memcpy(sd->body, target, (size_t) length);
		(*area)[length] = 0;
	} else {
		/* there is nothing to do in update but move area */
		assert("vs-844",
		       !memcmp(inode->i_private, sd->body,
			       (size_t) length + 1));
	}

	/* advance past body and terminating zero */
	*area += (length + 1);
	return result;
}
  51981. +
  51982. +static int present_flags_sd(struct inode *inode /* object being processed */ ,
  51983. + char **area /* position in stat-data */ ,
  51984. + int *len /* remaining length */ )
  51985. +{
  51986. + assert("nikita-645", inode != NULL);
  51987. + assert("nikita-646", area != NULL);
  51988. + assert("nikita-647", *area != NULL);
  51989. + assert("nikita-648", len != NULL);
  51990. + assert("nikita-649", *len > 0);
  51991. +
  51992. + if (*len >= (int)sizeof(reiser4_flags_stat)) {
  51993. + reiser4_flags_stat *sd;
  51994. +
  51995. + sd = (reiser4_flags_stat *) * area;
  51996. + inode->i_flags = le32_to_cpu(get_unaligned(&sd->flags));
  51997. + move_on(len, area, sizeof *sd);
  51998. + return 0;
  51999. + } else
  52000. + return not_enough_space(inode, "generation and attrs");
  52001. +}
  52002. +
  52003. +/* Audited by: green(2002.06.14) */
  52004. +static int save_len_flags_sd(struct inode *inode UNUSED_ARG /* object being
  52005. + * processed */ )
  52006. +{
  52007. + return sizeof(reiser4_flags_stat);
  52008. +}
  52009. +
  52010. +static int save_flags_sd(struct inode *inode /* object being processed */ ,
  52011. + char **area /* position in stat-data */ )
  52012. +{
  52013. + reiser4_flags_stat *sd;
  52014. +
  52015. + assert("nikita-650", inode != NULL);
  52016. + assert("nikita-651", area != NULL);
  52017. + assert("nikita-652", *area != NULL);
  52018. +
  52019. + sd = (reiser4_flags_stat *) * area;
  52020. + put_unaligned(cpu_to_le32(inode->i_flags), &sd->flags);
  52021. + *area += sizeof *sd;
  52022. + return 0;
  52023. +}
  52024. +
static int absent_plugin_sd(struct inode *inode);
/* ->present() of PLUGIN_STAT extension: load the non-standard plugins
   listed in the stat-data into the inode's plugin set (@is_pset != 0) or
   heir set, and record which members were loaded in the corresponding
   mask. Returns 0 or a negative error code. */
static int present_plugin_sd(struct inode *inode /* object being processed */ ,
			     char **area /* position in stat-data */ ,
			     int *len /* remaining length */,
			     int is_pset /* 1 if plugin set, 0 if heir set. */)
{
	reiser4_plugin_stat *sd;
	reiser4_plugin *plugin;
	reiser4_inode *info;
	int i;
	__u16 mask;	/* bitmask of pset/hset members seen so far */
	int result;
	int num_of_plugins;

	assert("nikita-653", inode != NULL);
	assert("nikita-654", area != NULL);
	assert("nikita-655", *area != NULL);
	assert("nikita-656", len != NULL);
	assert("nikita-657", *len > 0);

	if (*len < (int)sizeof(reiser4_plugin_stat))
		return not_enough_space(inode, "plugin");

	sd = (reiser4_plugin_stat *) * area;
	info = reiser4_inode_data(inode);

	mask = 0;
	num_of_plugins = le16_to_cpu(get_unaligned(&sd->plugins_no));
	move_on(len, area, sizeof *sd);
	result = 0;
	/* each slot names a pset/hset member and a plugin id; slots may be
	   followed by plugin-private data consumed by ->load() */
	for (i = 0; i < num_of_plugins; ++i) {
		reiser4_plugin_slot *slot;
		reiser4_plugin_type type;
		pset_member memb;

		slot = (reiser4_plugin_slot *) * area;
		if (*len < (int)sizeof *slot)
			return not_enough_space(inode, "additional plugin");

		memb = le16_to_cpu(get_unaligned(&slot->pset_memb));
		type = aset_member_to_type_unsafe(memb);

		if (type == REISER4_PLUGIN_TYPES) {
			/* member id does not map to any plugin type */
			warning("nikita-3502",
				"wrong %s member (%i) for %llu", is_pset ?
				"pset" : "hset", memb,
				(unsigned long long)get_inode_oid(inode));
			return RETERR(-EINVAL);
		}
		plugin = plugin_by_disk_id(reiser4_tree_by_inode(inode),
					   type, &slot->id);
		if (plugin == NULL)
			return unknown_plugin(le16_to_cpu(get_unaligned(&slot->id)), inode);

		/* plugin is loaded into inode, mark this into inode's
		   bitmask of loaded non-standard plugins */
		if (!(mask & (1 << memb))) {
			mask |= (1 << memb);
		} else {
			/* same member listed twice — corrupt stat-data */
			warning("nikita-658", "duplicate plugin for %llu",
				(unsigned long long)get_inode_oid(inode));
			return RETERR(-EINVAL);
		}
		move_on(len, area, sizeof *slot);
		/* load plugin data, if any; ->load() is responsible for
		   installing the plugin into the set itself */
		if (plugin->h.pops != NULL && plugin->h.pops->load)
			result = plugin->h.pops->load(inode, plugin, area, len);
		else
			result = aset_set_unsafe(is_pset ? &info->pset :
						 &info->hset, memb, plugin);
		if (result)
			return result;
	}
	if (is_pset) {
		/* if object plugin wasn't loaded from stat-data, guess it by
		   mode bits */
		plugin = file_plugin_to_plugin(inode_file_plugin(inode));
		if (plugin == NULL)
			result = absent_plugin_sd(inode);
		info->plugin_mask = mask;
	} else
		info->heir_mask = mask;

	return result;
}
  52110. +
  52111. +static int present_pset_sd(struct inode *inode, char **area, int *len) {
  52112. + return present_plugin_sd(inode, area, len, 1 /* pset */);
  52113. +}
  52114. +
  52115. +/* Determine object plugin for @inode based on i_mode.
  52116. +
  52117. + Many objects in reiser4 file system are controlled by standard object
  52118. + plugins that emulate traditional unix objects: unix file, directory, symlink, fifo, and so on.
  52119. +
  52120. + For such files we don't explicitly store plugin id in object stat
  52121. + data. Rather required plugin is guessed from mode bits, where file "type"
  52122. + is encoded (see stat(2)).
  52123. +*/
  52124. +static int
  52125. +guess_plugin_by_mode(struct inode *inode /* object to guess plugins for */ )
  52126. +{
  52127. + int fplug_id;
  52128. + int dplug_id;
  52129. + reiser4_inode *info;
  52130. +
  52131. + assert("nikita-736", inode != NULL);
  52132. +
  52133. + dplug_id = fplug_id = -1;
  52134. +
  52135. + switch (inode->i_mode & S_IFMT) {
  52136. + case S_IFSOCK:
  52137. + case S_IFBLK:
  52138. + case S_IFCHR:
  52139. + case S_IFIFO:
  52140. + fplug_id = SPECIAL_FILE_PLUGIN_ID;
  52141. + break;
  52142. + case S_IFLNK:
  52143. + fplug_id = SYMLINK_FILE_PLUGIN_ID;
  52144. + break;
  52145. + case S_IFDIR:
  52146. + fplug_id = DIRECTORY_FILE_PLUGIN_ID;
  52147. + dplug_id = HASHED_DIR_PLUGIN_ID;
  52148. + break;
  52149. + default:
  52150. + warning("nikita-737", "wrong file mode: %o", inode->i_mode);
  52151. + return RETERR(-EIO);
  52152. + case S_IFREG:
  52153. + fplug_id = UNIX_FILE_PLUGIN_ID;
  52154. + break;
  52155. + }
  52156. + info = reiser4_inode_data(inode);
  52157. + set_plugin(&info->pset, PSET_FILE, (fplug_id >= 0) ?
  52158. + plugin_by_id(REISER4_FILE_PLUGIN_TYPE, fplug_id) : NULL);
  52159. + set_plugin(&info->pset, PSET_DIR, (dplug_id >= 0) ?
  52160. + plugin_by_id(REISER4_DIR_PLUGIN_TYPE, dplug_id) : NULL);
  52161. + return 0;
  52162. +}
  52163. +
  52164. +/* Audited by: green(2002.06.14) */
  52165. +static int absent_plugin_sd(struct inode *inode /* object being processed */ )
  52166. +{
  52167. + int result;
  52168. +
  52169. + assert("nikita-659", inode != NULL);
  52170. +
  52171. + result = guess_plugin_by_mode(inode);
  52172. + /* if mode was wrong, guess_plugin_by_mode() returns "regular file",
  52173. + but setup_inode_ops() will call make_bad_inode().
  52174. + Another, more logical but bit more complex solution is to add
  52175. + "bad-file plugin". */
  52176. + /* FIXME-VS: activate was called here */
  52177. + return result;
  52178. +}
  52179. +
/* helper function for plugin_sd_save_len(): calculate how much space
   required to save state of given plugin.
   Returns @len increased by the slot size (and any plugin-private data)
   when @memb is flagged in the relevant mask; returns @len unchanged for a
   NULL or unflagged plugin. */
/* Audited by: green(2002.06.14) */
static int len_for(reiser4_plugin * plugin /* plugin to save */ ,
		   struct inode *inode /* object being processed */ ,
		   pset_member memb,
		   int len, int is_pset)
{
	reiser4_inode *info;
	assert("nikita-661", inode != NULL);

	if (plugin == NULL)
		return len;

	info = reiser4_inode_data(inode);
	/* only members flagged in plugin_mask/heir_mask are written out */
	if (is_pset ?
	    info->plugin_mask & (1 << memb) :
	    info->heir_mask & (1 << memb)) {
		len += sizeof(reiser4_plugin_slot);
		if (plugin->h.pops && plugin->h.pops->save_len != NULL) {
			/*
			 * non-standard plugin, call method
			 * commented as it is incompatible with alignment
			 * policy in save_plug() -edward
			 *
			 * len = reiser4_round_up(len,
			 * plugin->h.pops->alignment);
			 */
			len += plugin->h.pops->save_len(inode, plugin);
		}
	}
	return len;
}
  52213. +
/* calculate how much space is required to save state of all plugins,
   associated with inode.
   Returns 0 when no non-standard plugins are flagged; otherwise the
   header size plus per-slot sizes computed by len_for(). */
static int save_len_plugin_sd(struct inode *inode /* object being processed */,
			      int is_pset)
{
	int len;
	int last;
	reiser4_inode *state;
	pset_member memb;

	assert("nikita-663", inode != NULL);

	state = reiser4_inode_data(inode);

	/* common case: no non-standard plugins */
	if (is_pset ? state->plugin_mask == 0 : state->heir_mask == 0)
		return 0;
	len = sizeof(reiser4_plugin_stat);
	last = PSET_LAST;

	for (memb = 0; memb < last; ++memb) {
		len = len_for(aset_get(is_pset ? state->pset : state->hset, memb),
			      inode, memb, len, is_pset);
	}
	/* at least one slot must have been accounted for */
	assert("nikita-664", len > (int)sizeof(reiser4_plugin_stat));
	return len;
}
  52241. +
  52242. +static int save_len_pset_sd(struct inode *inode) {
  52243. + return save_len_plugin_sd(inode, 1 /* pset */);
  52244. +}
  52245. +
/* helper function for plugin_sd_save(): save plugin, associated with
   inode.
   Writes one reiser4_plugin_slot (member id + plugin id) plus optional
   plugin-private data; advances *area and increments *count only when the
   plugin is actually written. Returns 0 or the plugin ->save() error. */
static int save_plug(reiser4_plugin * plugin /* plugin to save */ ,
		     struct inode *inode /* object being processed */ ,
		     int memb /* what element of pset is saved */ ,
		     char **area /* position in stat-data */ ,
		     int *count /* incremented if plugin were actually saved. */,
		     int is_pset /* 1 for plugin set, 0 for heir set */)
{
	reiser4_plugin_slot *slot;
	int fake_len;
	int result;

	assert("nikita-665", inode != NULL);
	assert("nikita-666", area != NULL);
	assert("nikita-667", *area != NULL);

	if (plugin == NULL)
		return 0;

	/* only members flagged in plugin_mask/heir_mask go to disk */
	if (is_pset ?
	    !(reiser4_inode_data(inode)->plugin_mask & (1 << memb)) :
	    !(reiser4_inode_data(inode)->heir_mask & (1 << memb)))
		return 0;
	slot = (reiser4_plugin_slot *) * area;
	put_unaligned(cpu_to_le16(memb), &slot->pset_memb);
	put_unaligned(cpu_to_le16(plugin->h.id), &slot->id);
	/* move_on() needs a length to decrement; space was already sized by
	   save_len_plugin_sd(), so pass a dummy large value */
	fake_len = (int)0xffff;
	move_on(&fake_len, area, sizeof *slot);
	++*count;
	result = 0;
	if (plugin->h.pops != NULL) {
		/* plugin-private on-disk state, if the plugin has any */
		if (plugin->h.pops->save != NULL)
			result = plugin->h.pops->save(inode, plugin, area);
	}
	return result;
}
  52283. +
/* save state of all non-standard plugins associated with inode.
   Writes the reiser4_plugin_stat header followed by one slot per flagged
   member; the slot count is back-patched into the header at the end. */
static int save_plugin_sd(struct inode *inode /* object being processed */ ,
			  char **area /* position in stat-data */,
			  int is_pset /* 1 for pset, 0 for hset */)
{
	int fake_len;
	int result = 0;
	int num_of_plugins;
	reiser4_plugin_stat *sd;
	reiser4_inode *state;
	pset_member memb;

	assert("nikita-669", inode != NULL);
	assert("nikita-670", area != NULL);
	assert("nikita-671", *area != NULL);

	state = reiser4_inode_data(inode);
	/* common case: nothing non-standard to save */
	if (is_pset ? state->plugin_mask == 0 : state->heir_mask == 0)
		return 0;
	sd = (reiser4_plugin_stat *) * area;
	/* space was pre-sized by save_len_plugin_sd(); move_on() just needs
	   some length to decrement */
	fake_len = (int)0xffff;
	move_on(&fake_len, area, sizeof *sd);

	num_of_plugins = 0;
	for (memb = 0; memb < PSET_LAST; ++memb) {
		result = save_plug(aset_get(is_pset ? state->pset : state->hset,
					    memb),
				   inode, memb, area, &num_of_plugins, is_pset);
		if (result != 0)
			break;
	}

	/* back-patch the actual number of slots written */
	put_unaligned(cpu_to_le16((__u16)num_of_plugins), &sd->plugins_no);
	return result;
}
  52319. +
  52320. +static int save_pset_sd(struct inode *inode, char **area) {
  52321. + return save_plugin_sd(inode, area, 1 /* pset */);
  52322. +}
  52323. +
  52324. +static int present_hset_sd(struct inode *inode, char **area, int *len) {
  52325. + return present_plugin_sd(inode, area, len, 0 /* hset */);
  52326. +}
  52327. +
/* ->save_len() of the heir-set PLUGIN_STAT variant. */
static int save_len_hset_sd(struct inode *inode) {
	return save_len_plugin_sd(inode, 0 /* hset */);
}
  52331. +
  52332. +static int save_hset_sd(struct inode *inode, char **area) {
  52333. + return save_plugin_sd(inode, area, 0 /* hset */);
  52334. +}
  52335. +
/* helper function for crypto_sd_present(), crypto_sd_save.
   Extract crypto info from stat-data and attach it to inode.
   Creates a crypto-stat without the secret key and copies key size and
   key fingerprint from the on-disk record. Returns 0 or a PTR_ERR code. */
static int extract_crypto_info (struct inode * inode,
				reiser4_crypto_stat * sd)
{
	struct reiser4_crypto_info * info;
	assert("edward-11", !inode_crypto_info(inode));
	assert("edward-1413",
	       !reiser4_inode_get_flag(inode, REISER4_CRYPTO_STAT_LOADED));
	/* create and attach a crypto-stat without secret key loaded */
	info = reiser4_alloc_crypto_info(inode);
	if (IS_ERR(info))
		return PTR_ERR(info);
	info->keysize = le16_to_cpu(get_unaligned(&sd->keysize));
	/* fingerprint length is dictated by the digest plugin */
	memcpy(info->keyid, sd->keyid, inode_digest_plugin(inode)->fipsize);
	reiser4_attach_crypto_info(inode, info);
	reiser4_inode_set_flag(inode, REISER4_CRYPTO_STAT_LOADED);
	return 0;
}
  52355. +
  52356. +/* crypto stat-data extension */
  52357. +
/* ->present() of CRYPTO_STAT extension: load key size and key fingerprint
   from the stat-data and attach a crypto-stat to the inode. */
static int present_crypto_sd(struct inode *inode, char **area, int *len)
{
	int result;
	reiser4_crypto_stat *sd;
	digest_plugin *dplug = inode_digest_plugin(inode);

	assert("edward-06", dplug != NULL);
	assert("edward-684", dplug->fipsize);
	assert("edward-07", area != NULL);
	assert("edward-08", *area != NULL);
	assert("edward-09", len != NULL);
	assert("edward-10", *len > 0);

	if (*len < (int)sizeof(reiser4_crypto_stat)) {
		return not_enough_space(inode, "crypto-sd");
	}
	/* *len is number of bytes in stat data item from *area to the end of
	   item. It must be not less than size of this extension */
	assert("edward-75", sizeof(*sd) + dplug->fipsize <= *len);

	sd = (reiser4_crypto_stat *) * area;
	result = extract_crypto_info(inode, sd);
	/* record is the fixed struct followed by the digest fingerprint */
	move_on(len, area, sizeof(*sd) + dplug->fipsize);

	return result;
}
  52384. +
  52385. +static int save_len_crypto_sd(struct inode *inode)
  52386. +{
  52387. + return sizeof(reiser4_crypto_stat) +
  52388. + inode_digest_plugin(inode)->fipsize;
  52389. +}
  52390. +
/* ->save() of CRYPTO_STAT extension: on first save (file creation) store
   key size and key fingerprint; on later saves the on-disk record is
   immutable, so only advance @area past it. */
static int save_crypto_sd(struct inode *inode, char **area)
{
	int result = 0;
	reiser4_crypto_stat *sd;
	struct reiser4_crypto_info * info = inode_crypto_info(inode);
	digest_plugin *dplug = inode_digest_plugin(inode);

	assert("edward-12", dplug != NULL);
	assert("edward-13", area != NULL);
	assert("edward-14", *area != NULL);
	assert("edward-15", info != NULL);
	assert("edward-1414", info->keyid != NULL);
	assert("edward-1415", info->keysize != 0);
	assert("edward-76", reiser4_inode_data(inode) != NULL);

	if (!reiser4_inode_get_flag(inode, REISER4_CRYPTO_STAT_LOADED)) {
		/* file is just created */
		sd = (reiser4_crypto_stat *) *area;
		/* copy everything but private key to the disk stat-data */
		put_unaligned(cpu_to_le16(info->keysize), &sd->keysize);
		memcpy(sd->keyid, info->keyid, (size_t) dplug->fipsize);
		reiser4_inode_set_flag(inode, REISER4_CRYPTO_STAT_LOADED);
	}
	/* advance past struct plus fingerprint regardless of branch taken */
	*area += (sizeof(*sd) + dplug->fipsize);
	return result;
}
  52417. +
/* ->present() stub for stat-data extensions that must never occur on
   disk: any occurrence is treated as corruption. Parameters match the
   ->present() callback signature and are intentionally unused. */
static int eio(struct inode *inode, char **area, int *len)
{
	return RETERR(-EIO);
}
  52422. +
  52423. +sd_ext_plugin sd_ext_plugins[LAST_SD_EXTENSION] = {
  52424. + [LIGHT_WEIGHT_STAT] = {
  52425. + .h = {
  52426. + .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
  52427. + .id = LIGHT_WEIGHT_STAT,
  52428. + .pops = NULL,
  52429. + .label = "light-weight sd",
  52430. + .desc = "sd for light-weight files",
  52431. + .linkage = {NULL,NULL}
  52432. + },
  52433. + .present = present_lw_sd,
  52434. + .absent = NULL,
  52435. + .save_len = save_len_lw_sd,
  52436. + .save = save_lw_sd,
  52437. + .alignment = 8
  52438. + },
  52439. + [UNIX_STAT] = {
  52440. + .h = {
  52441. + .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
  52442. + .id = UNIX_STAT,
  52443. + .pops = NULL,
  52444. + .label = "unix-sd",
  52445. + .desc = "unix stat-data fields",
  52446. + .linkage = {NULL,NULL}
  52447. + },
  52448. + .present = present_unix_sd,
  52449. + .absent = absent_unix_sd,
  52450. + .save_len = save_len_unix_sd,
  52451. + .save = save_unix_sd,
  52452. + .alignment = 8
  52453. + },
  52454. + [LARGE_TIMES_STAT] = {
  52455. + .h = {
  52456. + .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
  52457. + .id = LARGE_TIMES_STAT,
  52458. + .pops = NULL,
  52459. + .label = "64time-sd",
  52460. + .desc = "nanosecond resolution for times",
  52461. + .linkage = {NULL,NULL}
  52462. + },
  52463. + .present = present_large_times_sd,
  52464. + .absent = NULL,
  52465. + .save_len = save_len_large_times_sd,
  52466. + .save = save_large_times_sd,
  52467. + .alignment = 8
  52468. + },
  52469. + [SYMLINK_STAT] = {
  52470. + /* stat data of symlink has this extension */
  52471. + .h = {
  52472. + .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
  52473. + .id = SYMLINK_STAT,
  52474. + .pops = NULL,
  52475. + .label = "symlink-sd",
  52476. + .desc =
  52477. + "stat data is appended with symlink name",
  52478. + .linkage = {NULL,NULL}
  52479. + },
  52480. + .present = present_symlink_sd,
  52481. + .absent = NULL,
  52482. + .save_len = save_len_symlink_sd,
  52483. + .save = save_symlink_sd,
  52484. + .alignment = 8
  52485. + },
  52486. + [PLUGIN_STAT] = {
  52487. + .h = {
  52488. + .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
  52489. + .id = PLUGIN_STAT,
  52490. + .pops = NULL,
  52491. + .label = "plugin-sd",
  52492. + .desc = "plugin stat-data fields",
  52493. + .linkage = {NULL,NULL}
  52494. + },
  52495. + .present = present_pset_sd,
  52496. + .absent = absent_plugin_sd,
  52497. + .save_len = save_len_pset_sd,
  52498. + .save = save_pset_sd,
  52499. + .alignment = 8
  52500. + },
  52501. + [HEIR_STAT] = {
  52502. + .h = {
  52503. + .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
  52504. + .id = HEIR_STAT,
  52505. + .pops = NULL,
  52506. + .label = "heir-plugin-sd",
  52507. + .desc = "heir plugin stat-data fields",
  52508. + .linkage = {NULL,NULL}
  52509. + },
  52510. + .present = present_hset_sd,
  52511. + .absent = NULL,
  52512. + .save_len = save_len_hset_sd,
  52513. + .save = save_hset_sd,
  52514. + .alignment = 8
  52515. + },
  52516. + [FLAGS_STAT] = {
  52517. + .h = {
  52518. + .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
  52519. + .id = FLAGS_STAT,
  52520. + .pops = NULL,
  52521. + .label = "flags-sd",
  52522. + .desc = "inode bit flags",
  52523. + .linkage = {NULL, NULL}
  52524. + },
  52525. + .present = present_flags_sd,
  52526. + .absent = NULL,
  52527. + .save_len = save_len_flags_sd,
  52528. + .save = save_flags_sd,
  52529. + .alignment = 8
  52530. + },
  52531. + [CAPABILITIES_STAT] = {
  52532. + .h = {
  52533. + .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
  52534. + .id = CAPABILITIES_STAT,
  52535. + .pops = NULL,
  52536. + .label = "capabilities-sd",
  52537. + .desc = "capabilities",
  52538. + .linkage = {NULL, NULL}
  52539. + },
  52540. + .present = eio,
  52541. + .absent = NULL,
  52542. + .save_len = save_len_flags_sd,
  52543. + .save = save_flags_sd,
  52544. + .alignment = 8
  52545. + },
  52546. + [CRYPTO_STAT] = {
  52547. + .h = {
  52548. + .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
  52549. + .id = CRYPTO_STAT,
  52550. + .pops = NULL,
  52551. + .label = "crypto-sd",
  52552. + .desc = "secret key size and id",
  52553. + .linkage = {NULL, NULL}
  52554. + },
  52555. + .present = present_crypto_sd,
  52556. + .absent = NULL,
  52557. + .save_len = save_len_crypto_sd,
  52558. + .save = save_crypto_sd,
  52559. + .alignment = 8
  52560. + }
  52561. +};
  52562. +
  52563. +/* Make Linus happy.
  52564. + Local variables:
  52565. + c-indentation-style: "K&R"
  52566. + mode-name: "LC"
  52567. + c-basic-offset: 8
  52568. + tab-width: 8
  52569. + fill-column: 120
  52570. + End:
  52571. +*/
  52572. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/item/static_stat.h linux-4.14.2/fs/reiser4/plugin/item/static_stat.h
  52573. --- linux-4.14.2.orig/fs/reiser4/plugin/item/static_stat.h 1970-01-01 01:00:00.000000000 +0100
  52574. +++ linux-4.14.2/fs/reiser4/plugin/item/static_stat.h 2017-11-26 22:13:09.000000000 +0100
  52575. @@ -0,0 +1,224 @@
  52576. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  52577. +
  52578. +/* This describes the static_stat item, used to hold all information needed by the stat() syscall.
  52579. +
  52580. +In the case where each file has not less than the fields needed by the
  52581. +stat() syscall, it is more compact to store those fields in this
  52582. +struct.
  52583. +
  52584. +If this item does not exist, then all stats are dynamically resolved.
  52585. +At the moment, we either resolve all stats dynamically or all of them
  52586. +statically. If you think this is not fully optimal, and the rest of
  52587. +reiser4 is working, then fix it...:-)
  52588. +
  52589. +*/
  52590. +
  52591. +#if !defined( __FS_REISER4_PLUGIN_ITEM_STATIC_STAT_H__ )
  52592. +#define __FS_REISER4_PLUGIN_ITEM_STATIC_STAT_H__
  52593. +
  52594. +#include "../../forward.h"
  52595. +#include "../../dformat.h"
  52596. +
  52597. +#include <linux/fs.h> /* for struct inode */
  52598. +
  52599. +/* Stat data layout: goals and implementation.
  52600. +
  52601. + We want to be able to have lightweight files which have complete flexibility in what semantic metadata is attached to
  52602. + them, including not having semantic metadata attached to them.
  52603. +
  52604. + There is one problem with doing that, which is that if in fact you have exactly the same metadata for most files you
  52605. + want to store, then it takes more space to store that metadata in a dynamically sized structure than in a statically
  52606. + sized structure because the statically sized structure knows without recording it what the names and lengths of the
  52607. + attributes are.
  52608. +
  52609. + This leads to a natural compromise, which is to special case those files which have simply the standard unix file
  52610. + attributes, and only employ the full dynamic stat data mechanism for those files that differ from the standard unix
  52611. + file in their use of file attributes.
  52612. +
  52613. + Yet this compromise deserves to be compromised a little.
  52614. +
  52615. + We accommodate the case where you have no more than the standard unix file attributes by using an "extension
  52616. + bitmask": each bit in it indicates presence or absence of a particular stat data extension (see sd_ext_bits enum).
  52617. +
  52618. + If the first bit of the extension bitmask is 0, we have a light-weight file whose attributes are either inherited
  52619. + from parent directory (as uid, gid) or initialised to some sane values.
  52620. +
  52621. + To capitalize on existing code infrastructure, extensions are
  52622. + implemented as plugins of type REISER4_SD_EXT_PLUGIN_TYPE.
  52623. + Each stat-data extension plugin implements four methods:
  52624. +
  52625. + ->present() called by sd_load() when this extension is found in stat-data
  52626. + ->absent() called by sd_load() when this extension is not found in stat-data
  52627. + ->save_len() called by sd_len() to calculate total length of stat-data
  52628. + ->save() called by sd_save() to store extension data into stat-data
  52629. +
  52630. + Implementation is in fs/reiser4/plugin/item/static_stat.c
  52631. +*/
  52632. +
  52633. +/* stat-data extension. Please order this by presumed frequency of use */
  52634. +typedef enum {
  52635. + /* support for light-weight files */
  52636. + LIGHT_WEIGHT_STAT,
  52637. + /* data required to implement unix stat(2) call. Layout is in
  52638. + reiser4_unix_stat. If this is not present, file is light-weight */
  52639. + UNIX_STAT,
  52640. + /* this contains additional set of 32bit [anc]time fields to implement
  52641. + nanosecond resolution. Layout is in reiser4_large_times_stat. Usage
  52642. + of this extension is governed by 32bittimes mount option. */
  52643. + LARGE_TIMES_STAT,
  52644. + /* stat data has link name included */
  52645. + SYMLINK_STAT,
  52646. + /* on-disk slots of non-standard plugins for main plugin table
  52647. + (@reiser4_inode->pset), that is, plugins that cannot be deduced
  52648. + from file mode bits), for example, aggregation, interpolation etc. */
  52649. + PLUGIN_STAT,
  52650. + /* this extension contains persistent inode flags. These flags are
  52651. + single bits: immutable, append, only, etc. Layout is in
  52652. + reiser4_flags_stat. */
  52653. + FLAGS_STAT,
  52654. + /* this extension contains capabilities sets, associated with this
  52655. + file. Layout is in reiser4_capabilities_stat */
  52656. + CAPABILITIES_STAT,
  52657. + /* this extension contains size and public id of the secret key.
  52658. + Layout is in reiser4_crypto_stat */
  52659. + CRYPTO_STAT,
  52660. + /* on-disk slots of non-default plugins for inheritance, which
  52661. + are extracted to special plugin table (@reiser4_inode->hset).
  52662. + By default, children of the object will inherit plugins from
  52663. + its main plugin table (pset). */
  52664. + HEIR_STAT,
  52665. + LAST_SD_EXTENSION,
  52666. + /*
  52667. + * init_inode_static_sd() iterates over extension mask until all
  52668. + * non-zero bits are processed. This means, that neither ->present(),
  52669. + * nor ->absent() methods will be called for stat-data extensions that
  52670. + go after the last present extension. But for some basic extensions, we want
  52671. + * either ->absent() or ->present() method to be called, because these
  52672. + * extensions set up something in inode even when they are not
  52673. + * present. This is what LAST_IMPORTANT_SD_EXTENSION is for: for all
  52674. + * extensions before and including LAST_IMPORTANT_SD_EXTENSION either
  52675. + * ->present(), or ->absent() method will be called, independently of
  52676. + * what other extensions are present.
  52677. + */
  52678. + LAST_IMPORTANT_SD_EXTENSION = PLUGIN_STAT
  52679. +} sd_ext_bits;
  52680. +
  52681. +/* minimal stat-data. This allows to support light-weight files. */
  52682. +typedef struct reiser4_stat_data_base {
  52683. + /* 0 */ __le16 extmask;
  52684. + /* 2 */
  52685. +} PACKED reiser4_stat_data_base;
  52686. +
  52687. +typedef struct reiser4_light_weight_stat {
  52688. + /* 0 */ __le16 mode;
  52689. + /* 2 */ __le32 nlink;
  52690. + /* 6 */ __le64 size;
  52691. + /* size in bytes */
  52692. + /* 14 */
  52693. +} PACKED reiser4_light_weight_stat;
  52694. +
  52695. +typedef struct reiser4_unix_stat {
  52696. + /* owner id */
  52697. + /* 0 */ __le32 uid;
  52698. + /* group id */
  52699. + /* 4 */ __le32 gid;
  52700. + /* access time */
  52701. + /* 8 */ __le32 atime;
  52702. + /* modification time */
  52703. + /* 12 */ __le32 mtime;
  52704. + /* change time */
  52705. + /* 16 */ __le32 ctime;
  52706. + union {
  52707. + /* minor:major for device files */
  52708. + /* 20 */ __le64 rdev;
  52709. + /* bytes used by file */
  52710. + /* 20 */ __le64 bytes;
  52711. + } u;
  52712. + /* 28 */
  52713. +} PACKED reiser4_unix_stat;
  52714. +
  52715. +/* symlink stored as part of inode */
  52716. +typedef struct reiser4_symlink_stat {
  52717. + char body[0];
  52718. +} PACKED reiser4_symlink_stat;
  52719. +
  52720. +typedef struct reiser4_plugin_slot {
  52721. + /* 0 */ __le16 pset_memb;
  52722. + /* 2 */ __le16 id;
  52723. + /* 4 *//* here plugin stores its persistent state */
  52724. +} PACKED reiser4_plugin_slot;
  52725. +
  52726. +/* stat-data extension for files with non-standard plugin. */
  52727. +typedef struct reiser4_plugin_stat {
  52728. + /* number of additional plugins, associated with this object */
  52729. + /* 0 */ __le16 plugins_no;
  52730. + /* 2 */ reiser4_plugin_slot slot[0];
  52731. + /* 2 */
  52732. +} PACKED reiser4_plugin_stat;
  52733. +
  52734. +/* stat-data extension for inode flags. Currently it is just fixed-width 32
  52735. + * bit mask. If need arise, this can be replaced with variable width
  52736. + * bitmask. */
  52737. +typedef struct reiser4_flags_stat {
  52738. + /* 0 */ __le32 flags;
  52739. + /* 4 */
  52740. +} PACKED reiser4_flags_stat;
  52741. +
  52742. +typedef struct reiser4_capabilities_stat {
  52743. + /* 0 */ __le32 effective;
  52744. + /* 4 */ __le32 permitted;
  52745. + /* 8 */
  52746. +} PACKED reiser4_capabilities_stat;
  52747. +
  52748. +typedef struct reiser4_cluster_stat {
  52749. +/* this defines cluster size (an attribute of cryptcompress objects) as PAGE_SIZE << cluster shift */
  52750. + /* 0 */ d8 cluster_shift;
  52751. + /* 1 */
  52752. +} PACKED reiser4_cluster_stat;
  52753. +
  52754. +typedef struct reiser4_crypto_stat {
  52755. + /* secret key size, bits */
  52756. + /* 0 */ d16 keysize;
  52757. + /* secret key id */
  52758. + /* 2 */ d8 keyid[0];
  52759. + /* 2 */
  52760. +} PACKED reiser4_crypto_stat;
  52761. +
  52762. +typedef struct reiser4_large_times_stat {
  52763. + /* access time */
  52764. + /* 0 */ d32 atime;
  52765. + /* modification time */
  52766. + /* 4 */ d32 mtime;
  52767. + /* change time */
  52768. + /* 8 */ d32 ctime;
  52769. + /* 12 */
  52770. +} PACKED reiser4_large_times_stat;
  52771. +
  52772. +/* this structure is filled by sd_item_stat */
  52773. +typedef struct sd_stat {
  52774. + int dirs;
  52775. + int files;
  52776. + int others;
  52777. +} sd_stat;
  52778. +
  52779. +/* plugin->item.common.* */
  52780. +extern void print_sd(const char *prefix, coord_t * coord);
  52781. +extern void item_stat_static_sd(const coord_t * coord, void *vp);
  52782. +
  52783. +/* plugin->item.s.sd.* */
  52784. +extern int init_inode_static_sd(struct inode *inode, char *sd, int len);
  52785. +extern int save_len_static_sd(struct inode *inode);
  52786. +extern int save_static_sd(struct inode *inode, char **area);
  52787. +
  52788. +/* __FS_REISER4_PLUGIN_ITEM_STATIC_STAT_H__ */
  52789. +#endif
  52790. +
  52791. +/* Make Linus happy.
  52792. + Local variables:
  52793. + c-indentation-style: "K&R"
  52794. + mode-name: "LC"
  52795. + c-basic-offset: 8
  52796. + tab-width: 8
  52797. + fill-column: 120
  52798. + End:
  52799. +*/
  52800. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/item/tail.c linux-4.14.2/fs/reiser4/plugin/item/tail.c
  52801. --- linux-4.14.2.orig/fs/reiser4/plugin/item/tail.c 1970-01-01 01:00:00.000000000 +0100
  52802. +++ linux-4.14.2/fs/reiser4/plugin/item/tail.c 2017-11-26 22:13:09.000000000 +0100
  52803. @@ -0,0 +1,810 @@
  52804. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  52805. +
  52806. +#include "item.h"
  52807. +#include "../../inode.h"
  52808. +#include "../../page_cache.h"
  52809. +#include "../../carry.h"
  52810. +#include "../../vfs_ops.h"
  52811. +
  52812. +#include <asm/uaccess.h>
  52813. +#include <linux/swap.h>
  52814. +#include <linux/writeback.h>
  52815. +
  52816. +/* plugin->u.item.b.max_key_inside */
  52817. +reiser4_key *max_key_inside_tail(const coord_t *coord, reiser4_key *key)
  52818. +{
  52819. + item_key_by_coord(coord, key);
  52820. + set_key_offset(key, get_key_offset(reiser4_max_key()));
  52821. + return key;
  52822. +}
  52823. +
  52824. +/* plugin->u.item.b.can_contain_key */
  52825. +int can_contain_key_tail(const coord_t *coord, const reiser4_key *key,
  52826. + const reiser4_item_data *data)
  52827. +{
  52828. + reiser4_key item_key;
  52829. +
  52830. + if (item_plugin_by_coord(coord) != data->iplug)
  52831. + return 0;
  52832. +
  52833. + item_key_by_coord(coord, &item_key);
  52834. + if (get_key_locality(key) != get_key_locality(&item_key) ||
  52835. + get_key_objectid(key) != get_key_objectid(&item_key))
  52836. + return 0;
  52837. +
  52838. + return 1;
  52839. +}
  52840. +
  52841. +/* plugin->u.item.b.mergeable
  52842. + first item is of tail type */
  52843. +/* Audited by: green(2002.06.14) */
  52844. +int mergeable_tail(const coord_t *p1, const coord_t *p2)
  52845. +{
  52846. + reiser4_key key1, key2;
  52847. +
  52848. + assert("vs-535", plugin_of_group(item_plugin_by_coord(p1),
  52849. + UNIX_FILE_METADATA_ITEM_TYPE));
  52850. + assert("vs-365", item_id_by_coord(p1) == FORMATTING_ID);
  52851. +
  52852. + if (item_id_by_coord(p2) != FORMATTING_ID) {
  52853. + /* second item is of another type */
  52854. + return 0;
  52855. + }
  52856. +
  52857. + item_key_by_coord(p1, &key1);
  52858. + item_key_by_coord(p2, &key2);
  52859. + if (get_key_locality(&key1) != get_key_locality(&key2) ||
  52860. + get_key_objectid(&key1) != get_key_objectid(&key2)
  52861. + || get_key_type(&key1) != get_key_type(&key2)) {
  52862. + /* items of different objects */
  52863. + return 0;
  52864. + }
  52865. + if (get_key_offset(&key1) + nr_units_tail(p1) != get_key_offset(&key2)) {
  52866. + /* not adjacent items */
  52867. + return 0;
  52868. + }
  52869. + return 1;
  52870. +}
  52871. +
  52872. +/* plugin->u.item.b.print
  52873. + plugin->u.item.b.check */
  52874. +
  52875. +/* plugin->u.item.b.nr_units */
  52876. +pos_in_node_t nr_units_tail(const coord_t * coord)
  52877. +{
  52878. + return item_length_by_coord(coord);
  52879. +}
  52880. +
  52881. +/* plugin->u.item.b.lookup */
  52882. +lookup_result
  52883. +lookup_tail(const reiser4_key * key, lookup_bias bias, coord_t * coord)
  52884. +{
  52885. + reiser4_key item_key;
  52886. + __u64 lookuped, offset;
  52887. + unsigned nr_units;
  52888. +
  52889. + item_key_by_coord(coord, &item_key);
  52890. + offset = get_key_offset(item_key_by_coord(coord, &item_key));
  52891. + nr_units = nr_units_tail(coord);
  52892. +
  52893. + /* key we are looking for must be greater than key of item @coord */
  52894. + assert("vs-416", keygt(key, &item_key));
  52895. +
  52896. + /* offset we are looking for */
  52897. + lookuped = get_key_offset(key);
  52898. +
  52899. + if (lookuped >= offset && lookuped < offset + nr_units) {
  52900. + /* byte we are looking for is in this item */
  52901. + coord->unit_pos = lookuped - offset;
  52902. + coord->between = AT_UNIT;
  52903. + return CBK_COORD_FOUND;
  52904. + }
  52905. +
  52906. + /* set coord after last unit */
  52907. + coord->unit_pos = nr_units - 1;
  52908. + coord->between = AFTER_UNIT;
  52909. + return bias ==
  52910. + FIND_MAX_NOT_MORE_THAN ? CBK_COORD_FOUND : CBK_COORD_NOTFOUND;
  52911. +}
  52912. +
  52913. +/* plugin->u.item.b.paste */
  52914. +int
  52915. +paste_tail(coord_t *coord, reiser4_item_data *data,
  52916. + carry_plugin_info *info UNUSED_ARG)
  52917. +{
  52918. + unsigned old_item_length;
  52919. + char *item;
  52920. +
  52921. + /* length the item had before resizing has been performed */
  52922. + old_item_length = item_length_by_coord(coord) - data->length;
  52923. +
  52924. + /* tail items never get pasted in the middle */
  52925. + assert("vs-363",
  52926. + (coord->unit_pos == 0 && coord->between == BEFORE_UNIT) ||
  52927. + (coord->unit_pos == old_item_length - 1 &&
  52928. + coord->between == AFTER_UNIT) ||
  52929. + (coord->unit_pos == 0 && old_item_length == 0
  52930. + && coord->between == AT_UNIT));
  52931. +
  52932. + item = item_body_by_coord(coord);
  52933. + if (coord->unit_pos == 0)
  52934. + /* make space for pasted data when pasting at the beginning of
  52935. + the item */
  52936. + memmove(item + data->length, item, old_item_length);
  52937. +
  52938. + if (coord->between == AFTER_UNIT)
  52939. + coord->unit_pos++;
  52940. +
  52941. + if (data->data) {
  52942. + assert("vs-554", data->user == 0 || data->user == 1);
  52943. + if (data->user) {
  52944. + assert("nikita-3035", reiser4_schedulable());
  52945. + /* copy from user space */
  52946. + if (__copy_from_user(item + coord->unit_pos,
  52947. + (const char __user *)data->data,
  52948. + (unsigned)data->length))
  52949. + return RETERR(-EFAULT);
  52950. + } else
  52951. + /* copy from kernel space */
  52952. + memcpy(item + coord->unit_pos, data->data,
  52953. + (unsigned)data->length);
  52954. + } else {
  52955. + memset(item + coord->unit_pos, 0, (unsigned)data->length);
  52956. + }
  52957. + return 0;
  52958. +}
  52959. +
  52960. +/* plugin->u.item.b.fast_paste */
  52961. +
  52962. +/* plugin->u.item.b.can_shift
  52963. + number of units is returned via return value, number of bytes via @size. For
  52964. + tail items they coincide */
  52965. +int
  52966. +can_shift_tail(unsigned free_space, coord_t * source UNUSED_ARG,
  52967. + znode * target UNUSED_ARG, shift_direction direction UNUSED_ARG,
  52968. + unsigned *size, unsigned want)
  52969. +{
  52970. + /* make sure that we do not want to shift more than we have */
  52971. + assert("vs-364", want > 0
  52972. + && want <= (unsigned)item_length_by_coord(source));
  52973. +
  52974. + *size = min(want, free_space);
  52975. + return *size;
  52976. +}
  52977. +
  52978. +/* plugin->u.item.b.copy_units */
  52979. +void
  52980. +copy_units_tail(coord_t * target, coord_t * source,
  52981. + unsigned from, unsigned count,
  52982. + shift_direction where_is_free_space,
  52983. + unsigned free_space UNUSED_ARG)
  52984. +{
  52985. + /* make sure that item @target is expanded already */
  52986. + assert("vs-366", (unsigned)item_length_by_coord(target) >= count);
  52987. + assert("vs-370", free_space >= count);
  52988. +
  52989. + if (where_is_free_space == SHIFT_LEFT) {
  52990. + /* append item @target with @count first bytes of @source */
  52991. + assert("vs-365", from == 0);
  52992. +
  52993. + memcpy((char *)item_body_by_coord(target) +
  52994. + item_length_by_coord(target) - count,
  52995. + (char *)item_body_by_coord(source), count);
  52996. + } else {
  52997. + /* target item is moved to right already */
  52998. + reiser4_key key;
  52999. +
  53000. + assert("vs-367",
  53001. + (unsigned)item_length_by_coord(source) == from + count);
  53002. +
  53003. + memcpy((char *)item_body_by_coord(target),
  53004. + (char *)item_body_by_coord(source) + from, count);
  53005. +
  53006. + /* new units are inserted before first unit in an item,
  53007. + therefore, we have to update item key */
  53008. + item_key_by_coord(source, &key);
  53009. + set_key_offset(&key, get_key_offset(&key) + from);
  53010. +
  53011. + node_plugin_by_node(target->node)->update_item_key(target, &key,
  53012. + NULL /*info */);
  53013. + }
  53014. +}
  53015. +
  53016. +/* plugin->u.item.b.create_hook */
  53017. +
  53018. +/* item_plugin->b.kill_hook
  53019. + this is called when @count units starting from @from-th one are going to be removed
  53020. + */
  53021. +int
  53022. +kill_hook_tail(const coord_t * coord, pos_in_node_t from,
  53023. + pos_in_node_t count, struct carry_kill_data *kdata)
  53024. +{
  53025. + reiser4_key key;
  53026. + loff_t start, end;
  53027. +
  53028. + assert("vs-1577", kdata);
  53029. + assert("vs-1579", kdata->inode);
  53030. +
  53031. + item_key_by_coord(coord, &key);
  53032. + start = get_key_offset(&key) + from;
  53033. + end = start + count;
  53034. + fake_kill_hook_tail(kdata->inode, start, end, kdata->params.truncate);
  53035. + return 0;
  53036. +}
  53037. +
  53038. +/* plugin->u.item.b.shift_hook */
  53039. +
  53040. +/* helper for kill_units_tail and cut_units_tail */
  53041. +static int
  53042. +do_cut_or_kill(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
  53043. + reiser4_key * smallest_removed, reiser4_key * new_first)
  53044. +{
  53045. + pos_in_node_t count;
  53046. +
  53047. + /* this method is only called to remove part of item */
  53048. + assert("vs-374", (to - from + 1) < item_length_by_coord(coord));
  53049. + /* tail items are never cut from the middle of an item */
  53050. + assert("vs-396", ergo(from != 0, to == coord_last_unit_pos(coord)));
  53051. + assert("vs-1558", ergo(from == 0, to < coord_last_unit_pos(coord)));
  53052. +
  53053. + count = to - from + 1;
  53054. +
  53055. + if (smallest_removed) {
  53056. + /* store smallest key removed */
  53057. + item_key_by_coord(coord, smallest_removed);
  53058. + set_key_offset(smallest_removed,
  53059. + get_key_offset(smallest_removed) + from);
  53060. + }
  53061. + if (new_first) {
  53062. + /* head of item is cut */
  53063. + assert("vs-1529", from == 0);
  53064. +
  53065. + item_key_by_coord(coord, new_first);
  53066. + set_key_offset(new_first,
  53067. + get_key_offset(new_first) + from + count);
  53068. + }
  53069. +
  53070. + if (REISER4_DEBUG)
  53071. + memset((char *)item_body_by_coord(coord) + from, 0, count);
  53072. + return count;
  53073. +}
  53074. +
  53075. +/* plugin->u.item.b.cut_units */
  53076. +int
  53077. +cut_units_tail(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
  53078. + struct carry_cut_data *cdata UNUSED_ARG,
  53079. + reiser4_key * smallest_removed, reiser4_key * new_first)
  53080. +{
  53081. + return do_cut_or_kill(coord, from, to, smallest_removed, new_first);
  53082. +}
  53083. +
  53084. +/* plugin->u.item.b.kill_units */
  53085. +int
  53086. +kill_units_tail(coord_t * coord, pos_in_node_t from, pos_in_node_t to,
  53087. + struct carry_kill_data *kdata, reiser4_key * smallest_removed,
  53088. + reiser4_key * new_first)
  53089. +{
  53090. + kill_hook_tail(coord, from, to - from + 1, kdata);
  53091. + return do_cut_or_kill(coord, from, to, smallest_removed, new_first);
  53092. +}
  53093. +
  53094. +/* plugin->u.item.b.unit_key */
  53095. +reiser4_key *unit_key_tail(const coord_t * coord, reiser4_key * key)
  53096. +{
  53097. + assert("vs-375", coord_is_existing_unit(coord));
  53098. +
  53099. + item_key_by_coord(coord, key);
  53100. + set_key_offset(key, (get_key_offset(key) + coord->unit_pos));
  53101. +
  53102. + return key;
  53103. +}
  53104. +
  53105. +/* plugin->u.item.b.estimate
  53106. + plugin->u.item.b.item_data_by_flow */
  53107. +
  53108. +/* tail readpage function. It is called from readpage_tail(). */
  53109. +static int do_readpage_tail(uf_coord_t *uf_coord, struct page *page)
  53110. +{
  53111. + tap_t tap;
  53112. + int result;
  53113. + coord_t coord;
  53114. + lock_handle lh;
  53115. + int count, mapped;
  53116. + struct inode *inode;
  53117. + char *pagedata;
  53118. +
  53119. + /* saving passed coord in order not to move it by tap. */
  53120. + init_lh(&lh);
  53121. + copy_lh(&lh, uf_coord->lh);
  53122. + inode = page->mapping->host;
  53123. + coord_dup(&coord, &uf_coord->coord);
  53124. +
  53125. + reiser4_tap_init(&tap, &coord, &lh, ZNODE_READ_LOCK);
  53126. +
  53127. + if ((result = reiser4_tap_load(&tap)))
  53128. + goto out_tap_done;
  53129. +
  53130. + /* lookup until page is filled up. */
  53131. + for (mapped = 0; mapped < PAGE_SIZE; ) {
  53132. + /* number of bytes to be copied to page */
  53133. + count = item_length_by_coord(&coord) - coord.unit_pos;
  53134. + if (count > PAGE_SIZE - mapped)
  53135. + count = PAGE_SIZE - mapped;
  53136. +
  53137. + /* temporarily map the page to get its data address */
  53138. + pagedata = kmap_atomic(page);
  53139. +
  53140. + /* copy tail item to page */
  53141. + memcpy(pagedata + mapped,
  53142. + ((char *)item_body_by_coord(&coord) + coord.unit_pos),
  53143. + count);
  53144. + mapped += count;
  53145. +
  53146. + flush_dcache_page(page);
  53147. +
  53148. + /* dettach page from address space */
  53149. + kunmap_atomic(pagedata);
  53150. +
  53151. + /* Getting next tail item. */
  53152. + if (mapped < PAGE_SIZE) {
  53153. + /*
  53154. + * unlock page in order to avoid keep it locked
  53155. + * during tree lookup, which takes long term locks
  53156. + */
  53157. + unlock_page(page);
  53158. +
  53159. + /* getting right neighbour. */
  53160. + result = go_dir_el(&tap, RIGHT_SIDE, 0);
  53161. +
  53162. + /* lock page back */
  53163. + lock_page(page);
  53164. + if (PageUptodate(page)) {
  53165. + /*
  53166. + * another thread read the page, we have
  53167. + * nothing to do
  53168. + */
  53169. + result = 0;
  53170. + goto out_unlock_page;
  53171. + }
  53172. +
  53173. + if (result) {
  53174. + if (result == -E_NO_NEIGHBOR) {
  53175. + /*
  53176. + * right neighbor is not a formatted
  53177. + * node
  53178. + */
  53179. + result = 0;
  53180. + goto done;
  53181. + } else {
  53182. + goto out_tap_relse;
  53183. + }
  53184. + } else {
  53185. + if (!inode_file_plugin(inode)->
  53186. + owns_item(inode, &coord)) {
  53187. + /* item of another file is found */
  53188. + result = 0;
  53189. + goto done;
  53190. + }
  53191. + }
  53192. + }
  53193. + }
  53194. +
  53195. + done:
  53196. + if (mapped != PAGE_SIZE)
  53197. + zero_user_segment(page, mapped, PAGE_SIZE);
  53198. + SetPageUptodate(page);
  53199. + out_unlock_page:
  53200. + unlock_page(page);
  53201. + out_tap_relse:
  53202. + reiser4_tap_relse(&tap);
  53203. + out_tap_done:
  53204. + reiser4_tap_done(&tap);
  53205. + return result;
  53206. +}
  53207. +
  53208. +/*
  53209. + * plugin->s.file.readpage
  53210. + *
  53211. + * reiser4_read_dispatch->read_unix_file->page_cache_readahead->
  53212. + * ->reiser4_readpage_dispatch->readpage_unix_file->readpage_tail
  53213. + * or
  53214. + * filemap_fault->reiser4_readpage_dispatch->readpage_unix_file->readpage_tail
  53215. + *
  53216. + * At the beginning: coord->node is read locked, zloaded, page is locked,
  53217. + * coord is set to existing unit inside of tail item.
  53218. + */
  53219. +int readpage_tail(void *vp, struct page *page)
  53220. +{
  53221. + uf_coord_t *uf_coord = vp;
  53222. + ON_DEBUG(coord_t * coord = &uf_coord->coord);
  53223. + ON_DEBUG(reiser4_key key);
  53224. +
  53225. + assert("umka-2515", PageLocked(page));
  53226. + assert("umka-2516", !PageUptodate(page));
  53227. + assert("umka-2517", !jprivate(page) && !PagePrivate(page));
  53228. + assert("umka-2518", page->mapping && page->mapping->host);
  53229. +
  53230. + assert("umka-2519", znode_is_loaded(coord->node));
  53231. + assert("umka-2520", item_is_tail(coord));
  53232. + assert("umka-2521", coord_is_existing_unit(coord));
  53233. + assert("umka-2522", znode_is_rlocked(coord->node));
  53234. + assert("umka-2523",
  53235. + page->mapping->host->i_ino ==
  53236. + get_key_objectid(item_key_by_coord(coord, &key)));
  53237. +
  53238. + return do_readpage_tail(uf_coord, page);
  53239. +}
  53240. +
  53241. +/**
  53242. + * overwrite_tail
  53243. + * @flow:
  53244. + * @coord:
  53245. + *
  53246. + * Overwrites tail item or its part by user data. Returns number of bytes
  53247. + * written or error code.
  53248. + */
  53249. +static int overwrite_tail(flow_t *flow, coord_t *coord)
  53250. +{
  53251. + unsigned count;
  53252. +
  53253. + assert("vs-570", flow->user == 1);
  53254. + assert("vs-946", flow->data);
  53255. + assert("vs-947", coord_is_existing_unit(coord));
  53256. + assert("vs-948", znode_is_write_locked(coord->node));
  53257. + assert("nikita-3036", reiser4_schedulable());
  53258. +
  53259. + count = item_length_by_coord(coord) - coord->unit_pos;
  53260. + if (count > flow->length)
  53261. + count = flow->length;
  53262. +
  53263. + if (__copy_from_user((char *)item_body_by_coord(coord) + coord->unit_pos,
  53264. + (const char __user *)flow->data, count))
  53265. + return RETERR(-EFAULT);
  53266. +
  53267. + znode_make_dirty(coord->node);
  53268. + return count;
  53269. +}
  53270. +
/**
 * insert_first_tail - insert the first tail item of a file
 * @inode: inode of the file being written to
 * @flow: flow describing user data (key holds the write offset)
 * @coord: coord where the new item is to be inserted
 * @lh: lock handle of the locked node
 *
 * Returns number of bytes written or error code.  When a hole has to be
 * created first, 0 is returned (no user bytes were consumed).
 */
static ssize_t insert_first_tail(struct inode *inode, flow_t *flow,
				 coord_t *coord, lock_handle *lh)
{
	int result;
	loff_t to_write;
	struct unix_file_info *uf_info;

	if (get_key_offset(&flow->key) != 0) {
		/*
		 * file is empty and we have to write not to the beginning of
		 * file. Create a hole at the beginning of file. On success
		 * insert_flow returns 0 as number of written bytes which is
		 * what we have to return on padding a file with holes
		 */
		flow->data = NULL;
		flow->length = get_key_offset(&flow->key);
		set_key_offset(&flow->key, 0);
		/*
		 * holes in files built of tails are stored just like if there
		 * were real data which are all zeros.
		 */
		inode_add_bytes(inode, flow->length);
		result = reiser4_insert_flow(coord, lh, flow);
		/* flow->length is the unwritten remainder; undo the
		   optimistic inode_add_bytes() for it */
		if (flow->length)
			inode_sub_bytes(inode, flow->length);

		uf_info = unix_file_inode_data(inode);

		/*
		 * first item insertion is only possible when writing to empty
		 * file or performing tail conversion
		 */
		/* NOTE(review): assert label is empty ("") — should carry a
		   unique id like the other asserts in this file */
		assert("", (uf_info->container == UF_CONTAINER_EMPTY ||
			    (reiser4_inode_get_flag(inode,
						    REISER4_PART_MIXED) &&
			     reiser4_inode_get_flag(inode,
						    REISER4_PART_IN_CONV))));
		/* if file was empty - update its state */
		if (result == 0 && uf_info->container == UF_CONTAINER_EMPTY)
			uf_info->container = UF_CONTAINER_TAILS;
		return result;
	}

	inode_add_bytes(inode, flow->length);

	/* writing at offset 0: insert the data itself; report how many of
	   the requested bytes were actually consumed from the flow */
	to_write = flow->length;
	result = reiser4_insert_flow(coord, lh, flow);
	if (flow->length)
		inode_sub_bytes(inode, flow->length);
	return (to_write - flow->length) ? (to_write - flow->length) : result;
}
  53331. +
/**
 * append_tail - append data (or a hole) past the end of a tail item
 * @inode: inode of the file being written to
 * @flow: flow describing user data (key holds the write offset)
 * @coord: coord of the last unit of the file's last tail item
 * @lh: lock handle of the locked node
 *
 * Returns number of bytes written or error code.  When the write offset
 * lies beyond current end of file, a hole is inserted first and 0 is
 * returned (no user bytes were consumed on that pass).
 */
static ssize_t append_tail(struct inode *inode,
			   flow_t *flow, coord_t *coord, lock_handle *lh)
{
	int result;
	reiser4_key append_key;
	loff_t to_write;

	if (!keyeq(&flow->key, append_key_tail(coord, &append_key))) {
		/* write offset is past EOF: pad the gap with a hole first */
		flow->data = NULL;
		flow->length = get_key_offset(&flow->key) - get_key_offset(&append_key);
		set_key_offset(&flow->key, get_key_offset(&append_key));
		/*
		 * holes in files built of tails are stored just like if there
		 * were real data which are all zeros.
		 */
		inode_add_bytes(inode, flow->length);
		result = reiser4_insert_flow(coord, lh, flow);
		/* undo optimistic inode_add_bytes() for the unwritten part */
		if (flow->length)
			inode_sub_bytes(inode, flow->length);
		return result;
	}

	inode_add_bytes(inode, flow->length);

	/* contiguous append: report how many of the requested bytes were
	   actually consumed from the flow */
	to_write = flow->length;
	result = reiser4_insert_flow(coord, lh, flow);
	if (flow->length)
		inode_sub_bytes(inode, flow->length);
	return (to_write - flow->length) ? (to_write - flow->length) : result;
}
  53371. +
/**
 * write_extent_reserve_space - reserve space for tail write operation
 * @inode: inode of the file being written to
 *
 * Estimates and reserves space which may be required for writing one flow to a
 * file.  Returns 0 on success or the error returned by reiser4_grab_space().
 *
 * NOTE(review): despite "extent" in the name this helper belongs to the
 * tail item plugin (the kernel-doc header above originally said
 * "write_tail_reserve_space") -- the name appears to be a copy-paste from
 * the extent plugin; consider renaming together with its caller.
 */
static int write_extent_reserve_space(struct inode *inode)
{
	__u64 count;
	reiser4_tree *tree;

	/*
	 * to write one flow to a file by tails we have to reserve disk space for:

	 * 1. find_file_item may have to insert empty node to the tree (empty
	 *    leaf node between two extent items). This requires 1 block and
	 *    number of blocks which are necessary to perform insertion of an
	 *    internal item into twig level.
	 *
	 * 2. flow insertion
	 *
	 * 3. stat data update
	 */
	tree = reiser4_tree_by_inode(inode);
	count = estimate_one_insert_item(tree) +
		estimate_insert_flow(tree->height) +
		estimate_one_insert_item(tree);
	grab_space_enable();
	return reiser4_grab_space(count, 0 /* flags */);
}
  53403. +
  53404. +#define PAGE_PER_FLOW 4
  53405. +
  53406. +static loff_t faultin_user_pages(const char __user *buf, size_t count)
  53407. +{
  53408. + loff_t faulted;
  53409. + int to_fault;
  53410. +
  53411. + if (count > PAGE_PER_FLOW * PAGE_SIZE)
  53412. + count = PAGE_PER_FLOW * PAGE_SIZE;
  53413. + faulted = 0;
  53414. + while (count > 0) {
  53415. + to_fault = PAGE_SIZE;
  53416. + if (count < to_fault)
  53417. + to_fault = count;
  53418. + fault_in_pages_readable(buf + faulted, to_fault);
  53419. + count -= to_fault;
  53420. + faulted += to_fault;
  53421. + }
  53422. + return faulted;
  53423. +}
  53424. +
  53425. +ssize_t reiser4_write_tail_noreserve(struct file *file,
  53426. + struct inode * inode,
  53427. + const char __user *buf,
  53428. + size_t count, loff_t *pos)
  53429. +{
  53430. + struct hint hint;
  53431. + int result;
  53432. + flow_t flow;
  53433. + coord_t *coord;
  53434. + lock_handle *lh;
  53435. + znode *loaded;
  53436. +
  53437. + assert("edward-1548", inode != NULL);
  53438. +
  53439. + result = load_file_hint(file, &hint);
  53440. + BUG_ON(result != 0);
  53441. +
  53442. + flow.length = faultin_user_pages(buf, count);
  53443. + flow.user = 1;
  53444. + memcpy(&flow.data, &buf, sizeof(buf));
  53445. + flow.op = WRITE_OP;
  53446. + key_by_inode_and_offset_common(inode, *pos, &flow.key);
  53447. +
  53448. + result = find_file_item(&hint, &flow.key, ZNODE_WRITE_LOCK, inode);
  53449. + if (IS_CBKERR(result))
  53450. + return result;
  53451. +
  53452. + coord = &hint.ext_coord.coord;
  53453. + lh = hint.ext_coord.lh;
  53454. +
  53455. + result = zload(coord->node);
  53456. + BUG_ON(result != 0);
  53457. + loaded = coord->node;
  53458. +
  53459. + if (coord->between == AFTER_UNIT) {
  53460. + /* append with data or hole */
  53461. + result = append_tail(inode, &flow, coord, lh);
  53462. + } else if (coord->between == AT_UNIT) {
  53463. + /* overwrite */
  53464. + result = overwrite_tail(&flow, coord);
  53465. + } else {
  53466. + /* no items of this file yet. insert data or hole */
  53467. + result = insert_first_tail(inode, &flow, coord, lh);
  53468. + }
  53469. + zrelse(loaded);
  53470. + if (result < 0) {
  53471. + done_lh(lh);
  53472. + return result;
  53473. + }
  53474. +
  53475. + /* seal and unlock znode */
  53476. + hint.ext_coord.valid = 0;
  53477. + if (hint.ext_coord.valid)
  53478. + reiser4_set_hint(&hint, &flow.key, ZNODE_WRITE_LOCK);
  53479. + else
  53480. + reiser4_unset_hint(&hint);
  53481. +
  53482. + save_file_hint(file, &hint);
  53483. + return result;
  53484. +}
  53485. +
/**
 * reiser4_write_tail - write method of tail item plugin
 * @file: file to write to
 * @inode: inode of the file
 * @buf: address of user-space buffer
 * @count: number of bytes to write
 * @pos: position in file to write to
 *
 * Reserves disk space for one flow and delegates the actual write to
 * reiser4_write_tail_noreserve().  Returns number of written bytes,
 * -ENOSPC if the reservation fails, or another error code.
 */
ssize_t reiser4_write_tail(struct file *file,
			   struct inode * inode,
			   const char __user *buf,
			   size_t count, loff_t *pos)
{
	if (write_extent_reserve_space(inode))
		return RETERR(-ENOSPC);
	return reiser4_write_tail_noreserve(file, inode, buf, count, pos);
}
  53504. +
#if REISER4_DEBUG

/*
 * Debug-only helper: check that @coord points exactly at the byte
 * addressed by @key, i.e. key offset equals item key offset plus the
 * unit position inside the tail item.
 */
static int
coord_matches_key_tail(const coord_t * coord, const reiser4_key * key)
{
	reiser4_key item_key;

	assert("vs-1356", coord_is_existing_unit(coord));
	/* @key must lie within [item key, append key) of this item */
	assert("vs-1354", keylt(key, append_key_tail(coord, &item_key)));
	assert("vs-1355", keyge(key, item_key_by_coord(coord, &item_key)));
	return get_key_offset(key) ==
	    get_key_offset(&item_key) + coord->unit_pos;

}

#endif
  53521. +
/* plugin->u.item.s.file.read
 *
 * Copies up to f->length bytes from the tail item at hint->ext_coord into
 * the user buffer f->data, advances the flow and the coord, and seals the
 * position in @hint.  Returns 0 on success or -EFAULT.
 */
int reiser4_read_tail(struct file *file UNUSED_ARG, flow_t *f, hint_t *hint)
{
	unsigned count;
	int item_length;
	coord_t *coord;
	uf_coord_t *uf_coord;

	uf_coord = &hint->ext_coord;
	coord = &uf_coord->coord;

	assert("vs-571", f->user == 1);
	/* NOTE(review): assert id "vs-571" is duplicated; the next assert
	   should have its own unique id */
	assert("vs-571", f->data);
	assert("vs-967", coord && coord->node);
	assert("vs-1117", znode_is_rlocked(coord->node));
	assert("vs-1118", znode_is_loaded(coord->node));

	assert("nikita-3037", reiser4_schedulable());
	assert("vs-1357", coord_matches_key_tail(coord, &f->key));

	/* calculate number of bytes to read off the item */
	item_length = item_length_by_coord(coord);
	count = item_length_by_coord(coord) - coord->unit_pos;
	if (count > f->length)
		count = f->length;

	/* user page has to be brought in so that major page fault does not
	 * occur here when longterm lock is held */
	if (__copy_to_user((char __user *)f->data,
			   ((char *)item_body_by_coord(coord) + coord->unit_pos),
			   count))
		return RETERR(-EFAULT);

	/* probably mark_page_accessed() should only be called if
	 * coord->unit_pos is zero. */
	mark_page_accessed(znode_page(coord->node));
	move_flow_forward(f, count);

	coord->unit_pos += count;
	/* if the whole item was consumed, park the coord after its last
	   unit so the next lookup continues past this item */
	if (item_length == coord->unit_pos) {
		coord->unit_pos--;
		coord->between = AFTER_UNIT;
	}
	reiser4_set_hint(hint, &f->key, ZNODE_READ_LOCK);
	return 0;
}
  53568. +
  53569. +/*
  53570. + plugin->u.item.s.file.append_key
  53571. + key of first byte which is the next to last byte by addressed by this item
  53572. +*/
  53573. +reiser4_key *append_key_tail(const coord_t * coord, reiser4_key * key)
  53574. +{
  53575. + item_key_by_coord(coord, key);
  53576. + set_key_offset(key, get_key_offset(key) + item_length_by_coord(coord));
  53577. + return key;
  53578. +}
  53579. +
/* plugin->u.item.s.file.init_coord_extension
 *
 * Tail items need no per-coord extension state; simply mark the
 * extended coord as valid.
 */
void init_coord_extension_tail(uf_coord_t * uf_coord, loff_t lookuped)
{
	uf_coord->valid = 1;
}
  53585. +
/*
  plugin->u.item.s.file.get_block

  Report the disk block that backs the tail item at @coord.  Always
  returns 0; the block number is stored through @block.
*/
int
get_block_address_tail(const coord_t * coord, sector_t lblock, sector_t * block)
{
	assert("nikita-3252", znode_get_level(coord->node) == LEAF_LEVEL);

	if (reiser4_blocknr_is_fake(znode_get_block(coord->node)))
		/* if node hasn't obtained its block number yet, return 0.
		 * Lets avoid upsetting users with some cosmic numbers beyond
		 * the device capacity.*/
		*block = 0;
	else
		*block = *znode_get_block(coord->node);
	return 0;
}
  53603. +
  53604. +/*
  53605. + * Local variables:
  53606. + * c-indentation-style: "K&R"
  53607. + * mode-name: "LC"
  53608. + * c-basic-offset: 8
  53609. + * tab-width: 8
  53610. + * fill-column: 79
  53611. + * scroll-step: 1
  53612. + * End:
  53613. + */
  53614. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/item/tail.h linux-4.14.2/fs/reiser4/plugin/item/tail.h
  53615. --- linux-4.14.2.orig/fs/reiser4/plugin/item/tail.h 1970-01-01 01:00:00.000000000 +0100
  53616. +++ linux-4.14.2/fs/reiser4/plugin/item/tail.h 2017-11-26 22:13:09.000000000 +0100
  53617. @@ -0,0 +1,59 @@
  53618. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  53619. +
  53620. +#if !defined( __REISER4_TAIL_H__ )
  53621. +#define __REISER4_TAIL_H__
  53622. +
  53623. +struct tail_coord_extension {
  53624. + int not_used;
  53625. +};
  53626. +
  53627. +struct cut_list;
  53628. +
  53629. +/* plugin->u.item.b.* */
  53630. +reiser4_key *max_key_inside_tail(const coord_t *, reiser4_key *);
  53631. +int can_contain_key_tail(const coord_t * coord, const reiser4_key * key,
  53632. + const reiser4_item_data *);
  53633. +int mergeable_tail(const coord_t * p1, const coord_t * p2);
  53634. +pos_in_node_t nr_units_tail(const coord_t *);
  53635. +lookup_result lookup_tail(const reiser4_key *, lookup_bias, coord_t *);
  53636. +int paste_tail(coord_t *, reiser4_item_data *, carry_plugin_info *);
  53637. +int can_shift_tail(unsigned free_space, coord_t * source,
  53638. + znode * target, shift_direction, unsigned *size,
  53639. + unsigned want);
  53640. +void copy_units_tail(coord_t * target, coord_t * source, unsigned from,
  53641. + unsigned count, shift_direction, unsigned free_space);
  53642. +int kill_hook_tail(const coord_t *, pos_in_node_t from, pos_in_node_t count,
  53643. + struct carry_kill_data *);
  53644. +int cut_units_tail(coord_t *, pos_in_node_t from, pos_in_node_t to,
  53645. + struct carry_cut_data *, reiser4_key * smallest_removed,
  53646. + reiser4_key * new_first);
  53647. +int kill_units_tail(coord_t *, pos_in_node_t from, pos_in_node_t to,
  53648. + struct carry_kill_data *, reiser4_key * smallest_removed,
  53649. + reiser4_key * new_first);
  53650. +reiser4_key *unit_key_tail(const coord_t *, reiser4_key *);
  53651. +
  53652. +/* plugin->u.item.s.* */
  53653. +ssize_t reiser4_write_tail_noreserve(struct file *file, struct inode * inode,
  53654. + const char __user *buf, size_t count,
  53655. + loff_t *pos);
  53656. +ssize_t reiser4_write_tail(struct file *file, struct inode * inode,
  53657. + const char __user *buf, size_t count, loff_t *pos);
  53658. +int reiser4_read_tail(struct file *, flow_t *, hint_t *);
  53659. +int readpage_tail(void *vp, struct page *page);
  53660. +reiser4_key *append_key_tail(const coord_t *, reiser4_key *);
  53661. +void init_coord_extension_tail(uf_coord_t *, loff_t offset);
  53662. +int get_block_address_tail(const coord_t *, sector_t, sector_t *);
  53663. +
  53664. +/* __REISER4_TAIL_H__ */
  53665. +#endif
  53666. +
  53667. +/* Make Linus happy.
  53668. + Local variables:
  53669. + c-indentation-style: "K&R"
  53670. + mode-name: "LC"
  53671. + c-basic-offset: 8
  53672. + tab-width: 8
  53673. + fill-column: 120
  53674. + scroll-step: 1
  53675. + End:
  53676. +*/
  53677. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/Makefile linux-4.14.2/fs/reiser4/plugin/Makefile
  53678. --- linux-4.14.2.orig/fs/reiser4/plugin/Makefile 1970-01-01 01:00:00.000000000 +0100
  53679. +++ linux-4.14.2/fs/reiser4/plugin/Makefile 2017-11-26 22:13:09.000000000 +0100
  53680. @@ -0,0 +1,26 @@
  53681. +obj-$(CONFIG_REISER4_FS) += plugins.o
  53682. +
  53683. +plugins-objs := \
  53684. + plugin.o \
  53685. + plugin_set.o \
  53686. + object.o \
  53687. + inode_ops.o \
  53688. + inode_ops_rename.o \
  53689. + file_ops.o \
  53690. + file_ops_readdir.o \
  53691. + file_plugin_common.o \
  53692. + dir_plugin_common.o \
  53693. + digest.o \
  53694. + hash.o \
  53695. + fibration.o \
  53696. + tail_policy.o \
  53697. + regular.o
  53698. +
  53699. +obj-$(CONFIG_REISER4_FS) += item/
  53700. +obj-$(CONFIG_REISER4_FS) += file/
  53701. +obj-$(CONFIG_REISER4_FS) += dir/
  53702. +obj-$(CONFIG_REISER4_FS) += node/
  53703. +obj-$(CONFIG_REISER4_FS) += compress/
  53704. +obj-$(CONFIG_REISER4_FS) += space/
  53705. +obj-$(CONFIG_REISER4_FS) += disk_format/
  53706. +obj-$(CONFIG_REISER4_FS) += security/
  53707. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/node/Makefile linux-4.14.2/fs/reiser4/plugin/node/Makefile
  53708. --- linux-4.14.2.orig/fs/reiser4/plugin/node/Makefile 1970-01-01 01:00:00.000000000 +0100
  53709. +++ linux-4.14.2/fs/reiser4/plugin/node/Makefile 2017-11-26 22:13:09.000000000 +0100
  53710. @@ -0,0 +1,6 @@
  53711. +obj-$(CONFIG_REISER4_FS) += node_plugins.o
  53712. +
  53713. +node_plugins-objs := \
  53714. + node.o \
  53715. + node40.o \
  53716. + node41.o
  53717. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/node/node40.c linux-4.14.2/fs/reiser4/plugin/node/node40.c
  53718. --- linux-4.14.2.orig/fs/reiser4/plugin/node/node40.c 1970-01-01 01:00:00.000000000 +0100
  53719. +++ linux-4.14.2/fs/reiser4/plugin/node/node40.c 2017-11-26 22:13:09.000000000 +0100
  53720. @@ -0,0 +1,3073 @@
  53721. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  53722. +
  53723. +#include "../../debug.h"
  53724. +#include "../../key.h"
  53725. +#include "../../coord.h"
  53726. +#include "../plugin_header.h"
  53727. +#include "../item/item.h"
  53728. +#include "node.h"
  53729. +#include "node40.h"
  53730. +#include "../plugin.h"
  53731. +#include "../../jnode.h"
  53732. +#include "../../znode.h"
  53733. +#include "../../pool.h"
  53734. +#include "../../carry.h"
  53735. +#include "../../tap.h"
  53736. +#include "../../tree.h"
  53737. +#include "../../super.h"
  53738. +#include "../../reiser4.h"
  53739. +
  53740. +#include <asm/uaccess.h>
  53741. +#include <linux/types.h>
  53742. +#include <linux/prefetch.h>
  53743. +
  53744. +/* leaf 40 format:
  53745. +
  53746. + [node header | item 0, item 1, .., item N-1 | free space | item_head N-1, .. item_head 1, item head 0 ]
  53747. + plugin_id (16) key
  53748. + free_space (16) pluginid (16)
  53749. + free_space_start (16) offset (16)
  53750. + level (8)
  53751. + num_items (16)
  53752. + magic (32)
  53753. + flush_time (32)
  53754. +*/
  53755. +/* NIKITA-FIXME-HANS: I told you guys not less than 10 times to not call it r4fs. Change to "ReIs". */
  53756. +/* magic number that is stored in ->magic field of node header */
  53757. +static const __u32 REISER4_NODE40_MAGIC = 0x52344653; /* (*(__u32 *)"R4FS"); */
  53758. +
  53759. +static int prepare_for_update(znode * left, znode * right,
  53760. + carry_plugin_info * info);
  53761. +
/* header of node of reiser40 format is at the beginning of node */
static inline node40_header *node40_node_header(const znode * node /* node to
								    * query */ )
{
	assert("nikita-567", node != NULL);
	assert("nikita-568", znode_page(node) != NULL);
	assert("nikita-569", zdata(node) != NULL);
	/* node40_header occupies the very first bytes of node data */
	return (node40_header *) zdata(node);
}
  53771. +
  53772. +/* functions to get/set fields of node40_header */
  53773. +#define nh40_get_magic(nh) le32_to_cpu(get_unaligned(&(nh)->magic))
  53774. +#define nh40_get_free_space(nh) le16_to_cpu(get_unaligned(&(nh)->free_space))
  53775. +#define nh40_get_free_space_start(nh) le16_to_cpu(get_unaligned(&(nh)->free_space_start))
  53776. +#define nh40_get_level(nh) get_unaligned(&(nh)->level)
  53777. +#define nh40_get_num_items(nh) le16_to_cpu(get_unaligned(&(nh)->nr_items))
  53778. +#define nh40_get_flush_id(nh) le64_to_cpu(get_unaligned(&(nh)->flush_id))
  53779. +
  53780. +#define nh40_set_magic(nh, value) put_unaligned(cpu_to_le32(value), &(nh)->magic)
  53781. +#define nh40_set_free_space(nh, value) put_unaligned(cpu_to_le16(value), &(nh)->free_space)
  53782. +#define nh40_set_free_space_start(nh, value) put_unaligned(cpu_to_le16(value), &(nh)->free_space_start)
  53783. +#define nh40_set_level(nh, value) put_unaligned(value, &(nh)->level)
  53784. +#define nh40_set_num_items(nh, value) put_unaligned(cpu_to_le16(value), &(nh)->nr_items)
  53785. +#define nh40_set_mkfs_id(nh, value) put_unaligned(cpu_to_le32(value), &(nh)->mkfs_id)
  53786. +
  53787. +/* plugin field of node header should be read/set by
  53788. + plugin_by_disk_id/save_disk_plugin */
  53789. +
/* array of item headers is at the end of node; headers are laid out from
   the node end towards its beginning, so header of item @pos is the
   (@pos + 1)-th item_header40 counted back from the end of the node */
static inline item_header40 *node40_ih_at(const znode * node, unsigned pos)
{
	return (item_header40 *) (zdata(node) + znode_size(node)) - pos - 1;
}
  53795. +
/* same as node40_ih_at(), but taking the item position from @coord:
   header of item coord->item_pos counted back from the node end
   (historically: page_address(node->pg) + node size, minus pos, minus 1) */
static inline item_header40 *node40_ih_at_coord(const coord_t * coord)
{
	return (item_header40 *) (zdata(coord->node) +
				  znode_size(coord->node)) - (coord->item_pos) -
	    1;
}
  53804. +
  53805. +/* functions to get/set fields of item_header40 */
  53806. +#define ih40_get_offset(ih) le16_to_cpu(get_unaligned(&(ih)->offset))
  53807. +
  53808. +#define ih40_set_offset(ih, value) put_unaligned(cpu_to_le16(value), &(ih)->offset)
  53809. +
  53810. +/* plugin field of item header should be read/set by
  53811. + plugin_by_disk_id/save_disk_plugin */
  53812. +
  53813. +/* plugin methods */
  53814. +
/* plugin->u.node.item_overhead
   look for description of this method in plugin/node/node.h

   Every item stored in a node40 costs one item_header40 of metadata in
   addition to its body. */
size_t
item_overhead_node40(const znode * node UNUSED_ARG, flow_t * f UNUSED_ARG)
{
	return sizeof(item_header40);
}
  53822. +
/* plugin->u.node.free_space
   look for description of this method in plugin/node/node.h

   Free space is maintained in the on-disk node header; just read it. */
size_t free_space_node40(znode * node)
{
	assert("nikita-577", node != NULL);
	assert("nikita-578", znode_is_loaded(node));
	assert("nikita-579", zdata(node) != NULL);

	return nh40_get_free_space(node40_node_header(node));
}
  53833. +
/* private inline version of node40_num_of_items() for use in this file. This
   is necessary, because address of node40_num_of_items() is taken and it is
   never inlined as a result. */
static inline short node40_num_of_items_internal(const znode * node)
{
	/* item count lives in the on-disk node header */
	return nh40_get_num_items(node40_node_header(node));
}
  53841. +
#if REISER4_DEBUG
/* debug check: the on-disk item count must agree with the cached in-memory
   counter, and the node must be write locked by the caller */
static inline void check_num_items(const znode * node)
{
	assert("nikita-2749",
	       node40_num_of_items_internal(node) == node->nr_items);
	assert("nikita-2746", znode_is_write_locked(node));
}
#else
#define check_num_items(node) noop
#endif
  53852. +
/* plugin->u.node.num_of_items
   look for description of this method in plugin/node/node.h */
int num_of_items_node40(const znode * node)
{
	return node40_num_of_items_internal(node);
}
  53859. +
/* set the item count to @value in both the on-disk header @nh and the
   cached znode counter, verifying consistency before and after */
static void
node40_set_num_items(znode * node, node40_header * nh, unsigned value)
{
	assert("nikita-2751", node != NULL);
	assert("nikita-2750", nh == node40_node_header(node));

	check_num_items(node);
	nh40_set_num_items(nh, value);
	node->nr_items = value;
	check_num_items(node);
}
  53871. +
  53872. +/* plugin->u.node.item_by_coord
  53873. + look for description of this method in plugin/node/node.h */
  53874. +char *item_by_coord_node40(const coord_t * coord)
  53875. +{
  53876. + item_header40 *ih;
  53877. + char *p;
  53878. +
  53879. + /* @coord is set to existing item */
  53880. + assert("nikita-596", coord != NULL);
  53881. + assert("vs-255", coord_is_existing_item(coord));
  53882. +
  53883. + ih = node40_ih_at_coord(coord);
  53884. + p = zdata(coord->node) + ih40_get_offset(ih);
  53885. + return p;
  53886. +}
  53887. +
  53888. +/* plugin->u.node.length_by_coord
  53889. + look for description of this method in plugin/node/node.h */
  53890. +int length_by_coord_node40(const coord_t * coord)
  53891. +{
  53892. + item_header40 *ih;
  53893. + int result;
  53894. +
  53895. + /* @coord is set to existing item */
  53896. + assert("vs-256", coord != NULL);
  53897. + assert("vs-257", coord_is_existing_item(coord));
  53898. +
  53899. + ih = node40_ih_at_coord(coord);
  53900. + if ((int)coord->item_pos ==
  53901. + node40_num_of_items_internal(coord->node) - 1)
  53902. + result =
  53903. + nh40_get_free_space_start(node40_node_header(coord->node)) -
  53904. + ih40_get_offset(ih);
  53905. + else
  53906. + result = ih40_get_offset(ih - 1) - ih40_get_offset(ih);
  53907. +
  53908. + return result;
  53909. +}
  53910. +
/* length of item @item_pos in @node; same computation as
   length_by_coord_node40() but addressed by (node, pos) instead of coord.
   NOTE(review): assert ids "vs-256"/"vs-257" are duplicated from
   length_by_coord_node40(), and the "@coord" comment below is stale --
   this function takes no coord */
static pos_in_node_t
node40_item_length(const znode * node, pos_in_node_t item_pos)
{
	item_header40 *ih;
	pos_in_node_t result;

	/* @coord is set to existing item */
	assert("vs-256", node != NULL);
	assert("vs-257", node40_num_of_items_internal(node) > item_pos);

	ih = node40_ih_at(node, item_pos);
	/* last item ends at free space start; others end where the next
	   item begins */
	if (item_pos == node40_num_of_items_internal(node) - 1)
		result =
		    nh40_get_free_space_start(node40_node_header(node)) -
		    ih40_get_offset(ih);
	else
		result = ih40_get_offset(ih - 1) - ih40_get_offset(ih);

	return result;
}
  53931. +
/* plugin->u.node.plugin_by_coord
   look for description of this method in plugin/node/node.h */
item_plugin *plugin_by_coord_node40(const coord_t * coord)
{
	item_header40 *ih;
	item_plugin *result;

	/* @coord is set to existing item */
	assert("vs-258", coord != NULL);
	assert("vs-259", coord_is_existing_item(coord));

	ih = node40_ih_at_coord(coord);
	/* pass NULL instead of current tree. This is time critical call. */
	result = item_plugin_by_disk_id(NULL, &ih->plugin_id);
	return result;
}
  53948. +
  53949. +/* plugin->u.node.key_at
  53950. + look for description of this method in plugin/node/node.h */
  53951. +reiser4_key *key_at_node40(const coord_t * coord, reiser4_key * key)
  53952. +{
  53953. + item_header40 *ih;
  53954. +
  53955. + assert("nikita-1765", coord_is_existing_item(coord));
  53956. +
  53957. + /* @coord is set to existing item */
  53958. + ih = node40_ih_at_coord(coord);
  53959. + memcpy(key, &ih->key, sizeof(reiser4_key));
  53960. + return key;
  53961. +}
  53962. +
  53963. +/* VS-FIXME-HANS: please review whether the below are properly disabled when debugging is disabled */
  53964. +
  53965. +#define NODE_INCSTAT(n, counter) \
  53966. + reiser4_stat_inc_at_level(znode_get_level(n), node.lookup.counter)
  53967. +
  53968. +#define NODE_ADDSTAT(n, counter, val) \
  53969. + reiser4_stat_add_at_level(znode_get_level(n), node.lookup.counter, val)
  53970. +
  53971. +/* plugin->u.node.lookup
  53972. + look for description of this method in plugin/node/node.h */
  53973. +node_search_result lookup_node40(znode * node /* node to query */ ,
  53974. + const reiser4_key * key /* key to look for */ ,
  53975. + lookup_bias bias /* search bias */ ,
  53976. + coord_t * coord /* resulting coord */ )
  53977. +{
  53978. + int left;
  53979. + int right;
  53980. + int found;
  53981. + int items;
  53982. +
  53983. + item_header40 *lefth;
  53984. + item_header40 *righth;
  53985. +
  53986. + item_plugin *iplug;
  53987. + item_header40 *bstop;
  53988. + item_header40 *ih;
  53989. + cmp_t order;
  53990. +
  53991. + assert("nikita-583", node != NULL);
  53992. + assert("nikita-584", key != NULL);
  53993. + assert("nikita-585", coord != NULL);
  53994. + assert("nikita-2693", znode_is_any_locked(node));
  53995. + cassert(REISER4_SEQ_SEARCH_BREAK > 2);
  53996. +
  53997. + items = node_num_items(node);
  53998. +
  53999. + if (unlikely(items == 0)) {
  54000. + coord_init_first_unit(coord, node);
  54001. + return NS_NOT_FOUND;
  54002. + }
  54003. +
  54004. + /* binary search for item that can contain given key */
  54005. + left = 0;
  54006. + right = items - 1;
  54007. + coord->node = node;
  54008. + coord_clear_iplug(coord);
  54009. + found = 0;
  54010. +
  54011. + lefth = node40_ih_at(node, left);
  54012. + righth = node40_ih_at(node, right);
  54013. +
  54014. + /* It is known that for small arrays sequential search is on average
  54015. + more efficient than binary. This is because sequential search is
  54016. + coded as tight loop that can be better optimized by compilers and
  54017. + for small array size gain from this optimization makes sequential
  54018. + search the winner. Another, maybe more important, reason for this,
  54019. + is that sequential array is more CPU cache friendly, whereas binary
  54020. + search effectively destroys CPU caching.
  54021. +
  54022. + Critical here is the notion of "smallness". Reasonable value of
  54023. + REISER4_SEQ_SEARCH_BREAK can be found by playing with code in
  54024. + fs/reiser4/ulevel/ulevel.c:test_search().
  54025. +
  54026. + Don't try to further optimize sequential search by scanning from
  54027. + right to left in attempt to use more efficient loop termination
  54028. + condition (comparison with 0). This doesn't work.
  54029. +
  54030. + */
  54031. +
  54032. + while (right - left >= REISER4_SEQ_SEARCH_BREAK) {
  54033. + int median;
  54034. + item_header40 *medianh;
  54035. +
  54036. + median = (left + right) / 2;
  54037. + medianh = node40_ih_at(node, median);
  54038. +
  54039. + assert("nikita-1084", median >= 0);
  54040. + assert("nikita-1085", median < items);
  54041. + switch (keycmp(key, &medianh->key)) {
  54042. + case LESS_THAN:
  54043. + right = median;
  54044. + righth = medianh;
  54045. + break;
  54046. + default:
  54047. + wrong_return_value("nikita-586", "keycmp");
  54048. + case GREATER_THAN:
  54049. + left = median;
  54050. + lefth = medianh;
  54051. + break;
  54052. + case EQUAL_TO:
  54053. + do {
  54054. + --median;
  54055. + /* headers are ordered from right to left */
  54056. + ++medianh;
  54057. + } while (median >= 0 && keyeq(key, &medianh->key));
  54058. + right = left = median + 1;
  54059. + ih = lefth = righth = medianh - 1;
  54060. + found = 1;
  54061. + break;
  54062. + }
  54063. + }
  54064. + /* sequential scan. Item headers, and, therefore, keys are stored at
  54065. + the rightmost part of a node from right to left. We are trying to
  54066. + access memory from left to right, and hence, scan in _descending_
  54067. + order of item numbers.
  54068. + */
  54069. + if (!found) {
  54070. + for (left = right, ih = righth; left >= 0; ++ih, --left) {
  54071. + cmp_t comparison;
  54072. +
  54073. + prefetchkey(&(ih + 1)->key);
  54074. + comparison = keycmp(&ih->key, key);
  54075. + if (comparison == GREATER_THAN)
  54076. + continue;
  54077. + if (comparison == EQUAL_TO) {
  54078. + found = 1;
  54079. + do {
  54080. + --left;
  54081. + ++ih;
  54082. + } while (left >= 0 && keyeq(&ih->key, key));
  54083. + ++left;
  54084. + --ih;
  54085. + } else {
  54086. + assert("nikita-1256", comparison == LESS_THAN);
  54087. + }
  54088. + break;
  54089. + }
  54090. + if (unlikely(left < 0))
  54091. + left = 0;
  54092. + }
  54093. +
  54094. + assert("nikita-3212", right >= left);
  54095. + assert("nikita-3214",
  54096. + equi(found, keyeq(&node40_ih_at(node, left)->key, key)));
  54097. +
  54098. + coord_set_item_pos(coord, left);
  54099. + coord->unit_pos = 0;
  54100. + coord->between = AT_UNIT;
  54101. +
  54102. + /* key < leftmost key in a mode or node is corrupted and keys
  54103. + are not sorted */
  54104. + bstop = node40_ih_at(node, (unsigned)left);
  54105. + order = keycmp(&bstop->key, key);
  54106. + if (unlikely(order == GREATER_THAN)) {
  54107. + if (unlikely(left != 0)) {
  54108. + /* screw up */
  54109. + warning("nikita-587", "Key less than %i key in a node",
  54110. + left);
  54111. + reiser4_print_key("key", key);
  54112. + reiser4_print_key("min", &bstop->key);
  54113. + print_coord_content("coord", coord);
  54114. + return RETERR(-EIO);
  54115. + } else {
  54116. + coord->between = BEFORE_UNIT;
  54117. + return NS_NOT_FOUND;
  54118. + }
  54119. + }
  54120. + /* left <= key, ok */
  54121. + iplug = item_plugin_by_disk_id(znode_get_tree(node), &bstop->plugin_id);
  54122. +
  54123. + if (unlikely(iplug == NULL)) {
  54124. + warning("nikita-588", "Unknown plugin %i",
  54125. + le16_to_cpu(get_unaligned(&bstop->plugin_id)));
  54126. + reiser4_print_key("key", key);
  54127. + print_coord_content("coord", coord);
  54128. + return RETERR(-EIO);
  54129. + }
  54130. +
  54131. + coord_set_iplug(coord, iplug);
  54132. +
  54133. + /* if exact key from item header was found by binary search, no
  54134. + further checks are necessary. */
  54135. + if (found) {
  54136. + assert("nikita-1259", order == EQUAL_TO);
  54137. + return NS_FOUND;
  54138. + }
  54139. + if (iplug->b.max_key_inside != NULL) {
  54140. + reiser4_key max_item_key;
  54141. +
  54142. + /* key > max_item_key --- outside of an item */
  54143. + if (keygt(key, iplug->b.max_key_inside(coord, &max_item_key))) {
  54144. + coord->unit_pos = 0;
  54145. + coord->between = AFTER_ITEM;
  54146. + /* FIXME-VS: key we are looking for does not fit into
  54147. + found item. Return NS_NOT_FOUND then. Without that
  54148. + the following case does not work: there is extent of
  54149. + file 10000, 10001. File 10000, 10002 has been just
  54150. + created. When writing to position 0 in that file -
  54151. + traverse_tree will stop here on twig level. When we
  54152. + want it to go down to leaf level
  54153. + */
  54154. + return NS_NOT_FOUND;
  54155. + }
  54156. + }
  54157. +
  54158. + if (iplug->b.lookup != NULL) {
  54159. + return iplug->b.lookup(key, bias, coord);
  54160. + } else {
  54161. + assert("nikita-1260", order == LESS_THAN);
  54162. + coord->between = AFTER_UNIT;
  54163. + return (bias == FIND_EXACT) ? NS_NOT_FOUND : NS_FOUND;
  54164. + }
  54165. +}
  54166. +
  54167. +#undef NODE_ADDSTAT
  54168. +#undef NODE_INCSTAT
  54169. +
  54170. +/* plugin->u.node.estimate
  54171. + look for description of this method in plugin/node/node.h */
  54172. +size_t estimate_node40(znode * node)
  54173. +{
  54174. + size_t result;
  54175. +
  54176. + assert("nikita-597", node != NULL);
  54177. +
  54178. + result = free_space_node40(node) - sizeof(item_header40);
  54179. +
  54180. + return (result > 0) ? result : 0;
  54181. +}
  54182. +
/* plugin->u.node.check
   look for description of this method in plugin/node/node.h */
/*
 * Consistency check of a formatted node40: validates item offsets, key
 * ordering (item and unit keys), per-level item type constraints,
 * mergeability of neighbors and (when REISER4_NODE_DKEYS is set)
 * delimiting keys against left/right neighbors.
 *
 * Returns 0 if the node looks sane; -1 otherwise, storing a static
 * error message in *error.
 */
int check_node40(const znode * node /* node to check */ ,
		 __u32 flags /* check flags */ ,
		 const char **error /* where to store error message */ )
{
	int nr_items;
	int i;
	reiser4_key prev;	/* key of the previously checked unit/item */
	unsigned old_offset;	/* body offset of the previously checked item */
	tree_level level;
	coord_t coord;
	int result;

	assert("nikita-580", node != NULL);
	assert("nikita-581", error != NULL);
	assert("nikita-2948", znode_is_loaded(node));

	/* a node scheduled for removal is not checked */
	if (ZF_ISSET(node, JNODE_HEARD_BANSHEE))
		return 0;

	assert("nikita-582", zdata(node) != NULL);

	nr_items = node40_num_of_items_internal(node);
	if (nr_items < 0) {
		*error = "Negative number of items";
		return -1;
	}

	/* seed the key-ordering check either with the left delimiting key
	   or with the smallest possible key */
	if (flags & REISER4_NODE_DKEYS)
		prev = *znode_get_ld_key((znode *) node);
	else
		prev = *reiser4_min_key();

	old_offset = 0;
	coord_init_zero(&coord);
	coord.node = (znode *) node;
	coord.unit_pos = 0;
	coord.between = AT_UNIT;
	level = znode_get_level(node);
	for (i = 0; i < nr_items; i++) {
		item_header40 *ih;
		reiser4_key unit_key;
		unsigned j;

		ih = node40_ih_at(node, (unsigned)i);
		coord_set_item_pos(&coord, i);
		/* item body must lie between the node header and the array
		   of item headers stored at the end of the node */
		if ((ih40_get_offset(ih) >=
		     znode_size(node) - nr_items * sizeof(item_header40)) ||
		    (ih40_get_offset(ih) < sizeof(node40_header))) {
			*error = "Offset is out of bounds";
			return -1;
		}
		/* item bodies are laid out left to right in item order */
		if (ih40_get_offset(ih) <= old_offset) {
			*error = "Offsets are in wrong order";
			return -1;
		}
		if ((i == 0) && (ih40_get_offset(ih) != sizeof(node40_header))) {
			*error = "Wrong offset of first item";
			return -1;
		}
		old_offset = ih40_get_offset(ih);

		if (keygt(&prev, &ih->key)) {
			*error = "Keys are in wrong order";
			return -1;
		}
		/* key stored in the item header must equal the key of the
		   item's first unit */
		if (!keyeq(&ih->key, unit_key_by_coord(&coord, &unit_key))) {
			*error = "Wrong key of first unit";
			return -1;
		}
		prev = ih->key;
		/* unit keys within the item must be non-decreasing */
		for (j = 0; j < coord_num_units(&coord); ++j) {
			coord.unit_pos = j;
			unit_key_by_coord(&coord, &unit_key);
			if (keygt(&prev, &unit_key)) {
				*error = "Unit keys are in wrong order";
				return -1;
			}
			prev = unit_key;
		}
		coord.unit_pos = 0;
		/* item type must be allowed on this tree level */
		if (level != TWIG_LEVEL && item_is_extent(&coord)) {
			*error = "extent on the wrong level";
			return -1;
		}
		if (level == LEAF_LEVEL && item_is_internal(&coord)) {
			*error = "internal item on the wrong level";
			return -1;
		}
		if (level != LEAF_LEVEL &&
		    !item_is_internal(&coord) && !item_is_extent(&coord)) {
			*error = "wrong item on the internal level";
			return -1;
		}
		if (level > TWIG_LEVEL && !item_is_internal(&coord)) {
			*error = "non-internal item on the internal level";
			return -1;
		}
#if REISER4_DEBUG
		/* let the item plugin run its own consistency check */
		if (item_plugin_by_coord(&coord)->b.check
		    && item_plugin_by_coord(&coord)->b.check(&coord, error))
			return -1;
#endif
		if (i) {
			coord_t prev_coord;
			/* two neighboring items can not be mergeable */
			coord_dup(&prev_coord, &coord);
			coord_prev_item(&prev_coord);
			if (are_items_mergeable(&prev_coord, &coord)) {
				*error = "mergeable items in one node";
				return -1;
			}

		}
	}

	if ((flags & REISER4_NODE_DKEYS) && !node_is_empty(node)) {
		coord_t coord;
		item_plugin *iplug;

		coord_init_last_unit(&coord, node);
		iplug = item_plugin_by_coord(&coord);
		/* the key just past the last item (append_key minus one)
		   must not exceed the right delimiting key */
		if ((item_is_extent(&coord) || item_is_tail(&coord)) &&
		    iplug->s.file.append_key != NULL) {
			reiser4_key mkey;

			iplug->s.file.append_key(&coord, &mkey);
			set_key_offset(&mkey, get_key_offset(&mkey) - 1);
			read_lock_dk(current_tree);
			result = keygt(&mkey, znode_get_rd_key((znode *) node));
			read_unlock_dk(current_tree);
			if (result) {
				*error = "key of rightmost item is too large";
				return -1;
			}
		}
	}
	if (flags & REISER4_NODE_DKEYS) {
		read_lock_tree(current_tree);
		read_lock_dk(current_tree);

		/* NOTE(review): the flag is forced on here, which makes the
		   !REISER4_NODE_TREE_STABLE branches of the ergo() checks
		   below dead code -- looks deliberate, but worth confirming */
		flags |= REISER4_NODE_TREE_STABLE;

		if (keygt(&prev, znode_get_rd_key((znode *) node))) {
			if (flags & REISER4_NODE_TREE_STABLE) {
				*error = "Last key is greater than rdkey";
				read_unlock_dk(current_tree);
				read_unlock_tree(current_tree);
				return -1;
			}
		}
		if (keygt
		    (znode_get_ld_key((znode *) node),
		     znode_get_rd_key((znode *) node))) {
			*error = "ldkey is greater than rdkey";
			read_unlock_dk(current_tree);
			read_unlock_tree(current_tree);
			return -1;
		}
		/* delimiting keys must agree with the connected, live left
		   neighbor */
		if (ZF_ISSET(node, JNODE_LEFT_CONNECTED) &&
		    (node->left != NULL) &&
		    !ZF_ISSET(node->left, JNODE_HEARD_BANSHEE) &&
		    ergo(flags & REISER4_NODE_TREE_STABLE,
			 !keyeq(znode_get_rd_key(node->left),
				znode_get_ld_key((znode *) node)))
		    && ergo(!(flags & REISER4_NODE_TREE_STABLE),
			    keygt(znode_get_rd_key(node->left),
				  znode_get_ld_key((znode *) node)))) {
			*error = "left rdkey or ldkey is wrong";
			read_unlock_dk(current_tree);
			read_unlock_tree(current_tree);
			return -1;
		}
		/* ... and with the connected, live right neighbor */
		if (ZF_ISSET(node, JNODE_RIGHT_CONNECTED) &&
		    (node->right != NULL) &&
		    !ZF_ISSET(node->right, JNODE_HEARD_BANSHEE) &&
		    ergo(flags & REISER4_NODE_TREE_STABLE,
			 !keyeq(znode_get_rd_key((znode *) node),
				znode_get_ld_key(node->right)))
		    && ergo(!(flags & REISER4_NODE_TREE_STABLE),
			    keygt(znode_get_rd_key((znode *) node),
				  znode_get_ld_key(node->right)))) {
			*error = "rdkey or right ldkey is wrong";
			read_unlock_dk(current_tree);
			read_unlock_tree(current_tree);
			return -1;
		}

		read_unlock_dk(current_tree);
		read_unlock_tree(current_tree);
	}

	return 0;
}
  54378. +
  54379. +int parse_node40_common(znode *node, const __u32 magic)
  54380. +{
  54381. + node40_header *header;
  54382. + int result;
  54383. + d8 level;
  54384. +
  54385. + header = node40_node_header((znode *) node);
  54386. + result = -EIO;
  54387. + level = nh40_get_level(header);
  54388. + if (unlikely(((__u8) znode_get_level(node)) != level))
  54389. + warning("nikita-494", "Wrong level found in node: %i != %i",
  54390. + znode_get_level(node), level);
  54391. + else if (unlikely(nh40_get_magic(header) != magic))
  54392. + warning("nikita-495",
  54393. + "Wrong magic in tree node: want %x, got %x",
  54394. + magic, nh40_get_magic(header));
  54395. + else {
  54396. + node->nr_items = node40_num_of_items_internal(node);
  54397. + result = 0;
  54398. + }
  54399. + return RETERR(result);
  54400. +}
  54401. +
/*
 * plugin->u.node.parse
 * look for description of this method in plugin/node/node.h
 *
 * Thin wrapper: parse a plain node40-format node, checking for the
 * standard REISER4_NODE40_MAGIC value.
 */
int parse_node40(znode *node /* node to parse */)
{
	return parse_node40_common(node, REISER4_NODE40_MAGIC);
}
  54410. +
  54411. +/*
  54412. + * common part of ->init_node() for all nodes,
  54413. + * which contain node40_header at the beginning
  54414. + */
  54415. +int init_node40_common(znode *node, node_plugin *nplug,
  54416. + size_t node_header_size, const __u32 magic)
  54417. +{
  54418. + node40_header *header40;
  54419. +
  54420. + assert("nikita-570", node != NULL);
  54421. + assert("nikita-572", zdata(node) != NULL);
  54422. +
  54423. + header40 = node40_node_header(node);
  54424. + memset(header40, 0, sizeof(node40_header));
  54425. +
  54426. + nh40_set_free_space(header40, znode_size(node) - node_header_size);
  54427. + nh40_set_free_space_start(header40, node_header_size);
  54428. + /*
  54429. + * sane hypothesis: 0 in CPU format is 0 in disk format
  54430. + */
  54431. + save_plugin_id(node_plugin_to_plugin(nplug),
  54432. + &header40->common_header.plugin_id);
  54433. + nh40_set_level(header40, znode_get_level(node));
  54434. + nh40_set_magic(header40, magic);
  54435. + nh40_set_mkfs_id(header40, reiser4_mkfs_id(reiser4_get_current_sb()));
  54436. + /*
  54437. + * nr_items: 0
  54438. + * flags: 0
  54439. + */
  54440. + return 0;
  54441. +}
  54442. +
/*
 * plugin->u.node.init
 * look for description of this method in plugin/node/node.h
 *
 * Thin wrapper: initialise a plain node40-format node with the NODE40
 * plugin, the plain header size and the standard magic.
 */
int init_node40(znode *node /* node to initialise */)
{
	return init_node40_common(node, node_plugin_by_id(NODE40_ID),
				  sizeof(node40_header), REISER4_NODE40_MAGIC);
}
  54452. +
#ifdef GUESS_EXISTS
/*
 * Heuristically decide whether @node is formatted with the node plugin
 * identified by @id: both the magic and the plugin id stored in the
 * common header must match.  Returns non-zero on a match.
 */
int guess_node40_common(const znode *node, reiser4_node_id id,
			const __u32 magic)
{
	node40_header *header;

	assert("nikita-1058", node != NULL);
	header = node40_node_header(node);
	return (nh40_get_magic(header) == magic) &&
		(id == plugin_by_disk_id(znode_get_tree(node),
					 REISER4_NODE_PLUGIN_TYPE,
					 &header->common_header.plugin_id)->h.id);
}

/* thin wrapper: guess against the plain node40 id and magic */
int guess_node40(const znode *node /* node to guess plugin of */)
{
	return guess_node40_common(node, NODE40_ID, REISER4_NODE40_MAGIC);
}
#endif
  54472. +
/* plugin->u.node.change_item_size
   look for description of this method in plugin/node/node.h */
/*
 * Grow (or, presumably, shrink for negative @by -- confirm with callers)
 * the item at @coord by @by bytes: shift all item bodies to the right of
 * it, fix up their header offsets and adjust free-space accounting.
 */
void change_item_size_node40(coord_t * coord, int by)
{
	node40_header *nh;
	item_header40 *ih;
	char *item_data;
	int item_length;
	unsigned i;

	/* make sure that @item is coord of existing item */
	assert("vs-210", coord_is_existing_item(coord));

	nh = node40_node_header(coord->node);

	item_data = item_by_coord_node40(coord);
	item_length = length_by_coord_node40(coord);

	/* move item bodies */
	ih = node40_ih_at_coord(coord);
	/* everything between the end of this item and the start of free
	   space is shifted by @by (memmove: regions may overlap) */
	memmove(item_data + item_length + by, item_data + item_length,
		nh40_get_free_space_start(node40_node_header(coord->node)) -
		(ih40_get_offset(ih) + item_length));

	/* update offsets of moved items */
	for (i = coord->item_pos + 1; i < nh40_get_num_items(nh); i++) {
		ih = node40_ih_at(coord->node, i);
		ih40_set_offset(ih, ih40_get_offset(ih) + by);
	}

	/* update node header */
	nh40_set_free_space(nh, nh40_get_free_space(nh) - by);
	nh40_set_free_space_start(nh, nh40_get_free_space_start(nh) + by);
}
  54507. +
/*
 * Non-zero iff @node is not the tree root, i.e. it has a parent whose
 * delimiting key may need to be updated.
 */
static int should_notify_parent(const znode * node)
{
	/* FIXME_JMACD This looks equivalent to znode_is_root(), right? -josh */
	return !disk_addr_eq(znode_get_block(node),
			     &znode_get_tree(node)->root_block);
}
  54514. +
/* plugin->u.node.create_item
   look for description of this method in plugin/node/node.h */
/*
 * Insert a new item with key @key and body described by @data at @target.
 * Makes room by shifting item bodies and headers, writes the new item
 * header, updates node accounting, copies/initialises the item body via
 * the item plugin, and notifies the parent when the leftmost key changed.
 * Returns 0 on success or a negative error (e.g. -EFAULT on a failed
 * user-space copy).
 */
int
create_item_node40(coord_t *target, const reiser4_key *key,
		   reiser4_item_data *data, carry_plugin_info *info)
{
	node40_header *nh;
	item_header40 *ih;
	unsigned offset;	/* offset at which the new item body starts */
	unsigned i;

	nh = node40_node_header(target->node);

	assert("vs-212", coord_is_between_items(target));
	/* node must have enough free space */
	assert("vs-254",
	       free_space_node40(target->node) >=
	       data->length + sizeof(item_header40));
	assert("vs-1410", data->length >= 0);

	if (coord_set_to_right(target))
		/* there are not items to the right of @target, so, new item
		   will be inserted after last one */
		coord_set_item_pos(target, nh40_get_num_items(nh));

	if (target->item_pos < nh40_get_num_items(nh)) {
		/* there are items to be moved to prepare space for new
		   item */
		ih = node40_ih_at_coord(target);
		/* new item will start at this offset */
		offset = ih40_get_offset(ih);

		/* shift bodies of all items at and after target->item_pos
		   to the right by data->length */
		memmove(zdata(target->node) + offset + data->length,
			zdata(target->node) + offset,
			nh40_get_free_space_start(nh) - offset);
		/* update headers of moved items */
		for (i = target->item_pos; i < nh40_get_num_items(nh); i++) {
			ih = node40_ih_at(target->node, i);
			ih40_set_offset(ih, ih40_get_offset(ih) + data->length);
		}

		/* @ih is set to item header of the last item, move item headers */
		memmove(ih - 1, ih,
			sizeof(item_header40) * (nh40_get_num_items(nh) -
						 target->item_pos));
	} else {
		/* new item will start at this offset */
		offset = nh40_get_free_space_start(nh);
	}

	/* make item header for the new item */
	ih = node40_ih_at_coord(target);
	memcpy(&ih->key, key, sizeof(reiser4_key));
	ih40_set_offset(ih, offset);
	save_plugin_id(item_plugin_to_plugin(data->iplug), &ih->plugin_id);

	/* update node header */
	nh40_set_free_space(nh,
			    nh40_get_free_space(nh) - data->length -
			    sizeof(item_header40));
	nh40_set_free_space_start(nh,
				  nh40_get_free_space_start(nh) + data->length);
	node40_set_num_items(target->node, nh, nh40_get_num_items(nh) + 1);

	/* FIXME: check how does create_item work when between is set to BEFORE_UNIT */
	target->unit_pos = 0;
	target->between = AT_UNIT;
	coord_clear_iplug(target);

	/* initialize item */
	if (data->iplug->b.init != NULL) {
		data->iplug->b.init(target, NULL, data);
	}
	/* copy item body */
	if (data->iplug->b.paste != NULL) {
		data->iplug->b.paste(target, data, info);
	} else if (data->data != NULL) {
		if (data->user) {
			/* AUDIT: Are we really should not check that pointer
			   from userspace was valid and data bytes were
			   available? How will we return -EFAULT of some kind
			   without this check? */
			assert("nikita-3038", reiser4_schedulable());
			/* copy data from user space */
			if (__copy_from_user(zdata(target->node) + offset,
					     (const char __user *)data->data,
					     (unsigned)data->length))
				return RETERR(-EFAULT);
		} else
			/* copy from kernel space */
			memcpy(zdata(target->node) + offset, data->data,
			       (unsigned)data->length);
	}

	if (target->item_pos == 0) {
		/* left delimiting key has to be updated */
		prepare_for_update(NULL, target->node, info);
	}

	if (item_plugin_by_coord(target)->b.create_hook != NULL) {
		item_plugin_by_coord(target)->b.create_hook(target, data->arg);
	}

	return 0;
}
  54620. +
/* plugin->u.node.update_item_key
   look for description of this method in plugin/node/node.h */
/*
 * Overwrite the key in the item header at @target with @key.  If the item
 * is leftmost, schedule a parent delimiting-key update via @info.
 */
void
update_item_key_node40(coord_t * target, const reiser4_key * key,
		       carry_plugin_info * info)
{
	item_header40 *ih;

	ih = node40_ih_at_coord(target);
	memcpy(&ih->key, key, sizeof(reiser4_key));

	if (target->item_pos == 0) {
		/* leftmost key changed: parent must be notified */
		prepare_for_update(NULL, target->node, info);
	}
}
  54636. +
/* these bits encode cut mode */
#define CMODE_TAIL 1		/* tail of some item is removed */
#define CMODE_WHOLE 2		/* one or more items are removed completely */
#define CMODE_HEAD 4		/* head of some item is removed */

/* description of a cut/kill operation, filled in by parse_cut() and
   consumed by compact() */
struct cut40_info {
	int mode;		/* OR-ed combination of CMODE_* bits */
	pos_in_node_t tail_removed;	/* position of item which gets tail removed */
	pos_in_node_t first_removed;	/* position of first the leftmost item among items removed completely */
	pos_in_node_t removed_count;	/* number of items removed completely */
	pos_in_node_t head_removed;	/* position of item which gets head removed */

	/* byte range of the gap left by the removal, and bookkeeping for
	   relocating the remaining items (MAX_POS_IN_NODE == unset) */
	pos_in_node_t freed_space_start;
	pos_in_node_t freed_space_end;
	pos_in_node_t first_moved;
	pos_in_node_t head_removed_location;
};
  54654. +
  54655. +static void init_cinfo(struct cut40_info *cinfo)
  54656. +{
  54657. + cinfo->mode = 0;
  54658. + cinfo->tail_removed = MAX_POS_IN_NODE;
  54659. + cinfo->first_removed = MAX_POS_IN_NODE;
  54660. + cinfo->removed_count = MAX_POS_IN_NODE;
  54661. + cinfo->head_removed = MAX_POS_IN_NODE;
  54662. + cinfo->freed_space_start = MAX_POS_IN_NODE;
  54663. + cinfo->freed_space_end = MAX_POS_IN_NODE;
  54664. + cinfo->first_moved = MAX_POS_IN_NODE;
  54665. + cinfo->head_removed_location = MAX_POS_IN_NODE;
  54666. +}
  54667. +
/* complete cut_node40/kill_node40 content by removing the gap created by
   the removal: slide item bodies over the freed byte range, fix up the
   offsets in the item headers of moved items, drop headers of completely
   removed items and update free-space accounting */
static void compact(znode * node, struct cut40_info *cinfo)
{
	node40_header *nh;
	item_header40 *ih;
	pos_in_node_t freed;	/* number of bytes reclaimed */
	pos_in_node_t pos, nr_items;

	assert("vs-1526", (cinfo->freed_space_start != MAX_POS_IN_NODE &&
			   cinfo->freed_space_end != MAX_POS_IN_NODE &&
			   cinfo->first_moved != MAX_POS_IN_NODE));
	assert("vs-1523", cinfo->freed_space_end >= cinfo->freed_space_start);

	nh = node40_node_header(node);
	nr_items = nh40_get_num_items(nh);

	/* remove gap made up by removal */
	memmove(zdata(node) + cinfo->freed_space_start,
		zdata(node) + cinfo->freed_space_end,
		nh40_get_free_space_start(nh) - cinfo->freed_space_end);

	/* update item headers of moved items - change their locations */
	pos = cinfo->first_moved;
	ih = node40_ih_at(node, pos);
	if (cinfo->head_removed_location != MAX_POS_IN_NODE) {
		/* the item that lost its head gets an explicit new offset;
		   the rest are just shifted by the freed amount below */
		assert("vs-1580", pos == cinfo->head_removed);
		ih40_set_offset(ih, cinfo->head_removed_location);
		pos++;
		ih--;
	}

	freed = cinfo->freed_space_end - cinfo->freed_space_start;
	for (; pos < nr_items; pos++, ih--) {
		assert("vs-1581", ih == node40_ih_at(node, pos));
		ih40_set_offset(ih, ih40_get_offset(ih) - freed);
	}

	/* free space start moved to right */
	nh40_set_free_space_start(nh, nh40_get_free_space_start(nh) - freed);

	if (cinfo->removed_count != MAX_POS_IN_NODE) {
		/* number of items changed. Remove item headers of those items */
		ih = node40_ih_at(node, nr_items - 1);
		memmove(ih + cinfo->removed_count, ih,
			sizeof(item_header40) * (nr_items -
						 cinfo->removed_count -
						 cinfo->first_removed));
		freed += sizeof(item_header40) * cinfo->removed_count;
		node40_set_num_items(node, nh, nr_items - cinfo->removed_count);
	}

	/* total amount of free space increased */
	nh40_set_free_space(nh, nh40_get_free_space(nh) + freed);
}
  54722. +
/*
 * Shrink the item at @coord by @delta bytes from its tail: close the gap
 * by moving the following item bodies left, fix their header offsets and
 * update free-space accounting.  Item count and keys are unaffected.
 * Always returns 0.
 */
int shrink_item_node40(coord_t * coord, int delta)
{
	node40_header *nh;
	item_header40 *ih;
	pos_in_node_t pos;
	pos_in_node_t nr_items;
	char *end;		/* first byte past the (unshrunk) item */
	znode *node;
	int off;

	assert("nikita-3487", coord != NULL);
	assert("nikita-3488", delta >= 0);

	node = coord->node;
	nh = node40_node_header(node);
	nr_items = nh40_get_num_items(nh);

	ih = node40_ih_at_coord(coord);
	assert("nikita-3489", delta <= length_by_coord_node40(coord));
	off = ih40_get_offset(ih) + length_by_coord_node40(coord);
	end = zdata(node) + off;

	/* remove gap made up by removal */
	memmove(end - delta, end, nh40_get_free_space_start(nh) - off);

	/* update item headers of moved items - change their locations */
	pos = coord->item_pos + 1;
	ih = node40_ih_at(node, pos);
	for (; pos < nr_items; pos++, ih--) {
		assert("nikita-3490", ih == node40_ih_at(node, pos));
		ih40_set_offset(ih, ih40_get_offset(ih) - delta);
	}

	/* free space start moved to left */
	nh40_set_free_space_start(nh, nh40_get_free_space_start(nh) - delta);
	/* total amount of free space increased */
	nh40_set_free_space(nh, nh40_get_free_space(nh) + delta);
	/*
	 * This method does _not_ changes number of items. Hence, it cannot
	 * make node empty. Also it doesn't remove items at all, which means
	 * that no keys have to be updated either.
	 */
	return 0;
}
  54767. +
/*
 * Evaluate cut mode, if key range has been specified.
 *
 * This is for the case when units are not minimal objects
 * addressed by keys.
 *
 * This doesn't work when range contains objects with
 * non-unique keys (e.g. directory items).
 *
 * Returns 1 for the special "cut from the middle of a single item" case,
 * otherwise fills @cinfo (tail/whole/head positions and mode bits) and
 * returns 0.
 */
static int parse_cut_by_key_range(struct cut40_info *cinfo,
				  const struct cut_kill_params *params)
{
	reiser4_key min_from_key, max_to_key;
	const reiser4_key *from_key = params->from_key;
	const reiser4_key *to_key = params->to_key;
	/*
	 * calculate minimal key stored in first item
	 * of items to be cut (params->from)
	 */
	item_key_by_coord(params->from, &min_from_key);
	/*
	 * calculate maximal key stored in last item
	 * of items to be cut (params->to)
	 */
	max_item_key_by_coord(params->to, &max_to_key);

	if (params->from->item_pos == params->to->item_pos) {
		/* the whole range falls inside a single item */
		if (keylt(&min_from_key, from_key)
		    && keylt(to_key, &max_to_key))
			return 1;

		if (keygt(from_key, &min_from_key)) {
			/* tail of item is to be cut */
			cinfo->tail_removed = params->from->item_pos;
			cinfo->mode |= CMODE_TAIL;
		} else if (keylt(to_key, &max_to_key)) {
			/* head of item is to be cut */
			cinfo->head_removed = params->from->item_pos;
			cinfo->mode |= CMODE_HEAD;
		} else {
			/* item is removed completely */
			cinfo->first_removed = params->from->item_pos;
			cinfo->removed_count = 1;
			cinfo->mode |= CMODE_WHOLE;
		}
	} else {
		/* range spans several items; items strictly between from
		   and to are removed completely */
		cinfo->first_removed = params->from->item_pos + 1;
		cinfo->removed_count =
		    params->to->item_pos - params->from->item_pos - 1;

		if (keygt(from_key, &min_from_key)) {
			/* first item is not cut completely */
			cinfo->tail_removed = params->from->item_pos;
			cinfo->mode |= CMODE_TAIL;
		} else {
			cinfo->first_removed--;
			cinfo->removed_count++;
		}
		if (keylt(to_key, &max_to_key)) {
			/* last item is not cut completely */
			cinfo->head_removed = params->to->item_pos;
			cinfo->mode |= CMODE_HEAD;
		} else {
			cinfo->removed_count++;
		}
		if (cinfo->removed_count)
			cinfo->mode |= CMODE_WHOLE;
	}
	return 0;
}
  54838. +
/*
 * Evaluate cut mode, if the key range hasn't been specified.
 * In this case the range can include objects with non-unique
 * keys (e.g. directory entries).
 *
 * This doesn't work when units are not the minimal objects
 * addressed by keys (e.g. bytes in file's body stored in
 * unformatted nodes).
 *
 * Returns 1 for the special "cut from the middle of a single item" case,
 * otherwise fills @cinfo from the unit positions in @params and returns 0.
 */
static int parse_cut_by_coord_range(struct cut40_info *cinfo,
				    const struct cut_kill_params *params)
{
	coord_t *from = params->from;
	coord_t *to = params->to;

	if (from->item_pos == to->item_pos) {
		/*
		 * cut is performed on only one item
		 */
		if (from->unit_pos > 0 &&
		    to->unit_pos < coord_last_unit_pos(to))
			/*
			 * cut from the middle of item
			 */
			return 1;
		if (from->unit_pos > 0) {
			/*
			 * tail of item is to be cut
			 */
			cinfo->tail_removed = params->from->item_pos;
			cinfo->mode |= CMODE_TAIL;
		} else if (to->unit_pos < coord_last_unit_pos(to)) {
			/*
			 * head of item is to be cut
			 */
			cinfo->head_removed = params->from->item_pos;
			cinfo->mode |= CMODE_HEAD;
		} else {
			/*
			 * item is removed completely
			 */
			assert("edward-1631",
			       from->unit_pos == 0 &&
			       to->unit_pos == coord_last_unit_pos(to));

			cinfo->first_removed = params->from->item_pos;
			cinfo->removed_count = 1;
			cinfo->mode |= CMODE_WHOLE;
		}
	} else {
		/* range spans several items; items strictly between from
		   and to are removed completely */
		cinfo->first_removed = from->item_pos + 1;
		cinfo->removed_count =
		    to->item_pos - from->item_pos - 1;

		if (from->unit_pos > 0) {
			/*
			 * first item is not cut completely
			 */
			cinfo->tail_removed = from->item_pos;
			cinfo->mode |= CMODE_TAIL;
		} else {
			cinfo->first_removed--;
			cinfo->removed_count++;
		}
		if (to->unit_pos < coord_last_unit_pos(to)) {
			/*
			 * last item is not cut completely
			 */
			cinfo->head_removed = to->item_pos;
			cinfo->mode |= CMODE_HEAD;
		} else {
			cinfo->removed_count++;
		}
		if (cinfo->removed_count)
			cinfo->mode |= CMODE_WHOLE;
	}
	return 0;
}
  54917. +
  54918. +/*
  54919. + * this is used by cut_node40 and kill_node40. It analyses input parameters
  54920. + * and calculates cut mode. There are 2 types of cut. First is when a unit is
  54921. + * removed from the middle of an item. In this case this function returns 1.
  54922. + * All the rest fits into second case: 0 or 1 of items getting tail cut, 0 or
  54923. + * more items removed completely and 0 or 1 item getting head cut. Function
  54924. + * returns 0 in this case
  54925. + */
  54926. +static int parse_cut(struct cut40_info *cinfo,
  54927. + const struct cut_kill_params *params)
  54928. +{
  54929. + init_cinfo(cinfo);
  54930. + if (params->from_key == NULL) {
  54931. + /*
  54932. + * cut key range is not defined in input parameters
  54933. + */
  54934. + assert("vs-1513", params->to_key == NULL);
  54935. + return parse_cut_by_coord_range(cinfo, params);
  54936. + } else
  54937. + return parse_cut_by_key_range(cinfo, params);
  54938. +}
  54939. +
/*
 * Invoke the item plugin's kill_hook (if any) for @count consecutive
 * items of @node starting at position @from, covering all units of each
 * item.  @kdata is passed through to the hooks.
 */
static void
call_kill_hooks(znode * node, pos_in_node_t from, pos_in_node_t count,
		carry_kill_data * kdata)
{
	coord_t coord;
	item_plugin *iplug;
	pos_in_node_t pos;

	coord.node = node;
	coord.unit_pos = 0;
	coord.between = AT_UNIT;
	for (pos = 0; pos < count; pos++) {
		coord_set_item_pos(&coord, from + pos);
		coord.unit_pos = 0;
		coord.between = AT_UNIT;
		iplug = item_plugin_by_coord(&coord);
		if (iplug->b.kill_hook) {
			/* hook sees the whole item: units [0, num_units) */
			iplug->b.kill_hook(&coord, 0, coord_num_units(&coord),
					   kdata);
		}
	}
}
  54962. +
  54963. +/* this is used to kill item partially */
  54964. +static pos_in_node_t
  54965. +kill_units(coord_t * coord, pos_in_node_t from, pos_in_node_t to, void *data,
  54966. + reiser4_key * smallest_removed, reiser4_key * new_first_key)
  54967. +{
  54968. + struct carry_kill_data *kdata;
  54969. + item_plugin *iplug;
  54970. +
  54971. + kdata = data;
  54972. + iplug = item_plugin_by_coord(coord);
  54973. +
  54974. + assert("vs-1524", iplug->b.kill_units);
  54975. + return iplug->b.kill_units(coord, from, to, kdata, smallest_removed,
  54976. + new_first_key);
  54977. +}
  54978. +
  54979. +/* call item plugin to cut tail of item */
  54980. +static pos_in_node_t
  54981. +kill_tail(coord_t * coord, void *data, reiser4_key * smallest_removed)
  54982. +{
  54983. + struct carry_kill_data *kdata;
  54984. + pos_in_node_t to;
  54985. +
  54986. + kdata = data;
  54987. + to = coord_last_unit_pos(coord);
  54988. + return kill_units(coord, coord->unit_pos, to, kdata, smallest_removed,
  54989. + NULL);
  54990. +}
  54991. +
  54992. +/* call item plugin to cut head of item */
  54993. +static pos_in_node_t
  54994. +kill_head(coord_t * coord, void *data, reiser4_key * smallest_removed,
  54995. + reiser4_key * new_first_key)
  54996. +{
  54997. + return kill_units(coord, 0, coord->unit_pos, data, smallest_removed,
  54998. + new_first_key);
  54999. +}
  55000. +
  55001. +/* this is used to cut item partially */
  55002. +static pos_in_node_t
  55003. +cut_units(coord_t * coord, pos_in_node_t from, pos_in_node_t to, void *data,
  55004. + reiser4_key * smallest_removed, reiser4_key * new_first_key)
  55005. +{
  55006. + carry_cut_data *cdata;
  55007. + item_plugin *iplug;
  55008. +
  55009. + cdata = data;
  55010. + iplug = item_plugin_by_coord(coord);
  55011. + assert("vs-302", iplug->b.cut_units);
  55012. + return iplug->b.cut_units(coord, from, to, cdata, smallest_removed,
  55013. + new_first_key);
  55014. +}
  55015. +
  55016. +/* call item plugin to cut tail of item */
  55017. +static pos_in_node_t
  55018. +cut_tail(coord_t * coord, void *data, reiser4_key * smallest_removed)
  55019. +{
  55020. + carry_cut_data *cdata;
  55021. + pos_in_node_t to;
  55022. +
  55023. + cdata = data;
  55024. + to = coord_last_unit_pos(cdata->params.from);
  55025. + return cut_units(coord, coord->unit_pos, to, data, smallest_removed, NULL);
  55026. +}
  55027. +
  55028. +/* call item plugin to cut head of item */
  55029. +static pos_in_node_t
  55030. +cut_head(coord_t * coord, void *data, reiser4_key * smallest_removed,
  55031. + reiser4_key * new_first_key)
  55032. +{
  55033. + return cut_units(coord, 0, coord->unit_pos, data, smallest_removed,
  55034. + new_first_key);
  55035. +}
  55036. +
  55037. +/* this returns 1 if key of first item changed, 0 - if it did not */
  55038. +static int
  55039. +prepare_for_compact(struct cut40_info *cinfo,
  55040. + const struct cut_kill_params *params, int is_cut,
  55041. + void *data, carry_plugin_info * info)
  55042. +{
  55043. + znode *node;
  55044. + item_header40 *ih;
  55045. + pos_in_node_t freed;
  55046. + pos_in_node_t item_pos;
  55047. + coord_t coord;
  55048. + reiser4_key new_first_key;
  55049. + pos_in_node_t(*kill_units_f) (coord_t *, pos_in_node_t, pos_in_node_t,
  55050. + void *, reiser4_key *, reiser4_key *);
  55051. + pos_in_node_t(*kill_tail_f) (coord_t *, void *, reiser4_key *);
  55052. + pos_in_node_t(*kill_head_f) (coord_t *, void *, reiser4_key *,
  55053. + reiser4_key *);
  55054. + int retval;
  55055. +
  55056. + retval = 0;
  55057. +
  55058. + node = params->from->node;
  55059. +
  55060. + assert("vs-184", node == params->to->node);
  55061. + assert("vs-312", !node_is_empty(node));
  55062. + assert("vs-297",
  55063. + coord_compare(params->from, params->to) != COORD_CMP_ON_RIGHT);
  55064. +
  55065. + if (is_cut) {
  55066. + kill_units_f = cut_units;
  55067. + kill_tail_f = cut_tail;
  55068. + kill_head_f = cut_head;
  55069. + } else {
  55070. + kill_units_f = kill_units;
  55071. + kill_tail_f = kill_tail;
  55072. + kill_head_f = kill_head;
  55073. + }
  55074. +
  55075. + if (parse_cut(cinfo, params) == 1) {
  55076. + /* cut from the middle of item */
  55077. + freed =
  55078. + kill_units_f(params->from, params->from->unit_pos,
  55079. + params->to->unit_pos, data,
  55080. + params->smallest_removed, NULL);
  55081. +
  55082. + item_pos = params->from->item_pos;
  55083. + ih = node40_ih_at(node, item_pos);
  55084. + cinfo->freed_space_start =
  55085. + ih40_get_offset(ih) + node40_item_length(node,
  55086. + item_pos) - freed;
  55087. + cinfo->freed_space_end = cinfo->freed_space_start + freed;
  55088. + cinfo->first_moved = item_pos + 1;
  55089. + } else {
  55090. + assert("vs-1521", (cinfo->tail_removed != MAX_POS_IN_NODE ||
  55091. + cinfo->first_removed != MAX_POS_IN_NODE ||
  55092. + cinfo->head_removed != MAX_POS_IN_NODE));
  55093. +
  55094. + switch (cinfo->mode) {
  55095. + case CMODE_TAIL:
  55096. + /* one item gets cut partially from its end */
  55097. + assert("vs-1562",
  55098. + cinfo->tail_removed == params->from->item_pos);
  55099. +
  55100. + freed =
  55101. + kill_tail_f(params->from, data,
  55102. + params->smallest_removed);
  55103. +
  55104. + item_pos = cinfo->tail_removed;
  55105. + ih = node40_ih_at(node, item_pos);
  55106. + cinfo->freed_space_start =
  55107. + ih40_get_offset(ih) + node40_item_length(node,
  55108. + item_pos) -
  55109. + freed;
  55110. + cinfo->freed_space_end =
  55111. + cinfo->freed_space_start + freed;
  55112. + cinfo->first_moved = cinfo->tail_removed + 1;
  55113. + break;
  55114. +
  55115. + case CMODE_WHOLE:
  55116. + /* one or more items get removed completely */
  55117. + assert("vs-1563",
  55118. + cinfo->first_removed == params->from->item_pos);
  55119. + assert("vs-1564", cinfo->removed_count > 0
  55120. + && cinfo->removed_count != MAX_POS_IN_NODE);
  55121. +
  55122. + /* call kill hook for all items removed completely */
  55123. + if (is_cut == 0)
  55124. + call_kill_hooks(node, cinfo->first_removed,
  55125. + cinfo->removed_count, data);
  55126. +
  55127. + item_pos = cinfo->first_removed;
  55128. + ih = node40_ih_at(node, item_pos);
  55129. +
  55130. + if (params->smallest_removed)
  55131. + memcpy(params->smallest_removed, &ih->key,
  55132. + sizeof(reiser4_key));
  55133. +
  55134. + cinfo->freed_space_start = ih40_get_offset(ih);
  55135. +
  55136. + item_pos += (cinfo->removed_count - 1);
  55137. + ih -= (cinfo->removed_count - 1);
  55138. + cinfo->freed_space_end =
  55139. + ih40_get_offset(ih) + node40_item_length(node,
  55140. + item_pos);
  55141. + cinfo->first_moved = item_pos + 1;
  55142. + if (cinfo->first_removed == 0)
  55143. + /* key of first item of the node changes */
  55144. + retval = 1;
  55145. + break;
  55146. +
  55147. + case CMODE_HEAD:
  55148. + /* one item gets cut partially from its head */
  55149. + assert("vs-1565",
  55150. + cinfo->head_removed == params->from->item_pos);
  55151. +
  55152. + freed =
  55153. + kill_head_f(params->to, data,
  55154. + params->smallest_removed,
  55155. + &new_first_key);
  55156. +
  55157. + item_pos = cinfo->head_removed;
  55158. + ih = node40_ih_at(node, item_pos);
  55159. + cinfo->freed_space_start = ih40_get_offset(ih);
  55160. + cinfo->freed_space_end = ih40_get_offset(ih) + freed;
  55161. + cinfo->first_moved = cinfo->head_removed + 1;
  55162. +
  55163. + /* item head is removed, therefore, item key changed */
  55164. + coord.node = node;
  55165. + coord_set_item_pos(&coord, item_pos);
  55166. + coord.unit_pos = 0;
  55167. + coord.between = AT_UNIT;
  55168. + update_item_key_node40(&coord, &new_first_key, NULL);
  55169. + if (item_pos == 0)
  55170. + /* key of first item of the node changes */
  55171. + retval = 1;
  55172. + break;
  55173. +
  55174. + case CMODE_TAIL | CMODE_WHOLE:
  55175. + /* one item gets cut from its end and one or more items get removed completely */
  55176. + assert("vs-1566",
  55177. + cinfo->tail_removed == params->from->item_pos);
  55178. + assert("vs-1567",
  55179. + cinfo->first_removed == cinfo->tail_removed + 1);
  55180. + assert("vs-1564", cinfo->removed_count > 0
  55181. + && cinfo->removed_count != MAX_POS_IN_NODE);
  55182. +
  55183. + freed =
  55184. + kill_tail_f(params->from, data,
  55185. + params->smallest_removed);
  55186. +
  55187. + item_pos = cinfo->tail_removed;
  55188. + ih = node40_ih_at(node, item_pos);
  55189. + cinfo->freed_space_start =
  55190. + ih40_get_offset(ih) + node40_item_length(node,
  55191. + item_pos) -
  55192. + freed;
  55193. +
  55194. + /* call kill hook for all items removed completely */
  55195. + if (is_cut == 0)
  55196. + call_kill_hooks(node, cinfo->first_removed,
  55197. + cinfo->removed_count, data);
  55198. +
  55199. + item_pos += cinfo->removed_count;
  55200. + ih -= cinfo->removed_count;
  55201. + cinfo->freed_space_end =
  55202. + ih40_get_offset(ih) + node40_item_length(node,
  55203. + item_pos);
  55204. + cinfo->first_moved = item_pos + 1;
  55205. + break;
  55206. +
  55207. + case CMODE_WHOLE | CMODE_HEAD:
  55208. + /* one or more items get removed completely and one item gets cut partially from its head */
  55209. + assert("vs-1568",
  55210. + cinfo->first_removed == params->from->item_pos);
  55211. + assert("vs-1564", cinfo->removed_count > 0
  55212. + && cinfo->removed_count != MAX_POS_IN_NODE);
  55213. + assert("vs-1569",
  55214. + cinfo->head_removed ==
  55215. + cinfo->first_removed + cinfo->removed_count);
  55216. +
  55217. + /* call kill hook for all items removed completely */
  55218. + if (is_cut == 0)
  55219. + call_kill_hooks(node, cinfo->first_removed,
  55220. + cinfo->removed_count, data);
  55221. +
  55222. + item_pos = cinfo->first_removed;
  55223. + ih = node40_ih_at(node, item_pos);
  55224. +
  55225. + if (params->smallest_removed)
  55226. + memcpy(params->smallest_removed, &ih->key,
  55227. + sizeof(reiser4_key));
  55228. +
  55229. + freed =
  55230. + kill_head_f(params->to, data, NULL, &new_first_key);
  55231. +
  55232. + cinfo->freed_space_start = ih40_get_offset(ih);
  55233. +
  55234. + ih = node40_ih_at(node, cinfo->head_removed);
  55235. + /* this is the most complex case. Item which got head removed and items which are to be moved
  55236. + intact change their location differently. */
  55237. + cinfo->freed_space_end = ih40_get_offset(ih) + freed;
  55238. + cinfo->first_moved = cinfo->head_removed;
  55239. + cinfo->head_removed_location = cinfo->freed_space_start;
  55240. +
  55241. + /* item head is removed, therefore, item key changed */
  55242. + coord.node = node;
  55243. + coord_set_item_pos(&coord, cinfo->head_removed);
  55244. + coord.unit_pos = 0;
  55245. + coord.between = AT_UNIT;
  55246. + update_item_key_node40(&coord, &new_first_key, NULL);
  55247. +
  55248. + assert("vs-1579", cinfo->first_removed == 0);
  55249. + /* key of first item of the node changes */
  55250. + retval = 1;
  55251. + break;
  55252. +
  55253. + case CMODE_TAIL | CMODE_HEAD:
  55254. + /* one item gets cut from its end and its neighbor gets cut from its head */
  55255. + impossible("vs-1576", "this can not happen currently");
  55256. + break;
  55257. +
  55258. + case CMODE_TAIL | CMODE_WHOLE | CMODE_HEAD:
  55259. + impossible("vs-1577", "this can not happen currently");
  55260. + break;
  55261. + default:
  55262. + impossible("vs-1578", "unexpected cut mode");
  55263. + break;
  55264. + }
  55265. + }
  55266. + return retval;
  55267. +}
  55268. +
  55269. +/* plugin->u.node.kill
  55270. + return value is number of items removed completely */
  55271. +int kill_node40(struct carry_kill_data *kdata, carry_plugin_info * info)
  55272. +{
  55273. + znode *node;
  55274. + struct cut40_info cinfo;
  55275. + int first_key_changed;
  55276. +
  55277. + node = kdata->params.from->node;
  55278. +
  55279. + first_key_changed =
  55280. + prepare_for_compact(&cinfo, &kdata->params, 0 /* not cut */ , kdata,
  55281. + info);
  55282. + compact(node, &cinfo);
  55283. +
  55284. + if (info) {
  55285. + /* it is not called by node40_shift, so we have to take care
  55286. + of changes on upper levels */
  55287. + if (node_is_empty(node)
  55288. + && !(kdata->flags & DELETE_RETAIN_EMPTY))
  55289. + /* all contents of node is deleted */
  55290. + prepare_removal_node40(node, info);
  55291. + else if (first_key_changed) {
  55292. + prepare_for_update(NULL, node, info);
  55293. + }
  55294. + }
  55295. +
  55296. + coord_clear_iplug(kdata->params.from);
  55297. + coord_clear_iplug(kdata->params.to);
  55298. +
  55299. + znode_make_dirty(node);
  55300. + return cinfo.removed_count == MAX_POS_IN_NODE ? 0 : cinfo.removed_count;
  55301. +}
  55302. +
  55303. +/* plugin->u.node.cut
  55304. + return value is number of items removed completely */
  55305. +int cut_node40(struct carry_cut_data *cdata, carry_plugin_info * info)
  55306. +{
  55307. + znode *node;
  55308. + struct cut40_info cinfo;
  55309. + int first_key_changed;
  55310. +
  55311. + node = cdata->params.from->node;
  55312. +
  55313. + first_key_changed =
  55314. + prepare_for_compact(&cinfo, &cdata->params, 1 /* is cut */ , cdata,
  55315. + info);
  55316. + compact(node, &cinfo);
  55317. +
  55318. + if (info) {
  55319. + /* it is not called by node40_shift, so we have to take care
  55320. + of changes on upper levels */
  55321. + if (node_is_empty(node))
  55322. + /* all contents of node is deleted */
  55323. + prepare_removal_node40(node, info);
  55324. + else if (first_key_changed) {
  55325. + prepare_for_update(NULL, node, info);
  55326. + }
  55327. + }
  55328. +
  55329. + coord_clear_iplug(cdata->params.from);
  55330. + coord_clear_iplug(cdata->params.to);
  55331. +
  55332. + znode_make_dirty(node);
  55333. + return cinfo.removed_count == MAX_POS_IN_NODE ? 0 : cinfo.removed_count;
  55334. +}
  55335. +
  55336. +/* this structure is used by shift method of node40 plugin */
  55337. +struct shift_params {
  55338. + shift_direction pend; /* when @pend == append - we are shifting to
  55339. + left, when @pend == prepend - to right */
  55340. + coord_t wish_stop; /* when shifting to left this is last unit we
  55341. + want shifted, when shifting to right - this
  55342. + is set to unit we want to start shifting
  55343. + from */
  55344. + znode *target;
  55345. + int everything; /* it is set to 1 if everything we have to shift is
  55346. + shifted, 0 - otherwise */
  55347. +
  55348. + /* FIXME-VS: get rid of read_stop */
  55349. +
  55350. + /* these are set by estimate_shift */
  55351. + coord_t real_stop; /* this will be set to last unit which will be
  55352. + really shifted */
  55353. +
  55354. + /* coordinate in source node before operation of unit which becomes
  55355. + first after shift to left of last after shift to right */
  55356. + union {
  55357. + coord_t future_first;
  55358. + coord_t future_last;
  55359. + } u;
  55360. +
  55361. + unsigned merging_units; /* number of units of first item which have to
  55362. + be merged with last item of target node */
  55363. + unsigned merging_bytes; /* number of bytes in those units */
  55364. +
  55365. + unsigned entire; /* items shifted in their entirety */
  55366. + unsigned entire_bytes; /* number of bytes in those items */
  55367. +
  55368. + unsigned part_units; /* number of units of partially copied item */
  55369. + unsigned part_bytes; /* number of bytes in those units */
  55370. +
  55371. + unsigned shift_bytes; /* total number of bytes in items shifted (item
  55372. + headers not included) */
  55373. +
  55374. +};
  55375. +
  55376. +static int item_creation_overhead(coord_t *item)
  55377. +{
  55378. + return node_plugin_by_coord(item)->item_overhead(item->node, NULL);
  55379. +}
  55380. +
  55381. +/* how many units are there in @source starting from source->unit_pos
  55382. + but not further than @stop_coord */
  55383. +static int
  55384. +wanted_units(coord_t *source, coord_t *stop_coord, shift_direction pend)
  55385. +{
  55386. + if (pend == SHIFT_LEFT) {
  55387. + assert("vs-181", source->unit_pos == 0);
  55388. + } else {
  55389. + assert("vs-182",
  55390. + source->unit_pos == coord_last_unit_pos(source));
  55391. + }
  55392. +
  55393. + if (source->item_pos != stop_coord->item_pos) {
  55394. + /* @source and @stop_coord are different items */
  55395. + return coord_last_unit_pos(source) + 1;
  55396. + }
  55397. +
  55398. + if (pend == SHIFT_LEFT) {
  55399. + return stop_coord->unit_pos + 1;
  55400. + } else {
  55401. + return source->unit_pos - stop_coord->unit_pos + 1;
  55402. + }
  55403. +}
  55404. +
  55405. +/* this calculates what can be copied from @shift->wish_stop.node to
  55406. + @shift->target */
  55407. +static void
  55408. +estimate_shift(struct shift_params *shift, const reiser4_context * ctx)
  55409. +{
  55410. + unsigned target_free_space, size;
  55411. + pos_in_node_t stop_item; /* item which estimating should not consider */
  55412. + unsigned want; /* number of units of item we want shifted */
  55413. + coord_t source; /* item being estimated */
  55414. + item_plugin *iplug;
  55415. +
  55416. + /* shifting to left/right starts from first/last units of
  55417. + @shift->wish_stop.node */
  55418. + if (shift->pend == SHIFT_LEFT) {
  55419. + coord_init_first_unit(&source, shift->wish_stop.node);
  55420. + } else {
  55421. + coord_init_last_unit(&source, shift->wish_stop.node);
  55422. + }
  55423. + shift->real_stop = source;
  55424. +
  55425. + /* free space in target node and number of items in source */
  55426. + target_free_space = znode_free_space(shift->target);
  55427. +
  55428. + shift->everything = 0;
  55429. + if (!node_is_empty(shift->target)) {
  55430. + /* target node is not empty, check for boundary items
  55431. + mergeability */
  55432. + coord_t to;
  55433. +
  55434. + /* item we try to merge @source with */
  55435. + if (shift->pend == SHIFT_LEFT) {
  55436. + coord_init_last_unit(&to, shift->target);
  55437. + } else {
  55438. + coord_init_first_unit(&to, shift->target);
  55439. + }
  55440. +
  55441. + if ((shift->pend == SHIFT_LEFT) ? are_items_mergeable(&to,
  55442. + &source) :
  55443. + are_items_mergeable(&source, &to)) {
  55444. + /* how many units of @source do we want to merge to
  55445. + item @to */
  55446. + want =
  55447. + wanted_units(&source, &shift->wish_stop,
  55448. + shift->pend);
  55449. +
  55450. + /* how many units of @source we can merge to item
  55451. + @to */
  55452. + iplug = item_plugin_by_coord(&source);
  55453. + if (iplug->b.can_shift != NULL)
  55454. + shift->merging_units =
  55455. + iplug->b.can_shift(target_free_space,
  55456. + &source, shift->target,
  55457. + shift->pend, &size,
  55458. + want);
  55459. + else {
  55460. + shift->merging_units = 0;
  55461. + size = 0;
  55462. + }
  55463. + shift->merging_bytes = size;
  55464. + shift->shift_bytes += size;
  55465. + /* update stop coord to be set to last unit of @source
  55466. + we can merge to @target */
  55467. + if (shift->merging_units)
  55468. + /* at least one unit can be shifted */
  55469. + shift->real_stop.unit_pos =
  55470. + (shift->merging_units - source.unit_pos -
  55471. + 1) * shift->pend;
  55472. + else {
  55473. + /* nothing can be shifted */
  55474. + if (shift->pend == SHIFT_LEFT)
  55475. + coord_init_before_first_item(&shift->
  55476. + real_stop,
  55477. + source.
  55478. + node);
  55479. + else
  55480. + coord_init_after_last_item(&shift->
  55481. + real_stop,
  55482. + source.node);
  55483. + }
  55484. + assert("nikita-2081", shift->real_stop.unit_pos + 1);
  55485. +
  55486. + if (shift->merging_units != want) {
  55487. + /* we could not copy as many as we want, so,
  55488. + there is no reason for estimating any
  55489. + longer */
  55490. + return;
  55491. + }
  55492. +
  55493. + target_free_space -= size;
  55494. + coord_add_item_pos(&source, shift->pend);
  55495. + }
  55496. + }
  55497. +
  55498. + /* number of item nothing of which we want to shift */
  55499. + stop_item = shift->wish_stop.item_pos + shift->pend;
  55500. +
  55501. + /* calculate how many items can be copied into given free
  55502. + space as whole */
  55503. + for (; source.item_pos != stop_item;
  55504. + coord_add_item_pos(&source, shift->pend)) {
  55505. + if (shift->pend == SHIFT_RIGHT)
  55506. + source.unit_pos = coord_last_unit_pos(&source);
  55507. +
  55508. + /* how many units of @source do we want to copy */
  55509. + want = wanted_units(&source, &shift->wish_stop, shift->pend);
  55510. +
  55511. + if (want == coord_last_unit_pos(&source) + 1) {
  55512. + /* we want this item to be copied entirely */
  55513. + size =
  55514. + item_length_by_coord(&source) +
  55515. + item_creation_overhead(&source);
  55516. + if (size <= target_free_space) {
  55517. + /* item fits into target node as whole */
  55518. + target_free_space -= size;
  55519. + shift->shift_bytes +=
  55520. + size - item_creation_overhead(&source);
  55521. + shift->entire_bytes +=
  55522. + size - item_creation_overhead(&source);
  55523. + shift->entire++;
  55524. +
  55525. + /* update shift->real_stop coord to be set to
  55526. + last unit of @source we can merge to
  55527. + @target */
  55528. + shift->real_stop = source;
  55529. + if (shift->pend == SHIFT_LEFT)
  55530. + shift->real_stop.unit_pos =
  55531. + coord_last_unit_pos(&shift->
  55532. + real_stop);
  55533. + else
  55534. + shift->real_stop.unit_pos = 0;
  55535. + continue;
  55536. + }
  55537. + }
  55538. +
  55539. + /* we reach here only for an item which does not fit into
  55540. + target node in its entirety. This item may be either
  55541. + partially shifted, or not shifted at all. We will have to
  55542. + create new item in target node, so decrease amount of free
  55543. + space by an item creation overhead. We can reach here also
  55544. + if stop coord is in this item */
  55545. + if (target_free_space >=
  55546. + (unsigned)item_creation_overhead(&source)) {
  55547. + target_free_space -= item_creation_overhead(&source);
  55548. + iplug = item_plugin_by_coord(&source);
  55549. + if (iplug->b.can_shift) {
  55550. + shift->part_units = iplug->b.can_shift(target_free_space,
  55551. + &source,
  55552. + NULL, /* target */
  55553. + shift->pend,
  55554. + &size,
  55555. + want);
  55556. + } else {
  55557. + target_free_space = 0;
  55558. + shift->part_units = 0;
  55559. + size = 0;
  55560. + }
  55561. + } else {
  55562. + target_free_space = 0;
  55563. + shift->part_units = 0;
  55564. + size = 0;
  55565. + }
  55566. + shift->part_bytes = size;
  55567. + shift->shift_bytes += size;
  55568. +
  55569. + /* set @shift->real_stop to last unit of @source we can merge
  55570. + to @shift->target */
  55571. + if (shift->part_units) {
  55572. + shift->real_stop = source;
  55573. + shift->real_stop.unit_pos =
  55574. + (shift->part_units - source.unit_pos -
  55575. + 1) * shift->pend;
  55576. + assert("nikita-2082", shift->real_stop.unit_pos + 1);
  55577. + }
  55578. +
  55579. + if (want != shift->part_units)
  55580. + /* not everything wanted were shifted */
  55581. + return;
  55582. + break;
  55583. + }
  55584. +
  55585. + shift->everything = 1;
  55586. +}
  55587. +
  55588. +static void
  55589. +copy_units(coord_t * target, coord_t * source, unsigned from, unsigned count,
  55590. + shift_direction dir, unsigned free_space)
  55591. +{
  55592. + item_plugin *iplug;
  55593. +
  55594. + assert("nikita-1463", target != NULL);
  55595. + assert("nikita-1464", source != NULL);
  55596. + assert("nikita-1465", from + count <= coord_num_units(source));
  55597. +
  55598. + iplug = item_plugin_by_coord(source);
  55599. + assert("nikita-1468", iplug == item_plugin_by_coord(target));
  55600. + iplug->b.copy_units(target, source, from, count, dir, free_space);
  55601. +
  55602. + if (dir == SHIFT_RIGHT) {
  55603. + /* FIXME-VS: this looks not necessary. update_item_key was
  55604. + called already by copy_units method */
  55605. + reiser4_key split_key;
  55606. +
  55607. + assert("nikita-1469", target->unit_pos == 0);
  55608. +
  55609. + unit_key_by_coord(target, &split_key);
  55610. + node_plugin_by_coord(target)->update_item_key(target,
  55611. + &split_key, NULL);
  55612. + }
  55613. +}
  55614. +
  55615. +/* copy part of @shift->real_stop.node starting either from its beginning or
  55616. + from its end and ending at @shift->real_stop to either the end or the
  55617. + beginning of @shift->target */
  55618. +static void copy(struct shift_params *shift, size_t node_header_size)
  55619. +{
  55620. + node40_header *nh;
  55621. + coord_t from;
  55622. + coord_t to;
  55623. + item_header40 *from_ih, *to_ih;
  55624. + int free_space_start;
  55625. + int new_items;
  55626. + unsigned old_items;
  55627. + int old_offset;
  55628. + unsigned i;
  55629. +
  55630. + nh = node40_node_header(shift->target);
  55631. + free_space_start = nh40_get_free_space_start(nh);
  55632. + old_items = nh40_get_num_items(nh);
  55633. + new_items = shift->entire + (shift->part_units ? 1 : 0);
  55634. + assert("vs-185",
  55635. + shift->shift_bytes ==
  55636. + shift->merging_bytes + shift->entire_bytes + shift->part_bytes);
  55637. +
  55638. + from = shift->wish_stop;
  55639. +
  55640. + coord_init_first_unit(&to, shift->target);
  55641. +
  55642. + /* NOTE:NIKITA->VS not sure what I am doing: shift->target is empty,
  55643. + hence to.between is set to EMPTY_NODE above. Looks like we want it
  55644. + to be AT_UNIT.
  55645. +
  55646. + Oh, wonders of ->betweeness...
  55647. +
  55648. + */
  55649. + to.between = AT_UNIT;
  55650. +
  55651. + if (shift->pend == SHIFT_LEFT) {
  55652. + /* copying to left */
  55653. +
  55654. + coord_set_item_pos(&from, 0);
  55655. + from_ih = node40_ih_at(from.node, 0);
  55656. +
  55657. + coord_set_item_pos(&to,
  55658. + node40_num_of_items_internal(to.node) - 1);
  55659. + if (shift->merging_units) {
  55660. + /* expand last item, so that plugin methods will see
  55661. + correct data */
  55662. + free_space_start += shift->merging_bytes;
  55663. + nh40_set_free_space_start(nh,
  55664. + (unsigned)free_space_start);
  55665. + nh40_set_free_space(nh,
  55666. + nh40_get_free_space(nh) -
  55667. + shift->merging_bytes);
  55668. +
  55669. + /* appending last item of @target */
  55670. + copy_units(&to, &from, 0, /* starting from 0-th unit */
  55671. + shift->merging_units, SHIFT_LEFT,
  55672. + shift->merging_bytes);
  55673. + coord_inc_item_pos(&from);
  55674. + from_ih--;
  55675. + coord_inc_item_pos(&to);
  55676. + }
  55677. +
  55678. + to_ih = node40_ih_at(shift->target, old_items);
  55679. + if (shift->entire) {
  55680. + /* copy @entire items entirely */
  55681. +
  55682. + /* copy item headers */
  55683. + memcpy(to_ih - shift->entire + 1,
  55684. + from_ih - shift->entire + 1,
  55685. + shift->entire * sizeof(item_header40));
  55686. + /* update item header offset */
  55687. + old_offset = ih40_get_offset(from_ih);
  55688. + /* AUDIT: Looks like if we calculate old_offset + free_space_start here instead of just old_offset, we can perform one "add" operation less per each iteration */
  55689. + for (i = 0; i < shift->entire; i++, to_ih--, from_ih--)
  55690. + ih40_set_offset(to_ih,
  55691. + ih40_get_offset(from_ih) -
  55692. + old_offset + free_space_start);
  55693. +
  55694. + /* copy item bodies */
  55695. + memcpy(zdata(shift->target) + free_space_start, zdata(from.node) + old_offset, /*ih40_get_offset (from_ih), */
  55696. + shift->entire_bytes);
  55697. +
  55698. + coord_add_item_pos(&from, (int)shift->entire);
  55699. + coord_add_item_pos(&to, (int)shift->entire);
  55700. + }
  55701. +
  55702. + nh40_set_free_space_start(nh,
  55703. + free_space_start +
  55704. + shift->shift_bytes -
  55705. + shift->merging_bytes);
  55706. + nh40_set_free_space(nh,
  55707. + nh40_get_free_space(nh) -
  55708. + (shift->shift_bytes - shift->merging_bytes +
  55709. + sizeof(item_header40) * new_items));
  55710. +
  55711. + /* update node header */
  55712. + node40_set_num_items(shift->target, nh, old_items + new_items);
  55713. + assert("vs-170",
  55714. + nh40_get_free_space(nh) < znode_size(shift->target));
  55715. +
  55716. + if (shift->part_units) {
  55717. + /* copy heading part (@part units) of @source item as
  55718. + a new item into @target->node */
  55719. +
  55720. + /* copy item header of partially copied item */
  55721. + coord_set_item_pos(&to,
  55722. + node40_num_of_items_internal(to.node)
  55723. + - 1);
  55724. + memcpy(to_ih, from_ih, sizeof(item_header40));
  55725. + ih40_set_offset(to_ih,
  55726. + nh40_get_free_space_start(nh) -
  55727. + shift->part_bytes);
  55728. + if (item_plugin_by_coord(&to)->b.init)
  55729. + item_plugin_by_coord(&to)->b.init(&to, &from,
  55730. + NULL);
  55731. + copy_units(&to, &from, 0, shift->part_units, SHIFT_LEFT,
  55732. + shift->part_bytes);
  55733. + }
  55734. +
  55735. + } else {
  55736. + /* copying to right */
  55737. +
  55738. + coord_set_item_pos(&from,
  55739. + node40_num_of_items_internal(from.node) - 1);
  55740. + from_ih = node40_ih_at_coord(&from);
  55741. +
  55742. + coord_set_item_pos(&to, 0);
  55743. +
  55744. + /* prepare space for new items */
  55745. + memmove(zdata(to.node) + node_header_size +
  55746. + shift->shift_bytes,
  55747. + zdata(to.node) + node_header_size,
  55748. + free_space_start - node_header_size);
  55749. + /* update item headers of moved items */
  55750. + to_ih = node40_ih_at(to.node, 0);
  55751. + /* first item gets @merging_bytes longer. free space appears
  55752. + at its beginning */
  55753. + if (!node_is_empty(to.node))
  55754. + ih40_set_offset(to_ih,
  55755. + ih40_get_offset(to_ih) +
  55756. + shift->shift_bytes -
  55757. + shift->merging_bytes);
  55758. +
  55759. + for (i = 1; i < old_items; i++)
  55760. + ih40_set_offset(to_ih - i,
  55761. + ih40_get_offset(to_ih - i) +
  55762. + shift->shift_bytes);
  55763. +
  55764. + /* move item headers to make space for new items */
  55765. + memmove(to_ih - old_items + 1 - new_items,
  55766. + to_ih - old_items + 1,
  55767. + sizeof(item_header40) * old_items);
  55768. + to_ih -= (new_items - 1);
  55769. +
  55770. + nh40_set_free_space_start(nh,
  55771. + free_space_start +
  55772. + shift->shift_bytes);
  55773. + nh40_set_free_space(nh,
  55774. + nh40_get_free_space(nh) -
  55775. + (shift->shift_bytes +
  55776. + sizeof(item_header40) * new_items));
  55777. +
  55778. + /* update node header */
  55779. + node40_set_num_items(shift->target, nh, old_items + new_items);
  55780. + assert("vs-170",
  55781. + nh40_get_free_space(nh) < znode_size(shift->target));
  55782. +
  55783. + if (shift->merging_units) {
  55784. + coord_add_item_pos(&to, new_items);
  55785. + to.unit_pos = 0;
  55786. + to.between = AT_UNIT;
  55787. + /* prepend first item of @to */
  55788. + copy_units(&to, &from,
  55789. + coord_last_unit_pos(&from) -
  55790. + shift->merging_units + 1,
  55791. + shift->merging_units, SHIFT_RIGHT,
  55792. + shift->merging_bytes);
  55793. + coord_dec_item_pos(&from);
  55794. + from_ih++;
  55795. + }
  55796. +
  55797. + if (shift->entire) {
  55798. + /* copy @entire items entirely */
  55799. +
  55800. + /* copy item headers */
  55801. + memcpy(to_ih, from_ih,
  55802. + shift->entire * sizeof(item_header40));
  55803. +
  55804. + /* update item header offset */
  55805. + old_offset =
  55806. + ih40_get_offset(from_ih + shift->entire - 1);
  55807. + /* AUDIT: old_offset + sizeof (node40_header) + shift->part_bytes calculation can be taken off the loop. */
  55808. + for (i = 0; i < shift->entire; i++, to_ih++, from_ih++)
  55809. + ih40_set_offset(to_ih,
  55810. + ih40_get_offset(from_ih) -
  55811. + old_offset +
  55812. + node_header_size +
  55813. + shift->part_bytes);
  55814. + /* copy item bodies */
  55815. + coord_add_item_pos(&from, -(int)(shift->entire - 1));
  55816. + memcpy(zdata(to.node) + node_header_size +
  55817. + shift->part_bytes, item_by_coord_node40(&from),
  55818. + shift->entire_bytes);
  55819. + coord_dec_item_pos(&from);
  55820. + }
  55821. +
  55822. + if (shift->part_units) {
  55823. + coord_set_item_pos(&to, 0);
  55824. + to.unit_pos = 0;
  55825. + to.between = AT_UNIT;
  55826. + /* copy heading part (@part units) of @source item as
  55827. + a new item into @target->node */
  55828. +
  55829. + /* copy item header of partially copied item */
  55830. + memcpy(to_ih, from_ih, sizeof(item_header40));
  55831. + ih40_set_offset(to_ih, node_header_size);
  55832. + if (item_plugin_by_coord(&to)->b.init)
  55833. + item_plugin_by_coord(&to)->b.init(&to, &from,
  55834. + NULL);
  55835. + copy_units(&to, &from,
  55836. + coord_last_unit_pos(&from) -
  55837. + shift->part_units + 1, shift->part_units,
  55838. + SHIFT_RIGHT, shift->part_bytes);
  55839. + }
  55840. + }
  55841. +}
  55842. +
  55843. +/* remove everything either before or after @fact_stop. Number of items
  55844. + removed completely is returned */
  55845. +static int delete_copied(struct shift_params *shift)
  55846. +{
  55847. + coord_t from;
  55848. + coord_t to;
  55849. + struct carry_cut_data cdata;
  55850. +
  55851. + if (shift->pend == SHIFT_LEFT) {
  55852. + /* we were shifting to left, remove everything from the
  55853. + beginning of @shift->wish_stop->node up to
  55854. + @shift->wish_stop */
  55855. + coord_init_first_unit(&from, shift->real_stop.node);
  55856. + to = shift->real_stop;
  55857. +
  55858. + /* store old coordinate of unit which will be first after
  55859. + shift to left */
  55860. + shift->u.future_first = to;
  55861. + coord_next_unit(&shift->u.future_first);
  55862. + } else {
  55863. + /* we were shifting to right, remove everything from
  55864. + @shift->stop_coord up to the end of
  55865. + @shift->stop_coord->node */
  55866. + from = shift->real_stop;
  55867. + coord_init_last_unit(&to, from.node);
  55868. +
  55869. + /* store old coordinate of unit which will be last after
  55870. + shift to right */
  55871. + shift->u.future_last = from;
  55872. + coord_prev_unit(&shift->u.future_last);
  55873. + }
  55874. +
  55875. + cdata.params.from = &from;
  55876. + cdata.params.to = &to;
  55877. + cdata.params.from_key = NULL;
  55878. + cdata.params.to_key = NULL;
  55879. + cdata.params.smallest_removed = NULL;
  55880. + return cut_node40(&cdata, NULL);
  55881. +}
  55882. +
  55883. +/* something was moved between @left and @right. Add carry operation to @info
  55884. + list to have carry to update delimiting key between them */
  55885. +static int
  55886. +prepare_for_update(znode * left, znode * right, carry_plugin_info * info)
  55887. +{
  55888. + carry_op *op;
  55889. + carry_node *cn;
  55890. +
  55891. + if (info == NULL)
  55892. + /* nowhere to send operation to. */
  55893. + return 0;
  55894. +
  55895. + if (!should_notify_parent(right))
  55896. + return 0;
  55897. +
  55898. + op = node_post_carry(info, COP_UPDATE, right, 1);
  55899. + if (IS_ERR(op) || op == NULL)
  55900. + return op ? PTR_ERR(op) : -EIO;
  55901. +
  55902. + if (left != NULL) {
  55903. + carry_node *reference;
  55904. +
  55905. + if (info->doing)
  55906. + reference = insert_carry_node(info->doing,
  55907. + info->todo, left);
  55908. + else
  55909. + reference = op->node;
  55910. + assert("nikita-2992", reference != NULL);
  55911. + cn = reiser4_add_carry(info->todo, POOLO_BEFORE, reference);
  55912. + if (IS_ERR(cn))
  55913. + return PTR_ERR(cn);
  55914. + cn->parent = 1;
  55915. + cn->node = left;
  55916. + if (ZF_ISSET(left, JNODE_ORPHAN))
  55917. + cn->left_before = 1;
  55918. + op->u.update.left = cn;
  55919. + } else
  55920. + op->u.update.left = NULL;
  55921. + return 0;
  55922. +}
  55923. +
  55924. +/* plugin->u.node.prepare_removal
  55925. + to delete a pointer to @empty from the tree add corresponding carry
  55926. + operation (delete) to @info list */
  55927. +int prepare_removal_node40(znode * empty, carry_plugin_info * info)
  55928. +{
  55929. + carry_op *op;
  55930. + reiser4_tree *tree;
  55931. +
  55932. + if (!should_notify_parent(empty))
  55933. + return 0;
  55934. + /* already on a road to Styx */
  55935. + if (ZF_ISSET(empty, JNODE_HEARD_BANSHEE))
  55936. + return 0;
  55937. + op = node_post_carry(info, COP_DELETE, empty, 1);
  55938. + if (IS_ERR(op) || op == NULL)
  55939. + return RETERR(op ? PTR_ERR(op) : -EIO);
  55940. +
  55941. + op->u.delete.child = NULL;
  55942. + op->u.delete.flags = 0;
  55943. +
  55944. + /* fare thee well */
  55945. + tree = znode_get_tree(empty);
  55946. + read_lock_tree(tree);
  55947. + write_lock_dk(tree);
  55948. + znode_set_ld_key(empty, znode_get_rd_key(empty));
  55949. + if (znode_is_left_connected(empty) && empty->left)
  55950. + znode_set_rd_key(empty->left, znode_get_rd_key(empty));
  55951. + write_unlock_dk(tree);
  55952. + read_unlock_tree(tree);
  55953. +
  55954. + ZF_SET(empty, JNODE_HEARD_BANSHEE);
  55955. + return 0;
  55956. +}
  55957. +
  55958. +/* something was shifted from @insert_coord->node to @shift->target, update
  55959. + @insert_coord correspondingly */
  55960. +static void
  55961. +adjust_coord(coord_t * insert_coord, struct shift_params *shift, int removed,
  55962. + int including_insert_coord)
  55963. +{
  55964. + /* item plugin was invalidated by shifting */
  55965. + coord_clear_iplug(insert_coord);
  55966. +
  55967. + if (node_is_empty(shift->wish_stop.node)) {
  55968. + assert("vs-242", shift->everything);
  55969. + if (including_insert_coord) {
  55970. + if (shift->pend == SHIFT_RIGHT) {
  55971. + /* set @insert_coord before first unit of
  55972. + @shift->target node */
  55973. + coord_init_before_first_item(insert_coord,
  55974. + shift->target);
  55975. + } else {
  55976. + /* set @insert_coord after last in target node */
  55977. + coord_init_after_last_item(insert_coord,
  55978. + shift->target);
  55979. + }
  55980. + } else {
  55981. + /* set @insert_coord inside of empty node. There is
  55982. + only one possible coord within an empty
  55983. + node. init_first_unit will set that coord */
  55984. + coord_init_first_unit(insert_coord,
  55985. + shift->wish_stop.node);
  55986. + }
  55987. + return;
  55988. + }
  55989. +
  55990. + if (shift->pend == SHIFT_RIGHT) {
  55991. + /* there was shifting to right */
  55992. + if (shift->everything) {
  55993. + /* everything wanted was shifted */
  55994. + if (including_insert_coord) {
  55995. + /* @insert_coord is set before first unit of
  55996. + @to node */
  55997. + coord_init_before_first_item(insert_coord,
  55998. + shift->target);
  55999. + insert_coord->between = BEFORE_UNIT;
  56000. + } else {
  56001. + /* @insert_coord is set after last unit of
  56002. + @insert->node */
  56003. + coord_init_last_unit(insert_coord,
  56004. + shift->wish_stop.node);
  56005. + insert_coord->between = AFTER_UNIT;
  56006. + }
  56007. + }
  56008. + return;
  56009. + }
  56010. +
  56011. + /* there was shifting to left */
  56012. + if (shift->everything) {
  56013. + /* everything wanted was shifted */
  56014. + if (including_insert_coord) {
  56015. + /* @insert_coord is set after last unit in @to node */
  56016. + coord_init_after_last_item(insert_coord, shift->target);
  56017. + } else {
  56018. + /* @insert_coord is set before first unit in the same
  56019. + node */
  56020. + coord_init_before_first_item(insert_coord,
  56021. + shift->wish_stop.node);
  56022. + }
  56023. + return;
  56024. + }
  56025. +
  56026. + /* FIXME-VS: the code below is complicated because with between ==
  56027. + AFTER_ITEM unit_pos is set to 0 */
  56028. +
  56029. + if (!removed) {
  56030. + /* no items were shifted entirely */
  56031. + assert("vs-195", shift->merging_units == 0
  56032. + || shift->part_units == 0);
  56033. +
  56034. + if (shift->real_stop.item_pos == insert_coord->item_pos) {
  56035. + if (shift->merging_units) {
  56036. + if (insert_coord->between == AFTER_UNIT) {
  56037. + assert("nikita-1441",
  56038. + insert_coord->unit_pos >=
  56039. + shift->merging_units);
  56040. + insert_coord->unit_pos -=
  56041. + shift->merging_units;
  56042. + } else if (insert_coord->between == BEFORE_UNIT) {
  56043. + assert("nikita-2090",
  56044. + insert_coord->unit_pos >
  56045. + shift->merging_units);
  56046. + insert_coord->unit_pos -=
  56047. + shift->merging_units;
  56048. + }
  56049. +
  56050. + assert("nikita-2083",
  56051. + insert_coord->unit_pos + 1);
  56052. + } else {
  56053. + if (insert_coord->between == AFTER_UNIT) {
  56054. + assert("nikita-1442",
  56055. + insert_coord->unit_pos >=
  56056. + shift->part_units);
  56057. + insert_coord->unit_pos -=
  56058. + shift->part_units;
  56059. + } else if (insert_coord->between == BEFORE_UNIT) {
  56060. + assert("nikita-2089",
  56061. + insert_coord->unit_pos >
  56062. + shift->part_units);
  56063. + insert_coord->unit_pos -=
  56064. + shift->part_units;
  56065. + }
  56066. +
  56067. + assert("nikita-2084",
  56068. + insert_coord->unit_pos + 1);
  56069. + }
  56070. + }
  56071. + return;
  56072. + }
  56073. +
  56074. + /* we shifted to left and there was not enough space for everything */
  56075. + switch (insert_coord->between) {
  56076. + case AFTER_UNIT:
  56077. + case BEFORE_UNIT:
  56078. + if (shift->real_stop.item_pos == insert_coord->item_pos)
  56079. + insert_coord->unit_pos -= shift->part_units;
  56080. + case AFTER_ITEM:
  56081. + coord_add_item_pos(insert_coord, -removed);
  56082. + break;
  56083. + default:
  56084. + impossible("nikita-2087", "not ready");
  56085. + }
  56086. + assert("nikita-2085", insert_coord->unit_pos + 1);
  56087. +}
  56088. +
  56089. +static int call_shift_hooks(struct shift_params *shift)
  56090. +{
  56091. + unsigned i, shifted;
  56092. + coord_t coord;
  56093. + item_plugin *iplug;
  56094. +
  56095. + assert("vs-275", !node_is_empty(shift->target));
  56096. +
  56097. + /* number of items shift touches */
  56098. + shifted =
  56099. + shift->entire + (shift->merging_units ? 1 : 0) +
  56100. + (shift->part_units ? 1 : 0);
  56101. +
  56102. + if (shift->pend == SHIFT_LEFT) {
  56103. + /* moved items are at the end */
  56104. + coord_init_last_unit(&coord, shift->target);
  56105. + coord.unit_pos = 0;
  56106. +
  56107. + assert("vs-279", shift->pend == 1);
  56108. + for (i = 0; i < shifted; i++) {
  56109. + unsigned from, count;
  56110. +
  56111. + iplug = item_plugin_by_coord(&coord);
  56112. + if (i == 0 && shift->part_units) {
  56113. + assert("vs-277",
  56114. + coord_num_units(&coord) ==
  56115. + shift->part_units);
  56116. + count = shift->part_units;
  56117. + from = 0;
  56118. + } else if (i == shifted - 1 && shift->merging_units) {
  56119. + count = shift->merging_units;
  56120. + from = coord_num_units(&coord) - count;
  56121. + } else {
  56122. + count = coord_num_units(&coord);
  56123. + from = 0;
  56124. + }
  56125. +
  56126. + if (iplug->b.shift_hook) {
  56127. + iplug->b.shift_hook(&coord, from, count,
  56128. + shift->wish_stop.node);
  56129. + }
  56130. + coord_add_item_pos(&coord, -shift->pend);
  56131. + }
  56132. + } else {
  56133. + /* moved items are at the beginning */
  56134. + coord_init_first_unit(&coord, shift->target);
  56135. +
  56136. + assert("vs-278", shift->pend == -1);
  56137. + for (i = 0; i < shifted; i++) {
  56138. + unsigned from, count;
  56139. +
  56140. + iplug = item_plugin_by_coord(&coord);
  56141. + if (i == 0 && shift->part_units) {
  56142. + assert("vs-277",
  56143. + coord_num_units(&coord) ==
  56144. + shift->part_units);
  56145. + count = coord_num_units(&coord);
  56146. + from = 0;
  56147. + } else if (i == shifted - 1 && shift->merging_units) {
  56148. + count = shift->merging_units;
  56149. + from = 0;
  56150. + } else {
  56151. + count = coord_num_units(&coord);
  56152. + from = 0;
  56153. + }
  56154. +
  56155. + if (iplug->b.shift_hook) {
  56156. + iplug->b.shift_hook(&coord, from, count,
  56157. + shift->wish_stop.node);
  56158. + }
  56159. + coord_add_item_pos(&coord, -shift->pend);
  56160. + }
  56161. + }
  56162. +
  56163. + return 0;
  56164. +}
  56165. +
  56166. +/* shift to left is completed. Return 1 if unit @old was moved to left neighbor */
  56167. +static int
  56168. +unit_moved_left(const struct shift_params *shift, const coord_t * old)
  56169. +{
  56170. + assert("vs-944", shift->real_stop.node == old->node);
  56171. +
  56172. + if (shift->real_stop.item_pos < old->item_pos)
  56173. + return 0;
  56174. + if (shift->real_stop.item_pos == old->item_pos) {
  56175. + if (shift->real_stop.unit_pos < old->unit_pos)
  56176. + return 0;
  56177. + }
  56178. + return 1;
  56179. +}
  56180. +
  56181. +/* shift to right is completed. Return 1 if unit @old was moved to right
  56182. + neighbor */
  56183. +static int
  56184. +unit_moved_right(const struct shift_params *shift, const coord_t * old)
  56185. +{
  56186. + assert("vs-944", shift->real_stop.node == old->node);
  56187. +
  56188. + if (shift->real_stop.item_pos > old->item_pos)
  56189. + return 0;
  56190. + if (shift->real_stop.item_pos == old->item_pos) {
  56191. + if (shift->real_stop.unit_pos > old->unit_pos)
  56192. + return 0;
  56193. + }
  56194. + return 1;
  56195. +}
  56196. +
  56197. +/* coord @old was set in node from which shift was performed. What was shifted
  56198. + is stored in @shift. Update @old correspondingly to performed shift */
  56199. +static coord_t *adjust_coord2(const struct shift_params *shift,
  56200. + const coord_t * old, coord_t * new)
  56201. +{
  56202. + coord_clear_iplug(new);
  56203. + new->between = old->between;
  56204. +
  56205. + coord_clear_iplug(new);
  56206. + if (old->node == shift->target) {
  56207. + if (shift->pend == SHIFT_LEFT) {
  56208. + /* coord which is set inside of left neighbor does not
  56209. + change during shift to left */
  56210. + coord_dup(new, old);
  56211. + return new;
  56212. + }
  56213. + new->node = old->node;
  56214. + coord_set_item_pos(new,
  56215. + old->item_pos + shift->entire +
  56216. + (shift->part_units ? 1 : 0));
  56217. + new->unit_pos = old->unit_pos;
  56218. + if (old->item_pos == 0 && shift->merging_units)
  56219. + new->unit_pos += shift->merging_units;
  56220. + return new;
  56221. + }
  56222. +
  56223. + assert("vs-977", old->node == shift->wish_stop.node);
  56224. + if (shift->pend == SHIFT_LEFT) {
  56225. + if (unit_moved_left(shift, old)) {
  56226. + /* unit @old moved to left neighbor. Calculate its
  56227. + coordinate there */
  56228. + new->node = shift->target;
  56229. + coord_set_item_pos(new,
  56230. + node_num_items(shift->target) -
  56231. + shift->entire -
  56232. + (shift->part_units ? 1 : 0) +
  56233. + old->item_pos);
  56234. +
  56235. + new->unit_pos = old->unit_pos;
  56236. + if (shift->merging_units) {
  56237. + coord_dec_item_pos(new);
  56238. + if (old->item_pos == 0) {
  56239. + /* unit_pos only changes if item got
  56240. + merged */
  56241. + new->unit_pos =
  56242. + coord_num_units(new) -
  56243. + (shift->merging_units -
  56244. + old->unit_pos);
  56245. + }
  56246. + }
  56247. + } else {
  56248. + /* unit @old did not move to left neighbor.
  56249. +
  56250. + Use _nocheck, because @old is outside of its node.
  56251. + */
  56252. + coord_dup_nocheck(new, old);
  56253. + coord_add_item_pos(new,
  56254. + -shift->u.future_first.item_pos);
  56255. + if (new->item_pos == 0)
  56256. + new->unit_pos -= shift->u.future_first.unit_pos;
  56257. + }
  56258. + } else {
  56259. + if (unit_moved_right(shift, old)) {
  56260. + /* unit @old moved to right neighbor */
  56261. + new->node = shift->target;
  56262. + coord_set_item_pos(new,
  56263. + old->item_pos -
  56264. + shift->real_stop.item_pos);
  56265. + if (new->item_pos == 0) {
  56266. + /* unit @old might change unit pos */
  56267. + coord_set_item_pos(new,
  56268. + old->unit_pos -
  56269. + shift->real_stop.unit_pos);
  56270. + }
  56271. + } else {
  56272. + /* unit @old did not move to right neighbor, therefore
  56273. + it did not change */
  56274. + coord_dup(new, old);
  56275. + }
  56276. + }
  56277. + coord_set_iplug(new, item_plugin_by_coord(new));
  56278. + return new;
  56279. +}
  56280. +
  56281. +/* this is called when shift is completed (something of source node is copied
  56282. + to target and deleted in source) to update all taps set in current
  56283. + context */
  56284. +static void update_taps(const struct shift_params *shift)
  56285. +{
  56286. + tap_t *tap;
  56287. + coord_t new;
  56288. +
  56289. + for_all_taps(tap) {
  56290. + /* update only taps set to nodes participating in shift */
  56291. + if (tap->coord->node == shift->wish_stop.node
  56292. + || tap->coord->node == shift->target)
  56293. + tap_to_coord(tap,
  56294. + adjust_coord2(shift, tap->coord, &new));
  56295. + }
  56296. +}
  56297. +
  56298. +#if REISER4_DEBUG
  56299. +
  56300. +struct shift_check {
  56301. + reiser4_key key;
  56302. + __u16 plugin_id;
  56303. + union {
  56304. + __u64 bytes;
  56305. + __u64 entries;
  56306. + void *unused;
  56307. + } u;
  56308. +};
  56309. +
  56310. +void *shift_check_prepare(const znode * left, const znode * right)
  56311. +{
  56312. + pos_in_node_t i, nr_items;
  56313. + int mergeable;
  56314. + struct shift_check *data;
  56315. + item_header40 *ih;
  56316. +
  56317. + if (node_is_empty(left) || node_is_empty(right))
  56318. + mergeable = 0;
  56319. + else {
  56320. + coord_t l, r;
  56321. +
  56322. + coord_init_last_unit(&l, left);
  56323. + coord_init_first_unit(&r, right);
  56324. + mergeable = are_items_mergeable(&l, &r);
  56325. + }
  56326. + nr_items =
  56327. + node40_num_of_items_internal(left) +
  56328. + node40_num_of_items_internal(right) - (mergeable ? 1 : 0);
  56329. + data =
  56330. + kmalloc(sizeof(struct shift_check) * nr_items,
  56331. + reiser4_ctx_gfp_mask_get());
  56332. + if (data != NULL) {
  56333. + coord_t coord;
  56334. + pos_in_node_t item_pos;
  56335. +
  56336. + coord_init_first_unit(&coord, left);
  56337. + i = 0;
  56338. +
  56339. + for (item_pos = 0;
  56340. + item_pos < node40_num_of_items_internal(left);
  56341. + item_pos++) {
  56342. +
  56343. + coord_set_item_pos(&coord, item_pos);
  56344. + ih = node40_ih_at_coord(&coord);
  56345. +
  56346. + data[i].key = ih->key;
  56347. + data[i].plugin_id = le16_to_cpu(get_unaligned(&ih->plugin_id));
  56348. + switch (data[i].plugin_id) {
  56349. + case CTAIL_ID:
  56350. + case FORMATTING_ID:
  56351. + data[i].u.bytes = coord_num_units(&coord);
  56352. + break;
  56353. + case EXTENT_POINTER_ID:
  56354. + data[i].u.bytes =
  56355. + reiser4_extent_size(&coord,
  56356. + coord_num_units(&coord));
  56357. + break;
  56358. + case COMPOUND_DIR_ID:
  56359. + data[i].u.entries = coord_num_units(&coord);
  56360. + break;
  56361. + default:
  56362. + data[i].u.unused = NULL;
  56363. + break;
  56364. + }
  56365. + i++;
  56366. + }
  56367. +
  56368. + coord_init_first_unit(&coord, right);
  56369. +
  56370. + if (mergeable) {
  56371. + assert("vs-1609", i != 0);
  56372. +
  56373. + ih = node40_ih_at_coord(&coord);
  56374. +
  56375. + assert("vs-1589",
  56376. + data[i - 1].plugin_id ==
  56377. + le16_to_cpu(get_unaligned(&ih->plugin_id)));
  56378. + switch (data[i - 1].plugin_id) {
  56379. + case CTAIL_ID:
  56380. + case FORMATTING_ID:
  56381. + data[i - 1].u.bytes += coord_num_units(&coord);
  56382. + break;
  56383. + case EXTENT_POINTER_ID:
  56384. + data[i - 1].u.bytes +=
  56385. + reiser4_extent_size(&coord,
  56386. + coord_num_units(&coord));
  56387. + break;
  56388. + case COMPOUND_DIR_ID:
  56389. + data[i - 1].u.entries +=
  56390. + coord_num_units(&coord);
  56391. + break;
  56392. + default:
  56393. + impossible("vs-1605", "wrong mergeable item");
  56394. + break;
  56395. + }
  56396. + item_pos = 1;
  56397. + } else
  56398. + item_pos = 0;
  56399. + for (; item_pos < node40_num_of_items_internal(right);
  56400. + item_pos++) {
  56401. +
  56402. + assert("vs-1604", i < nr_items);
  56403. + coord_set_item_pos(&coord, item_pos);
  56404. + ih = node40_ih_at_coord(&coord);
  56405. +
  56406. + data[i].key = ih->key;
  56407. + data[i].plugin_id = le16_to_cpu(get_unaligned(&ih->plugin_id));
  56408. + switch (data[i].plugin_id) {
  56409. + case CTAIL_ID:
  56410. + case FORMATTING_ID:
  56411. + data[i].u.bytes = coord_num_units(&coord);
  56412. + break;
  56413. + case EXTENT_POINTER_ID:
  56414. + data[i].u.bytes =
  56415. + reiser4_extent_size(&coord,
  56416. + coord_num_units(&coord));
  56417. + break;
  56418. + case COMPOUND_DIR_ID:
  56419. + data[i].u.entries = coord_num_units(&coord);
  56420. + break;
  56421. + default:
  56422. + data[i].u.unused = NULL;
  56423. + break;
  56424. + }
  56425. + i++;
  56426. + }
  56427. + assert("vs-1606", i == nr_items);
  56428. + }
  56429. + return data;
  56430. +}
  56431. +
  56432. +void shift_check(void *vp, const znode * left, const znode * right)
  56433. +{
  56434. + pos_in_node_t i, nr_items;
  56435. + coord_t coord;
  56436. + __u64 last_bytes;
  56437. + int mergeable;
  56438. + item_header40 *ih;
  56439. + pos_in_node_t item_pos;
  56440. + struct shift_check *data;
  56441. +
  56442. + data = (struct shift_check *)vp;
  56443. +
  56444. + if (data == NULL)
  56445. + return;
  56446. +
  56447. + if (node_is_empty(left) || node_is_empty(right))
  56448. + mergeable = 0;
  56449. + else {
  56450. + coord_t l, r;
  56451. +
  56452. + coord_init_last_unit(&l, left);
  56453. + coord_init_first_unit(&r, right);
  56454. + mergeable = are_items_mergeable(&l, &r);
  56455. + }
  56456. +
  56457. + nr_items =
  56458. + node40_num_of_items_internal(left) +
  56459. + node40_num_of_items_internal(right) - (mergeable ? 1 : 0);
  56460. +
  56461. + i = 0;
  56462. + last_bytes = 0;
  56463. +
  56464. + coord_init_first_unit(&coord, left);
  56465. +
  56466. + for (item_pos = 0; item_pos < node40_num_of_items_internal(left);
  56467. + item_pos++) {
  56468. +
  56469. + coord_set_item_pos(&coord, item_pos);
  56470. + ih = node40_ih_at_coord(&coord);
  56471. +
  56472. + assert("vs-1611", i == item_pos);
  56473. + assert("vs-1590", keyeq(&ih->key, &data[i].key));
  56474. + assert("vs-1591",
  56475. + le16_to_cpu(get_unaligned(&ih->plugin_id)) == data[i].plugin_id);
  56476. + if ((i < (node40_num_of_items_internal(left) - 1))
  56477. + || !mergeable) {
  56478. + switch (data[i].plugin_id) {
  56479. + case CTAIL_ID:
  56480. + case FORMATTING_ID:
  56481. + assert("vs-1592",
  56482. + data[i].u.bytes ==
  56483. + coord_num_units(&coord));
  56484. + break;
  56485. + case EXTENT_POINTER_ID:
  56486. + assert("vs-1593",
  56487. + data[i].u.bytes ==
  56488. + reiser4_extent_size(&coord,
  56489. + coord_num_units
  56490. + (&coord)));
  56491. + break;
  56492. + case COMPOUND_DIR_ID:
  56493. + assert("vs-1594",
  56494. + data[i].u.entries ==
  56495. + coord_num_units(&coord));
  56496. + break;
  56497. + default:
  56498. + break;
  56499. + }
  56500. + }
  56501. + if (item_pos == (node40_num_of_items_internal(left) - 1)
  56502. + && mergeable) {
  56503. + switch (data[i].plugin_id) {
  56504. + case CTAIL_ID:
  56505. + case FORMATTING_ID:
  56506. + last_bytes = coord_num_units(&coord);
  56507. + break;
  56508. + case EXTENT_POINTER_ID:
  56509. + last_bytes =
  56510. + reiser4_extent_size(&coord,
  56511. + coord_num_units(&coord));
  56512. + break;
  56513. + case COMPOUND_DIR_ID:
  56514. + last_bytes = coord_num_units(&coord);
  56515. + break;
  56516. + default:
  56517. + impossible("vs-1595", "wrong mergeable item");
  56518. + break;
  56519. + }
  56520. + }
  56521. + i++;
  56522. + }
  56523. +
  56524. + coord_init_first_unit(&coord, right);
  56525. + if (mergeable) {
  56526. + ih = node40_ih_at_coord(&coord);
  56527. +
  56528. + assert("vs-1589",
  56529. + data[i - 1].plugin_id == le16_to_cpu(get_unaligned(&ih->plugin_id)));
  56530. + assert("vs-1608", last_bytes != 0);
  56531. + switch (data[i - 1].plugin_id) {
  56532. + case CTAIL_ID:
  56533. + case FORMATTING_ID:
  56534. + assert("vs-1596",
  56535. + data[i - 1].u.bytes ==
  56536. + last_bytes + coord_num_units(&coord));
  56537. + break;
  56538. +
  56539. + case EXTENT_POINTER_ID:
  56540. + assert("vs-1597",
  56541. + data[i - 1].u.bytes ==
  56542. + last_bytes + reiser4_extent_size(&coord,
  56543. + coord_num_units
  56544. + (&coord)));
  56545. + break;
  56546. +
  56547. + case COMPOUND_DIR_ID:
  56548. + assert("vs-1598",
  56549. + data[i - 1].u.bytes ==
  56550. + last_bytes + coord_num_units(&coord));
  56551. + break;
  56552. + default:
  56553. + impossible("vs-1599", "wrong mergeable item");
  56554. + break;
  56555. + }
  56556. + item_pos = 1;
  56557. + } else
  56558. + item_pos = 0;
  56559. +
  56560. + for (; item_pos < node40_num_of_items_internal(right); item_pos++) {
  56561. +
  56562. + coord_set_item_pos(&coord, item_pos);
  56563. + ih = node40_ih_at_coord(&coord);
  56564. +
  56565. + assert("vs-1612", keyeq(&ih->key, &data[i].key));
  56566. + assert("vs-1613",
  56567. + le16_to_cpu(get_unaligned(&ih->plugin_id)) == data[i].plugin_id);
  56568. + switch (data[i].plugin_id) {
  56569. + case CTAIL_ID:
  56570. + case FORMATTING_ID:
  56571. + assert("vs-1600",
  56572. + data[i].u.bytes == coord_num_units(&coord));
  56573. + break;
  56574. + case EXTENT_POINTER_ID:
  56575. + assert("vs-1601",
  56576. + data[i].u.bytes ==
  56577. + reiser4_extent_size(&coord,
  56578. + coord_num_units
  56579. + (&coord)));
  56580. + break;
  56581. + case COMPOUND_DIR_ID:
  56582. + assert("vs-1602",
  56583. + data[i].u.entries == coord_num_units(&coord));
  56584. + break;
  56585. + default:
  56586. + break;
  56587. + }
  56588. + i++;
  56589. + }
  56590. +
  56591. + assert("vs-1603", i == nr_items);
  56592. + kfree(data);
  56593. +}
  56594. +
  56595. +#endif
  56596. +
  56597. +/*
  56598. + * common part of ->shift() for all nodes,
  56599. + * which contain node40_header at the beginning and
  56600. + * the table of item headers at the end
  56601. + */
  56602. +int shift_node40_common(coord_t *from, znode *to,
  56603. + shift_direction pend,
  56604. + int delete_child, /* if @from->node becomes empty,
  56605. + * it will be deleted from the
  56606. + * tree if this is set to 1 */
  56607. + int including_stop_coord,
  56608. + carry_plugin_info *info,
  56609. + size_t node_header_size)
  56610. +{
  56611. + struct shift_params shift;
  56612. + int result;
  56613. + znode *left, *right;
  56614. + znode *source;
  56615. + int target_empty;
  56616. +
  56617. + assert("nikita-2161", coord_check(from));
  56618. +
  56619. + memset(&shift, 0, sizeof(shift));
  56620. + shift.pend = pend;
  56621. + shift.wish_stop = *from;
  56622. + shift.target = to;
  56623. +
  56624. + assert("nikita-1473", znode_is_write_locked(from->node));
  56625. + assert("nikita-1474", znode_is_write_locked(to));
  56626. +
  56627. + source = from->node;
  56628. +
  56629. + /* set @shift.wish_stop to rightmost/leftmost unit among units we want
  56630. + shifted */
  56631. + if (pend == SHIFT_LEFT) {
  56632. + result = coord_set_to_left(&shift.wish_stop);
  56633. + left = to;
  56634. + right = from->node;
  56635. + } else {
  56636. + result = coord_set_to_right(&shift.wish_stop);
  56637. + left = from->node;
  56638. + right = to;
  56639. + }
  56640. +
  56641. + if (result) {
  56642. + /* move insertion coord even if there is nothing to move */
  56643. + if (including_stop_coord) {
  56644. + /* move insertion coord (@from) */
  56645. + if (pend == SHIFT_LEFT) {
  56646. + /* after last item in target node */
  56647. + coord_init_after_last_item(from, to);
  56648. + } else {
  56649. + /* before first item in target node */
  56650. + coord_init_before_first_item(from, to);
  56651. + }
  56652. + }
  56653. +
  56654. + if (delete_child && node_is_empty(shift.wish_stop.node))
  56655. + result =
  56656. + prepare_removal_node40(shift.wish_stop.node, info);
  56657. + else
  56658. + result = 0;
  56659. + /* there is nothing to shift */
  56660. + assert("nikita-2078", coord_check(from));
  56661. + return result;
  56662. + }
  56663. +
  56664. + target_empty = node_is_empty(to);
  56665. +
  56666. + /* when first node plugin with item body compression is implemented,
  56667. + this must be changed to call node specific plugin */
  56668. +
  56669. + /* shift->stop_coord is updated to last unit which really will be
  56670. + shifted */
  56671. + estimate_shift(&shift, get_current_context());
  56672. + if (!shift.shift_bytes) {
  56673. + /* we could not shift anything */
  56674. + assert("nikita-2079", coord_check(from));
  56675. + return 0;
  56676. + }
  56677. +
  56678. + copy(&shift, node_header_size);
  56679. +
  56680. + /* result value of this is important. It is used by adjust_coord below */
  56681. + result = delete_copied(&shift);
  56682. +
  56683. + assert("vs-1610", result >= 0);
  56684. + assert("vs-1471",
  56685. + ((reiser4_context *) current->journal_info)->magic ==
  56686. + context_magic);
  56687. +
  56688. + /* item which has been moved from one node to another might want to do
  56689. + something on that event. This can be done by item's shift_hook
  56690. + method, which will be now called for every moved items */
  56691. + call_shift_hooks(&shift);
  56692. +
  56693. + assert("vs-1472",
  56694. + ((reiser4_context *) current->journal_info)->magic ==
  56695. + context_magic);
  56696. +
  56697. + update_taps(&shift);
  56698. +
  56699. + assert("vs-1473",
  56700. + ((reiser4_context *) current->journal_info)->magic ==
  56701. + context_magic);
  56702. +
  56703. + /* adjust @from pointer in accordance with @including_stop_coord flag
  56704. + and amount of data which was really shifted */
  56705. + adjust_coord(from, &shift, result, including_stop_coord);
  56706. +
  56707. + if (target_empty)
  56708. + /*
  56709. + * items were shifted into empty node. Update delimiting key.
  56710. + */
  56711. + result = prepare_for_update(NULL, left, info);
  56712. +
  56713. + /* add update operation to @info, which is the list of operations to
  56714. + be performed on a higher level */
  56715. + result = prepare_for_update(left, right, info);
  56716. + if (!result && node_is_empty(source) && delete_child) {
  56717. + /* all contents of @from->node is moved to @to and @from->node
  56718. + has to be removed from the tree, so, on higher level we
  56719. + will be removing the pointer to node @from->node */
  56720. + result = prepare_removal_node40(source, info);
  56721. + }
  56722. + assert("nikita-2080", coord_check(from));
  56723. + return result ? result : (int)shift.shift_bytes;
  56724. +}
  56725. +
  56726. +/*
  56727. + * plugin->u.node.shift
  56728. + * look for description of this method in plugin/node/node.h
  56729. + */
  56730. +int shift_node40(coord_t *from, znode *to,
  56731. + shift_direction pend,
  56732. + int delete_child, /* if @from->node becomes empty,
  56733. + * it will be deleted from the
  56734. + * tree if this is set to 1 */
  56735. + int including_stop_coord,
  56736. + carry_plugin_info *info)
  56737. +{
  56738. + return shift_node40_common(from, to, pend, delete_child,
  56739. + including_stop_coord, info,
  56740. + sizeof(node40_header));
  56741. +}
  56742. +
  56743. +/* plugin->u.node.fast_insert()
  56744. + look for description of this method in plugin/node/node.h */
  56745. +int fast_insert_node40(const coord_t * coord UNUSED_ARG /* node to query */ )
  56746. +{
  56747. + return 1;
  56748. +}
  56749. +
  56750. +/* plugin->u.node.fast_paste()
  56751. + look for description of this method in plugin/node/node.h */
  56752. +int fast_paste_node40(const coord_t * coord UNUSED_ARG /* node to query */ )
  56753. +{
  56754. + return 1;
  56755. +}
  56756. +
  56757. +/* plugin->u.node.fast_cut()
  56758. + look for description of this method in plugin/node/node.h */
  56759. +int fast_cut_node40(const coord_t * coord UNUSED_ARG /* node to query */ )
  56760. +{
  56761. + return 1;
  56762. +}
  56763. +
  56764. +/* plugin->u.node.modify - not defined */
  56765. +
  56766. +/* plugin->u.node.max_item_size */
  56767. +int max_item_size_node40(void)
  56768. +{
  56769. + return reiser4_get_current_sb()->s_blocksize - sizeof(node40_header) -
  56770. + sizeof(item_header40);
  56771. +}
  56772. +
  56773. +/* plugin->u.node.set_item_plugin */
  56774. +int set_item_plugin_node40(coord_t *coord, item_id id)
  56775. +{
  56776. + item_header40 *ih;
  56777. +
  56778. + ih = node40_ih_at_coord(coord);
  56779. + put_unaligned(cpu_to_le16(id), &ih->plugin_id);
  56780. + coord->iplugid = id;
  56781. + return 0;
  56782. +}
  56783. +
  56784. +/*
  56785. + Local variables:
  56786. + c-indentation-style: "K&R"
  56787. + mode-name: "LC"
  56788. + c-basic-offset: 8
  56789. + tab-width: 8
  56790. + fill-column: 120
  56791. + scroll-step: 1
  56792. + End:
  56793. +*/
  56794. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/node/node40.h linux-4.14.2/fs/reiser4/plugin/node/node40.h
  56795. --- linux-4.14.2.orig/fs/reiser4/plugin/node/node40.h 1970-01-01 01:00:00.000000000 +0100
  56796. +++ linux-4.14.2/fs/reiser4/plugin/node/node40.h 2017-11-26 22:13:09.000000000 +0100
  56797. @@ -0,0 +1,130 @@
  56798. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  56799. +
  56800. +#if !defined( __REISER4_NODE40_H__ )
  56801. +#define __REISER4_NODE40_H__
  56802. +
  56803. +#include "../../forward.h"
  56804. +#include "../../dformat.h"
  56805. +#include "node.h"
  56806. +
  56807. +#include <linux/types.h>
  56808. +
  56809. +/* format of node header for 40 node layouts. Keep bloat out of this struct. */
  56810. +typedef struct node40_header {
  56811. + /* identifier of node plugin. Must be located at the very beginning
  56812. + of a node. */
  56813. + common_node_header common_header; /* this is 16 bits */
  56814. + /* number of items. Should be first element in the node header,
  56815. + because we haven't yet finally decided whether it shouldn't go into
  56816. + common_header.
  56817. + */
  56818. +/* NIKITA-FIXME-HANS: Create a macro such that if there is only one
  56819. + * node format at compile time, and it is this one, accesses do not function dereference when
  56820. + * accessing these fields (and otherwise they do). Probably 80% of users will only have one node format at a time throughout the life of reiser4. */
  56821. + d16 nr_items;
  56822. + /* free space in node measured in bytes */
  56823. + d16 free_space;
  56824. + /* offset to start of free space in node */
  56825. + d16 free_space_start;
  56826. + /* for reiser4_fsck. When information about what is a free
  56827. + block is corrupted, and we try to recover everything even
  56828. + if marked as freed, then old versions of data may
  56829. + duplicate newer versions, and this field allows us to
  56830. + restore the newer version. Also useful for when users
  56831. + who don't have the new trashcan installed on their linux distro
  56832. + delete the wrong files and send us desperate emails
  56833. + offering $25 for them back. */
  56834. +
  56835. + /* magic field we need to tell formatted nodes NIKITA-FIXME-HANS: improve this comment */
  56836. + d32 magic;
  56837. + /* flushstamp is made of mk_id and write_counter. mk_id is an
  56838. + id generated randomly at mkreiserfs time. So we can just
  56839. + skip all nodes with different mk_id. write_counter is d64
  56840. + incrementing counter of writes on disk. It is used for
  56841. + choosing the newest data at fsck time. NIKITA-FIXME-HANS: why was field name changed but not comment? */
  56842. +
  56843. + d32 mkfs_id;
  56844. + d64 flush_id;
  56845. + /* node flags to be used by fsck (reiser4ck or reiser4fsck?)
  56846. + and repacker NIKITA-FIXME-HANS: say more or reference elsewhere that says more */
  56847. + d16 flags;
  56848. +
  56849. + /* 1 is leaf level, 2 is twig level, root is the numerically
  56850. + largest level */
  56851. + d8 level;
  56852. +
  56853. + d8 pad;
  56854. +} PACKED node40_header;
  56855. +
  56856. +/* item headers are not standard across all node layouts, pass
  56857. + pos_in_node to functions instead */
  56858. +typedef struct item_header40 {
  56859. + /* key of item */
  56860. + /* 0 */ reiser4_key key;
  56861. + /* offset from start of a node measured in 8-byte chunks */
  56862. + /* 24 */ d16 offset;
  56863. + /* 26 */ d16 flags;
  56864. + /* 28 */ d16 plugin_id;
  56865. +} PACKED item_header40;
  56866. +
  56867. +size_t item_overhead_node40(const znode * node, flow_t * aflow);
  56868. +size_t free_space_node40(znode * node);
  56869. +node_search_result lookup_node40(znode * node, const reiser4_key * key,
  56870. + lookup_bias bias, coord_t * coord);
  56871. +int num_of_items_node40(const znode * node);
  56872. +char *item_by_coord_node40(const coord_t * coord);
  56873. +int length_by_coord_node40(const coord_t * coord);
  56874. +item_plugin *plugin_by_coord_node40(const coord_t * coord);
  56875. +reiser4_key *key_at_node40(const coord_t * coord, reiser4_key * key);
  56876. +size_t estimate_node40(znode * node);
  56877. +int check_node40(const znode * node, __u32 flags, const char **error);
  56878. +int parse_node40_common(znode *node, const __u32 magic);
  56879. +int parse_node40(znode * node);
  56880. +int init_node40_common(znode *node, node_plugin *nplug,
  56881. + size_t node_header_size, const __u32 magic);
  56882. +int init_node40(znode *node);
  56883. +
  56884. +#ifdef GUESS_EXISTS
  56885. +int guess_node40_common(const znode *node, reiser4_node_id id,
  56886. + const __u32 magic);
  56887. +int guess_node40(const znode *node);
  56888. +#endif
  56889. +
  56890. +void change_item_size_node40(coord_t * coord, int by);
  56891. +int create_item_node40(coord_t * target, const reiser4_key * key,
  56892. + reiser4_item_data * data, carry_plugin_info * info);
  56893. +void update_item_key_node40(coord_t * target, const reiser4_key * key,
  56894. + carry_plugin_info * info);
  56895. +int kill_node40(struct carry_kill_data *, carry_plugin_info *);
  56896. +int cut_node40(struct carry_cut_data *, carry_plugin_info *);
  56897. +int shift_node40_common(coord_t *from, znode *to, shift_direction pend,
  56898. + int delete_child, int including_stop_coord,
  56899. + carry_plugin_info *info, size_t nh_size);
  56900. +int shift_node40(coord_t *from, znode *to, shift_direction pend,
  56901. + int delete_child, int including_stop_coord,
  56902. + carry_plugin_info *info);
  56903. +int fast_insert_node40(const coord_t * coord);
  56904. +int fast_paste_node40(const coord_t * coord);
  56905. +int fast_cut_node40(const coord_t * coord);
  56906. +int max_item_size_node40(void);
  56907. +int prepare_removal_node40(znode * empty, carry_plugin_info * info);
  56908. +int set_item_plugin_node40(coord_t * coord, item_id id);
  56909. +int shrink_item_node40(coord_t * coord, int delta);
  56910. +
  56911. +#if REISER4_DEBUG
  56912. +void *shift_check_prepare(const znode *left, const znode *right);
  56913. +void shift_check(void *vp, const znode *left, const znode *right);
  56914. +#endif
  56915. +
  56916. +/* __REISER4_NODE40_H__ */
  56917. +#endif
  56918. +/*
  56919. + Local variables:
  56920. + c-indentation-style: "K&R"
  56921. + mode-name: "LC"
  56922. + c-basic-offset: 8
  56923. + tab-width: 8
  56924. + fill-column: 120
  56925. + scroll-step: 1
  56926. + End:
  56927. +*/
  56928. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/node/node41.c linux-4.14.2/fs/reiser4/plugin/node/node41.c
  56929. --- linux-4.14.2.orig/fs/reiser4/plugin/node/node41.c 1970-01-01 01:00:00.000000000 +0100
  56930. +++ linux-4.14.2/fs/reiser4/plugin/node/node41.c 2017-11-26 22:13:09.000000000 +0100
  56931. @@ -0,0 +1,137 @@
  56932. +/*
  56933. + * Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README
  56934. + */
  56935. +
  56936. +#include "../../debug.h"
  56937. +#include "../../key.h"
  56938. +#include "../../coord.h"
  56939. +#include "../plugin_header.h"
  56940. +#include "../item/item.h"
  56941. +#include "node.h"
  56942. +#include "node41.h"
  56943. +#include "../plugin.h"
  56944. +#include "../../jnode.h"
  56945. +#include "../../znode.h"
  56946. +#include "../../pool.h"
  56947. +#include "../../carry.h"
  56948. +#include "../../tap.h"
  56949. +#include "../../tree.h"
  56950. +#include "../../super.h"
  56951. +#include "../../checksum.h"
  56952. +#include "../../reiser4.h"
  56953. +
  56954. +#include <asm/uaccess.h>
  56955. +#include <linux/types.h>
  56956. +#include <linux/prefetch.h>
  56957. +
  56958. +/*
  56959. + * node41 layout is almost the same as node40:
  56960. + * node41_header is at the beginning and a table of item headers
  56961. + * is at the end. The difference is that node41_header contains
  56962. + * a 32-bit checksum (see node41.h)
  56963. + */
  56964. +
  56965. +static const __u32 REISER4_NODE41_MAGIC = 0x19051966;
  56966. +
  56967. +static inline node41_header *node41_node_header(const znode *node)
  56968. +{
  56969. + assert("edward-1634", node != NULL);
  56970. + assert("edward-1635", znode_page(node) != NULL);
  56971. + assert("edward-1636", zdata(node) != NULL);
  56972. +
  56973. + return (node41_header *)zdata(node);
  56974. +}
  56975. +
  56976. +int csum_node41(znode *node, int check)
  56977. +{
  56978. + __u32 cpu_csum;
  56979. +
  56980. + cpu_csum = reiser4_crc32c(get_current_super_private()->csum_tfm,
  56981. + ~0,
  56982. + zdata(node),
  56983. + sizeof(struct node40_header));
  56984. + cpu_csum = reiser4_crc32c(get_current_super_private()->csum_tfm,
  56985. + cpu_csum,
  56986. + zdata(node) + sizeof(struct node41_header),
  56987. + reiser4_get_current_sb()->s_blocksize -
  56988. + sizeof(node41_header));
  56989. + if (check)
  56990. + return cpu_csum == nh41_get_csum(node41_node_header(node));
  56991. + else {
  56992. + nh41_set_csum(node41_node_header(node), cpu_csum);
  56993. + return 1;
  56994. + }
  56995. +}
  56996. +
  56997. +/*
  56998. + * plugin->u.node.parse
  56999. + * look for description of this method in plugin/node/node.h
  57000. + */
  57001. +int parse_node41(znode *node /* node to parse */)
  57002. +{
  57003. + int ret;
  57004. +
  57005. + ret = csum_node41(node, 1/* check */);
  57006. + if (!ret) {
  57007. + warning("edward-1645",
  57008. + "block %llu: bad checksum. FSCK?",
  57009. + *jnode_get_block(ZJNODE(node)));
  57010. + reiser4_handle_error();
  57011. + return RETERR(-EIO);
  57012. + }
  57013. + return parse_node40_common(node, REISER4_NODE41_MAGIC);
  57014. +}
  57015. +
  57016. +/*
  57017. + * plugin->u.node.init
  57018. + * look for description of this method in plugin/node/node.h
  57019. + */
  57020. +int init_node41(znode *node /* node to initialise */)
  57021. +{
  57022. + return init_node40_common(node, node_plugin_by_id(NODE41_ID),
  57023. + sizeof(node41_header), REISER4_NODE41_MAGIC);
  57024. +}
  57025. +
  57026. +/*
  57027. + * plugin->u.node.shift
  57028. + * look for description of this method in plugin/node/node.h
  57029. + */
  57030. +int shift_node41(coord_t *from, znode *to,
  57031. + shift_direction pend,
  57032. + int delete_child, /* if @from->node becomes empty,
  57033. + * it will be deleted from the
  57034. + * tree if this is set to 1 */
  57035. + int including_stop_coord,
  57036. + carry_plugin_info *info)
  57037. +{
  57038. + return shift_node40_common(from, to, pend, delete_child,
  57039. + including_stop_coord, info,
  57040. + sizeof(node41_header));
  57041. +}
  57042. +
  57043. +#ifdef GUESS_EXISTS
  57044. +int guess_node41(const znode *node /* node to guess plugin of */)
  57045. +{
  57046. + return guess_node40_common(node, NODE41_ID, REISER4_NODE41_MAGIC);
  57047. +}
  57048. +#endif
  57049. +
  57050. +/*
  57051. + * plugin->u.node.max_item_size
  57052. + */
  57053. +int max_item_size_node41(void)
  57054. +{
  57055. + return reiser4_get_current_sb()->s_blocksize - sizeof(node41_header) -
  57056. + sizeof(item_header40);
  57057. +}
  57058. +
  57059. +/*
  57060. + Local variables:
  57061. + c-indentation-style: "K&R"
  57062. + mode-name: "LC"
  57063. + c-basic-offset: 8
  57064. + tab-width: 8
  57065. + fill-column: 80
  57066. + scroll-step: 1
  57067. + End:
  57068. +*/
  57069. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/node/node41.h linux-4.14.2/fs/reiser4/plugin/node/node41.h
  57070. --- linux-4.14.2.orig/fs/reiser4/plugin/node/node41.h 1970-01-01 01:00:00.000000000 +0100
  57071. +++ linux-4.14.2/fs/reiser4/plugin/node/node41.h 2017-11-26 22:13:09.000000000 +0100
  57072. @@ -0,0 +1,50 @@
  57073. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  57074. +
  57075. +#if !defined( __REISER4_NODE41_H__ )
  57076. +#define __REISER4_NODE41_H__
  57077. +
  57078. +#include "../../forward.h"
  57079. +#include "../../dformat.h"
  57080. +#include "node40.h"
  57081. +#include <linux/types.h>
  57082. +
  57083. +/*
  57084. + * node41 layout: the same as node40, but with 32-bit checksum
  57085. + */
  57086. +
  57087. +typedef struct node41_header {
  57088. + node40_header head;
  57089. + d32 csum;
  57090. +} PACKED node41_header;
  57091. +
  57092. +/*
  57093. + * functions to get/set fields of node41_header
  57094. + */
  57095. +#define nh41_get_csum(nh) le32_to_cpu(get_unaligned(&(nh)->csum))
  57096. +#define nh41_set_csum(nh, value) put_unaligned(cpu_to_le32(value), &(nh)->csum)
  57097. +
  57098. +int init_node41(znode * node);
  57099. +int parse_node41(znode *node);
  57100. +int max_item_size_node41(void);
  57101. +int shift_node41(coord_t *from, znode *to, shift_direction pend,
  57102. + int delete_child, int including_stop_coord,
  57103. + carry_plugin_info *info);
  57104. +int csum_node41(znode *node, int check);
  57105. +
  57106. +#ifdef GUESS_EXISTS
  57107. +int guess_node41(const znode * node);
  57108. +#endif
  57109. +extern void reiser4_handle_error(void);
  57110. +
  57111. +/* __REISER4_NODE41_H__ */
  57112. +#endif
  57113. +/*
  57114. + Local variables:
  57115. + c-indentation-style: "K&R"
  57116. + mode-name: "LC"
  57117. + c-basic-offset: 8
  57118. + tab-width: 8
  57119. + fill-column: 80
  57120. + scroll-step: 1
  57121. + End:
  57122. +*/
  57123. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/node/node.c linux-4.14.2/fs/reiser4/plugin/node/node.c
  57124. --- linux-4.14.2.orig/fs/reiser4/plugin/node/node.c 1970-01-01 01:00:00.000000000 +0100
  57125. +++ linux-4.14.2/fs/reiser4/plugin/node/node.c 2017-11-26 22:13:09.000000000 +0100
  57126. @@ -0,0 +1,170 @@
  57127. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  57128. +
  57129. +/* Node plugin interface.
  57130. +
  57131. + Description: The tree provides the abstraction of flows, which it
  57132. + internally fragments into items which it stores in nodes.
  57133. +
  57134. + A key_atom is a piece of data bound to a single key.
  57135. +
  57136. + For reasonable space efficiency to be achieved it is often
  57137. + necessary to store key_atoms in the nodes in the form of items, where
  57138. + an item is a sequence of key_atoms of the same or similar type. It is
  57139. + more space-efficient, because the item can implement (very)
  57140. + efficient compression of key_atom's bodies using internal knowledge
  57141. + about their semantics, and it can often avoid having a key for each
  57142. + key_atom. Each type of item has specific operations implemented by its
  57143. + item handler (see balance.c).
  57144. +
  57145. + Rationale: the rest of the code (specifically balancing routines)
  57146. + accesses leaf level nodes through this interface. This way we can
  57147. + implement various block layouts and even combine various layouts
  57148. + within the same tree. Balancing/allocating algorithms should not
  57149. + care about peculiarities of splitting/merging specific item types,
  57150. + but rather should leave that to the item's item handler.
  57151. +
  57152. + Items, including those that provide the abstraction of flows, have
  57153. + the property that if you move them in part or in whole to another
  57154. + node, the balancing code invokes their is_left_mergeable()
  57155. + item_operation to determine if they are mergeable with their new
  57156. + neighbor in the node you have moved them to. For some items the
  57157. + is_left_mergeable() function always returns null.
  57158. +
  57159. + When moving the bodies of items from one node to another:
  57160. +
  57161. + if a partial item is shifted to another node the balancing code invokes
  57162. + an item handler method to handle the item splitting.
  57163. +
  57164. + if the balancing code needs to merge with an item in the node it
  57165. + is shifting to, it will invoke an item handler method to handle
  57166. + the item merging.
  57167. +
  57168. + if it needs to move whole item bodies unchanged, the balancing code uses xmemcpy()
  57169. + adjusting the item headers after the move is done using the node handler.
  57170. +*/
  57171. +
  57172. +#include "../../forward.h"
  57173. +#include "../../debug.h"
  57174. +#include "../../key.h"
  57175. +#include "../../coord.h"
  57176. +#include "../plugin_header.h"
  57177. +#include "../item/item.h"
  57178. +#include "node.h"
  57179. +#include "../plugin.h"
  57180. +#include "../../znode.h"
  57181. +#include "../../tree.h"
  57182. +#include "../../super.h"
  57183. +#include "../../reiser4.h"
  57184. +
  57185. +/**
  57186. + * leftmost_key_in_node - get the smallest key in node
  57187. + * @node:
  57188. + * @key: store result here
  57189. + *
  57190. + * Stores the leftmost key of @node in @key.
  57191. + */
  57192. +reiser4_key *leftmost_key_in_node(const znode *node, reiser4_key *key)
  57193. +{
  57194. + assert("nikita-1634", node != NULL);
  57195. + assert("nikita-1635", key != NULL);
  57196. +
  57197. + if (!node_is_empty(node)) {
  57198. + coord_t first_item;
  57199. +
  57200. + coord_init_first_unit(&first_item, (znode *) node);
  57201. + item_key_by_coord(&first_item, key);
  57202. + } else
  57203. + *key = *reiser4_max_key();
  57204. + return key;
  57205. +}
  57206. +
  57207. +node_plugin node_plugins[LAST_NODE_ID] = {
  57208. + [NODE40_ID] = {
  57209. + .h = {
  57210. + .type_id = REISER4_NODE_PLUGIN_TYPE,
  57211. + .id = NODE40_ID,
  57212. + .pops = NULL,
  57213. + .label = "unified",
  57214. + .desc = "unified node layout",
  57215. + .linkage = {NULL, NULL}
  57216. + },
  57217. + .item_overhead = item_overhead_node40,
  57218. + .free_space = free_space_node40,
  57219. + .lookup = lookup_node40,
  57220. + .num_of_items = num_of_items_node40,
  57221. + .item_by_coord = item_by_coord_node40,
  57222. + .length_by_coord = length_by_coord_node40,
  57223. + .plugin_by_coord = plugin_by_coord_node40,
  57224. + .key_at = key_at_node40,
  57225. + .estimate = estimate_node40,
  57226. + .check = check_node40,
  57227. + .parse = parse_node40,
  57228. + .init = init_node40,
  57229. +#ifdef GUESS_EXISTS
  57230. + .guess = guess_node40,
  57231. +#endif
  57232. + .change_item_size = change_item_size_node40,
  57233. + .create_item = create_item_node40,
  57234. + .update_item_key = update_item_key_node40,
  57235. + .cut_and_kill = kill_node40,
  57236. + .cut = cut_node40,
  57237. + .shift = shift_node40,
  57238. + .shrink_item = shrink_item_node40,
  57239. + .fast_insert = fast_insert_node40,
  57240. + .fast_paste = fast_paste_node40,
  57241. + .fast_cut = fast_cut_node40,
  57242. + .max_item_size = max_item_size_node40,
  57243. + .prepare_removal = prepare_removal_node40,
  57244. + .set_item_plugin = set_item_plugin_node40
  57245. + },
  57246. + [NODE41_ID] = {
  57247. + .h = {
  57248. + .type_id = REISER4_NODE_PLUGIN_TYPE,
  57249. + .id = NODE41_ID,
  57250. + .pops = NULL,
  57251. + .label = "node41",
  57252. + .desc = "node41 layout",
  57253. + .linkage = {NULL, NULL}
  57254. + },
  57255. + .item_overhead = item_overhead_node40,
  57256. + .free_space = free_space_node40,
  57257. + .lookup = lookup_node40,
  57258. + .num_of_items = num_of_items_node40,
  57259. + .item_by_coord = item_by_coord_node40,
  57260. + .length_by_coord = length_by_coord_node40,
  57261. + .plugin_by_coord = plugin_by_coord_node40,
  57262. + .key_at = key_at_node40,
  57263. + .estimate = estimate_node40,
  57264. + .check = NULL,
  57265. + .parse = parse_node41,
  57266. + .init = init_node41,
  57267. +#ifdef GUESS_EXISTS
  57268. + .guess = guess_node41,
  57269. +#endif
  57270. + .change_item_size = change_item_size_node40,
  57271. + .create_item = create_item_node40,
  57272. + .update_item_key = update_item_key_node40,
  57273. + .cut_and_kill = kill_node40,
  57274. + .cut = cut_node40,
  57275. + .shift = shift_node41,
  57276. + .shrink_item = shrink_item_node40,
  57277. + .fast_insert = fast_insert_node40,
  57278. + .fast_paste = fast_paste_node40,
  57279. + .fast_cut = fast_cut_node40,
  57280. + .max_item_size = max_item_size_node41,
  57281. + .prepare_removal = prepare_removal_node40,
  57282. + .set_item_plugin = set_item_plugin_node40,
  57283. + .csum = csum_node41
  57284. + }
  57285. +};
  57286. +
  57287. +/*
  57288. + Local variables:
  57289. + c-indentation-style: "K&R"
  57290. + mode-name: "LC"
  57291. + c-basic-offset: 8
  57292. + tab-width: 8
  57293. + fill-column: 120
  57294. + scroll-step: 1
  57295. + End:
  57296. +*/
  57297. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/node/node.h linux-4.14.2/fs/reiser4/plugin/node/node.h
  57298. --- linux-4.14.2.orig/fs/reiser4/plugin/node/node.h 1970-01-01 01:00:00.000000000 +0100
  57299. +++ linux-4.14.2/fs/reiser4/plugin/node/node.h 2017-11-26 22:13:09.000000000 +0100
  57300. @@ -0,0 +1,275 @@
  57301. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  57302. +
  57303. +/* We need a definition of the default node layout here. */
  57304. +
  57305. +/* Generally speaking, it is best to have free space in the middle of the
  57306. + node so that two sets of things can grow towards it, and to have the
  57307. + item bodies on the left so that the last one of them grows into free
  57308. + space. We optimize for the case where we append new items to the end
  57309. + of the node, or grow the last item, because it hurts nothing to so
  57310. + optimize and it is a common special case to do massive insertions in
  57311. + increasing key order (and one of cases more likely to have a real user
  57312. + notice the delay time for).
  57313. +
  57314. + formatted leaf default layout: (leaf1)
  57315. +
  57316. + |node header:item bodies:free space:key + pluginid + item offset|
  57317. +
  57318. + We grow towards the middle, optimizing layout for the case where we
  57319. + append new items to the end of the node. The node header is fixed
  57320. + length. Keys, and item offsets plus pluginids for the items
  57321. + corresponding to them are in increasing key order, and are fixed
  57322. + length. Item offsets are relative to start of node (16 bits creating
  57323. + a node size limit of 64k, 12 bits might be a better choice....). Item
  57324. + bodies are in decreasing key order. Item bodies have a variable size.
  57325. + There is a one to one to one mapping of keys to item offsets to item
  57326. + bodies. Item offsets consist of pointers to the zeroth byte of the
  57327. + item body. Item length equals the start of the next item minus the
  57328. + start of this item, except the zeroth item whose length equals the end
  57329. + of the node minus the start of that item (plus a byte). In other
  57330. + words, the item length is not recorded anywhere, and it does not need
  57331. + to be since it is computable.
  57332. +
  57333. + Leaf variable length items and keys layout : (lvar)
  57334. +
  57335. + |node header:key offset + item offset + pluginid triplets:free space:key bodies:item bodies|
  57336. +
  57337. + We grow towards the middle, optimizing layout for the case where we
  57338. + append new items to the end of the node. The node header is fixed
  57339. + length. Keys and item offsets for the items corresponding to them are
  57340. + in increasing key order, and keys are variable length. Item offsets
  57341. + are relative to start of node (16 bits). Item bodies are in
  57342. + decreasing key order. Item bodies have a variable size. There is a
  57343. + one to one to one mapping of keys to item offsets to item bodies.
  57344. + Item offsets consist of pointers to the zeroth byte of the item body.
  57345. + Item length equals the start of the next item's key minus the start of
  57346. + this item, except the zeroth item whose length equals the end of the
  57347. + node minus the start of that item (plus a byte).
  57348. +
  57349. + leaf compressed keys layout: (lcomp)
  57350. +
  57351. + |node header:key offset + key inherit + item offset pairs:free space:key bodies:item bodies|
  57352. +
  57353. + We grow towards the middle, optimizing layout for the case where we
  57354. + append new items to the end of the node. The node header is fixed
  57355. + length. Keys and item offsets for the items corresponding to them are
  57356. + in increasing key order, and keys are variable length. The "key
  57357. + inherit" field indicates how much of the key prefix is identical to
  57358. + the previous key (stem compression as described in "Managing
  57359. + Gigabytes" is used). key_inherit is a one byte integer. The
  57360. + intra-node searches performed through this layout are linear searches,
  57361. + and this is theorized to not hurt performance much due to the high
  57362. + cost of processor stalls on modern CPUs, and the small number of keys
  57363. + in a single node. Item offsets are relative to start of node (16
  57364. + bits). Item bodies are in decreasing key order. Item bodies have a
  57365. + variable size. There is a one to one to one mapping of keys to item
  57366. + offsets to item bodies. Item offsets consist of pointers to the
  57367. + zeroth byte of the item body. Item length equals the start of the
  57368. + next item minus the start of this item, except the zeroth item whose
  57369. + length equals the end of the node minus the start of that item (plus a
  57370. + byte). In other words, item length and key length is not recorded
  57371. + anywhere, and it does not need to be since it is computable.
  57372. +
  57373. + internal node default layout: (idef1)
  57374. +
  57375. + just like ldef1 except that item bodies are either blocknrs of
  57376. + children or extents, and moving them may require updating parent
  57377. + pointers in the nodes that they point to.
  57378. +*/
  57379. +
  57380. +/* There is an inherent 3-way tradeoff between optimizing and
  57381. + exchanging disks between different architectures and code
  57382. + complexity. This is optimal and simple and inexchangeable.
  57383. + Someone else can do the code for exchanging disks and make it
  57384. + complex. It would not be that hard. Using other than the PAGE_SIZE
  57385. + might be suboptimal.
  57386. +*/
  57387. +
  57388. +#if !defined( __REISER4_NODE_H__ )
  57389. +#define __REISER4_NODE_H__
  57390. +
  57391. +#define LEAF40_NODE_SIZE PAGE_CACHE_SIZE
  57392. +
  57393. +#include "../../dformat.h"
  57394. +#include "../plugin_header.h"
  57395. +
  57396. +#include <linux/types.h>
  57397. +
  57398. +typedef enum {
  57399. + NS_FOUND = 0,
  57400. + NS_NOT_FOUND = -ENOENT
  57401. +} node_search_result;
  57402. +
  57403. +/* Maximal possible space overhead for creation of new item in a node */
  57404. +#define REISER4_NODE_MAX_OVERHEAD ( sizeof( reiser4_key ) + 32 )
  57405. +
  57406. +typedef enum {
  57407. + REISER4_NODE_DKEYS = (1 << 0),
  57408. + REISER4_NODE_TREE_STABLE = (1 << 1)
  57409. +} reiser4_node_check_flag;
  57410. +
  57411. +/* cut and cut_and_kill have too long a list of parameters. This structure is just to save some space on the stack */
  57412. +struct cut_list {
  57413. + coord_t *from;
  57414. + coord_t *to;
  57415. + const reiser4_key *from_key;
  57416. + const reiser4_key *to_key;
  57417. + reiser4_key *smallest_removed;
  57418. + carry_plugin_info *info;
  57419. + __u32 flags;
  57420. + struct inode *inode; /* this is to pass list of eflushed jnodes down to extent_kill_hook */
  57421. + lock_handle *left;
  57422. + lock_handle *right;
  57423. +};
  57424. +
  57425. +struct carry_cut_data;
  57426. +struct carry_kill_data;
  57427. +
  57428. +/* The responsibility of the node plugin is to store and give access
  57429. + to the sequence of items within the node. */
  57430. +typedef struct node_plugin {
  57431. + /* generic plugin fields */
  57432. + plugin_header h;
  57433. +
  57434. + /* calculates the amount of space that will be required to store an
  57435. + item which is in addition to the space consumed by the item body.
  57436. + (the space consumed by the item body can be gotten by calling
  57437. + item->estimate) */
  57438. + size_t(*item_overhead) (const znode * node, flow_t * f);
  57439. +
  57440. + /* returns free space by looking into node (i.e., without using
  57441. + znode->free_space). */
  57442. + size_t(*free_space) (znode * node);
  57443. + /* search within the node for the one item which might
  57444. + contain the key, invoking item->search_within to search within
  57445. + that item to see if it is in there */
  57446. + node_search_result(*lookup) (znode * node, const reiser4_key * key,
  57447. + lookup_bias bias, coord_t * coord);
  57448. + /* number of items in node */
  57449. + int (*num_of_items) (const znode * node);
  57450. +
  57451. + /* store information about item in @coord in @data */
  57452. + /* break into several node ops, don't add any more uses of this before doing so */
  57453. + /*int ( *item_at )( const coord_t *coord, reiser4_item_data *data ); */
  57454. + char *(*item_by_coord) (const coord_t * coord);
  57455. + int (*length_by_coord) (const coord_t * coord);
  57456. + item_plugin *(*plugin_by_coord) (const coord_t * coord);
  57457. +
  57458. + /* store item key in @key */
  57459. + reiser4_key *(*key_at) (const coord_t * coord, reiser4_key * key);
  57460. + /* conservatively estimate whether unit of what size can fit
  57461. + into node. This estimation should be performed without
  57462. + actually looking into the node's content (free space is saved in
  57463. + znode). */
  57464. + size_t(*estimate) (znode * node);
  57465. +
  57466. + /* performs every consistency check the node plugin author could
  57467. + imagine. Optional. */
  57468. + int (*check) (const znode * node, __u32 flags, const char **error);
  57469. +
  57470. + /* Called when node is read into memory and node plugin is
  57471. + already detected. This should read some data into znode (like free
  57472. + space counter) and, optionally, check data consistency.
  57473. + */
  57474. + int (*parse) (znode * node);
  57475. + /* This method is called on a new node to initialise plugin specific
  57476. + data (header, etc.) */
  57477. + int (*init) (znode * node);
  57478. + /* Check whether @node content conforms to this plugin format.
  57479. + Probably only useful after support for old V3.x formats is added.
  57480. + Uncomment after 4.0 only.
  57481. + */
  57482. + /* int ( *guess )( const znode *node ); */
  57483. +#if REISER4_DEBUG
  57484. + void (*print) (const char *prefix, const znode * node, __u32 flags);
  57485. +#endif
  57486. + /* change size of @item by @by bytes. @item->node has enough free
  57487. + space. When @by > 0 - free space is appended to end of item. When
  57488. + @by < 0 - item is truncated - it is assumed that last @by bytes of
  57489. + the item are freed already */
  57490. + void (*change_item_size) (coord_t * item, int by);
  57491. +
  57492. + /* create new item @length bytes long in coord @target */
  57493. + int (*create_item) (coord_t * target, const reiser4_key * key,
  57494. + reiser4_item_data * data, carry_plugin_info * info);
  57495. +
  57496. + /* update key of item. */
  57497. + void (*update_item_key) (coord_t * target, const reiser4_key * key,
  57498. + carry_plugin_info * info);
  57499. +
  57500. + int (*cut_and_kill) (struct carry_kill_data *, carry_plugin_info *);
  57501. + int (*cut) (struct carry_cut_data *, carry_plugin_info *);
  57502. +
  57503. + /*
  57504. + * shrink item pointed to by @coord by @delta bytes.
  57505. + */
  57506. + int (*shrink_item) (coord_t * coord, int delta);
  57507. +
  57508. + /* copy as much as possible but not more than up to @stop from
  57509. + @stop->node to @target. If (pend == append) then data from beginning of
  57510. + @stop->node are copied to the end of @target. If (pend == prepend) then
  57511. + data from the end of @stop->node are copied to the beginning of
  57512. + @target. Copied data are removed from @stop->node. Information
  57513. + about what to do on upper level is stored in @todo */
  57514. + int (*shift) (coord_t * stop, znode * target, shift_direction pend,
  57515. + int delete_node, int including_insert_coord,
  57516. + carry_plugin_info * info);
  57517. + /* return true if this node allows skip carry() in some situations
  57518. + (see fs/reiser4/tree.c:insert_by_coord()). Reiser3.x format
  57519. + emulation doesn't.
  57520. +
  57521. + This will speed up insertions that don't require updates to the
  57522. + parent, by bypassing initialisation of carry() structures. It's
  57523. + believed that majority of insertions will fit there.
  57524. +
  57525. + */
  57526. + int (*fast_insert) (const coord_t * coord);
  57527. + int (*fast_paste) (const coord_t * coord);
  57528. + int (*fast_cut) (const coord_t * coord);
  57529. + /* this limits max size of item which can be inserted into a node and
  57530. + number of bytes item in a node may be appended with */
  57531. + int (*max_item_size) (void);
  57532. + int (*prepare_removal) (znode * empty, carry_plugin_info * info);
  57533. + /* change plugin id of items which are in a node already. Currently it is used in tail conversion for regular
  57534. + * files */
  57535. + int (*set_item_plugin) (coord_t * coord, item_id);
  57536. + /* calculate and check/update znode's checksum
  57537. + (if @check is true, then check, otherwise update) */
  57538. + int (*csum)(znode *node, int check);
  57539. +} node_plugin;
  57540. +
  57541. +typedef enum {
  57542. + NODE40_ID, /* standard unified node layout used for both
  57543. + leaf and internal nodes */
  57544. + NODE41_ID, /* node layout with a checksum */
  57545. + LAST_NODE_ID
  57546. +} reiser4_node_id;
  57547. +
  57548. +extern reiser4_key *leftmost_key_in_node(const znode * node, reiser4_key * key);
  57549. +#if REISER4_DEBUG
  57550. +extern void print_node_content(const char *prefix, const znode * node,
  57551. + __u32 flags);
  57552. +#endif
  57553. +
  57554. +extern void indent_znode(const znode * node);
  57555. +
  57556. +typedef struct common_node_header {
  57557. + /*
  57558. + * identifier of node plugin. Must be located at the very beginning of
  57559. + * a node.
  57560. + */
  57561. + __le16 plugin_id;
  57562. +} common_node_header;
  57563. +
  57564. +/* __REISER4_NODE_H__ */
  57565. +#endif
  57566. +/*
  57567. + * Local variables:
  57568. + * c-indentation-style: "K&R"
  57569. + * mode-name: "LC"
  57570. + * c-basic-offset: 8
  57571. + * tab-width: 8
  57572. + * fill-column: 79
  57573. + * scroll-step: 1
  57574. + * End:
  57575. + */
  57576. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/object.c linux-4.14.2/fs/reiser4/plugin/object.c
  57577. --- linux-4.14.2.orig/fs/reiser4/plugin/object.c 1970-01-01 01:00:00.000000000 +0100
  57578. +++ linux-4.14.2/fs/reiser4/plugin/object.c 2017-11-26 22:13:09.000000000 +0100
  57579. @@ -0,0 +1,553 @@
  57580. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  57581. + * reiser4/README */
  57582. +
  57583. +/*
  57584. + * Examples of object plugins: file, directory, symlink, special file.
  57585. + *
  57586. + * Plugins associated with inode:
  57587. + *
  57588. + * Plugin of inode is plugin referenced by plugin-id field of on-disk
  57589. + * stat-data. How we store this plugin in in-core inode is not
  57590. + * important. Currently pointers are used, another variant is to store offsets
  57591. + * and do array lookup on each access.
  57592. + *
  57593. + * Now, each inode has one selected plugin: object plugin that
  57594. + * determines what type of file this object is: directory, regular etc.
  57595. + *
  57596. + * This main plugin can use other plugins that are thus subordinated to
  57597. + * it. Directory instance of object plugin uses hash; regular file
  57598. + * instance uses tail policy plugin.
  57599. + *
  57600. + * Object plugin is either taken from id in stat-data or guessed from
  57601. + * i_mode bits. Once it is established we ask it to install its
  57602. + * subordinate plugins, by looking again in stat-data or inheriting them
  57603. + * from parent.
  57604. + *
  57605. + * How new inode is initialized during ->read_inode():
  57606. + * 1 read stat-data and initialize inode fields: i_size, i_mode,
  57607. + * i_generation, capabilities etc.
  57608. + * 2 read plugin id from stat data or try to guess plugin id
  57609. + * from inode->i_mode bits if plugin id is missing.
  57610. + * 3 Call ->init_inode() method of stat-data plugin to initialise inode fields.
  57611. + *
  57612. + * NIKITA-FIXME-HANS: can you say a little about 1 being done before 3? What
  57613. + * if stat data does contain i_size, etc., due to it being an unusual plugin?
  57614. + *
  57615. + * 4 Call ->activate() method of object's plugin. Plugin is either read
  57616. + * from stat-data or guessed from mode bits
  57617. + * 5 Call ->inherit() method of object plugin to inherit as yet uninitialized
  57618. + * plugins from parent.
  57619. + *
  57620. + * Easy induction proves that on last step all plugins of inode would be
  57621. + * initialized.
  57622. + *
  57623. + * When creating new object:
  57624. + * 1 obtain object plugin id (see next period)
  57625. + * NIKITA-FIXME-HANS: period?
  57626. + * 2 ->install() this plugin
  57627. + * 3 ->inherit() the rest from the parent
  57628. + *
  57629. + * We need some examples of creating an object with default and non-default
  57630. + * plugin ids. Nikita, please create them.
  57631. + */
  57632. +
  57633. +#include "../inode.h"
  57634. +
  57635. +int _bugop(void)
  57636. +{
  57637. + BUG_ON(1);
  57638. + return 0;
  57639. +}
  57640. +
  57641. +#define bugop ((void *)_bugop)
  57642. +
  57643. +static int flow_by_inode_bugop(struct inode *inode, const char __user *buf,
  57644. + int user, loff_t size,
  57645. + loff_t off, rw_op op, flow_t *f)
  57646. +{
  57647. + BUG_ON(1);
  57648. + return 0;
  57649. +}
  57650. +
  57651. +static int key_by_inode_bugop(struct inode *inode, loff_t off, reiser4_key *key)
  57652. +{
  57653. + BUG_ON(1);
  57654. + return 0;
  57655. +}
  57656. +
  57657. +static int _dummyop(void)
  57658. +{
  57659. + return 0;
  57660. +}
  57661. +
  57662. +#define dummyop ((void *)_dummyop)
  57663. +
  57664. +static int change_file(struct inode *inode,
  57665. + reiser4_plugin * plugin,
  57666. + pset_member memb)
  57667. +{
  57668. + /* cannot change object plugin of already existing object */
  57669. + if (memb == PSET_FILE)
  57670. + return RETERR(-EINVAL);
  57671. +
  57672. + /* Change PSET_CREATE */
  57673. + return aset_set_unsafe(&reiser4_inode_data(inode)->pset, memb, plugin);
  57674. +}
  57675. +
  57676. +static reiser4_plugin_ops file_plugin_ops = {
  57677. + .change = change_file
  57678. +};
  57679. +
  57680. +static struct inode_operations null_i_ops = {.create = NULL};
  57681. +static struct file_operations null_f_ops = {.owner = NULL};
  57682. +static struct address_space_operations null_a_ops = {.writepage = NULL};
  57683. +
  57684. +/*
  57685. + * Reiser4 provides for VFS either dispatcher, or common (fop,
  57686. + * iop, aop) method.
  57687. + *
  57688. + * Dispatchers (suffixed with "dispatch") pass management to
  57689. + * proper plugin in accordance with plugin table (pset) located
  57690. + * in the private part of inode.
  57691. + *
  57692. + * Common methods are NOT prefixed with "dispatch". They are
  57693. + * the same for all plugins of FILE interface, and, hence, no
  57694. + * dispatching is needed.
  57695. + */
  57696. +
  57697. +/*
  57698. + * VFS methods for regular files
  57699. + */
  57700. +static struct inode_operations regular_file_i_ops = {
  57701. + .permission = reiser4_permission_common,
  57702. + .setattr = reiser4_setattr_dispatch,
  57703. + .getattr = reiser4_getattr_common
  57704. +};
  57705. +static struct file_operations regular_file_f_ops = {
  57706. + .llseek = generic_file_llseek,
  57707. + .read = reiser4_read_dispatch,
  57708. + .write = reiser4_write_dispatch,
  57709. + .read_iter = generic_file_read_iter,
  57710. + .unlocked_ioctl = reiser4_ioctl_dispatch,
  57711. +#ifdef CONFIG_COMPAT
  57712. + .compat_ioctl = reiser4_ioctl_dispatch,
  57713. +#endif
  57714. + .mmap = reiser4_mmap_dispatch,
  57715. + .open = reiser4_open_dispatch,
  57716. + .release = reiser4_release_dispatch,
  57717. + .fsync = reiser4_sync_file_common,
  57718. + .splice_read = generic_file_splice_read,
  57719. +};
  57720. +static struct address_space_operations regular_file_a_ops = {
  57721. + .writepage = reiser4_writepage,
  57722. + .readpage = reiser4_readpage_dispatch,
  57723. + //.sync_page = block_sync_page,
  57724. + .writepages = reiser4_writepages_dispatch,
  57725. + .set_page_dirty = reiser4_set_page_dirty,
  57726. + .readpages = reiser4_readpages_dispatch,
  57727. + .write_begin = reiser4_write_begin_dispatch,
  57728. + .write_end = reiser4_write_end_dispatch,
  57729. + .bmap = reiser4_bmap_dispatch,
  57730. + .invalidatepage = reiser4_invalidatepage,
  57731. + .releasepage = reiser4_releasepage,
  57732. + .migratepage = reiser4_migratepage
  57733. +};
  57734. +
  57735. +/* VFS methods for symlink files */
  57736. +static struct inode_operations symlink_file_i_ops = {
  57737. + .get_link = reiser4_get_link_common,
  57738. + .permission = reiser4_permission_common,
  57739. + .setattr = reiser4_setattr_common,
  57740. + .getattr = reiser4_getattr_common
  57741. +};
  57742. +
  57743. +/* VFS methods for special files */
  57744. +static struct inode_operations special_file_i_ops = {
  57745. + .permission = reiser4_permission_common,
  57746. + .setattr = reiser4_setattr_common,
  57747. + .getattr = reiser4_getattr_common
  57748. +};
  57749. +
  57750. +/* VFS methods for directories */
  57751. +static struct inode_operations directory_i_ops = {
  57752. + .create = reiser4_create_common,
  57753. + .lookup = reiser4_lookup_common,
  57754. + .link = reiser4_link_common,
  57755. + .unlink = reiser4_unlink_common,
  57756. + .symlink = reiser4_symlink_common,
  57757. + .mkdir = reiser4_mkdir_common,
  57758. + .rmdir = reiser4_unlink_common,
  57759. + .mknod = reiser4_mknod_common,
  57760. + .rename = reiser4_rename2_common,
  57761. + .permission = reiser4_permission_common,
  57762. + .setattr = reiser4_setattr_common,
  57763. + .getattr = reiser4_getattr_common
  57764. +};
  57765. +static struct file_operations directory_f_ops = {
  57766. + .llseek = reiser4_llseek_dir_common,
  57767. + .read = generic_read_dir,
  57768. + .iterate = reiser4_iterate_common,
  57769. + .release = reiser4_release_dir_common,
  57770. + .fsync = reiser4_sync_common
  57771. +};
  57772. +static struct address_space_operations directory_a_ops = {
  57773. + .writepages = dummyop,
  57774. +};
  57775. +
  57776. +/*
  57777. + * Definitions of object plugins.
  57778. + */
  57779. +
  57780. +file_plugin file_plugins[LAST_FILE_PLUGIN_ID] = {
  57781. + [UNIX_FILE_PLUGIN_ID] = {
  57782. + .h = {
  57783. + .type_id = REISER4_FILE_PLUGIN_TYPE,
  57784. + .id = UNIX_FILE_PLUGIN_ID,
  57785. + .groups = (1 << REISER4_REGULAR_FILE),
  57786. + .pops = &file_plugin_ops,
  57787. + .label = "reg",
  57788. + .desc = "regular file",
  57789. + .linkage = {NULL, NULL},
  57790. + },
  57791. + /*
  57792. + * invariant vfs ops
  57793. + */
  57794. + .inode_ops = &regular_file_i_ops,
  57795. + .file_ops = &regular_file_f_ops,
  57796. + .as_ops = &regular_file_a_ops,
  57797. + /*
  57798. + * private i_ops
  57799. + */
  57800. + .setattr = setattr_unix_file,
  57801. + .open = open_unix_file,
  57802. + .read = read_unix_file,
  57803. + .write = write_unix_file,
  57804. + .ioctl = ioctl_unix_file,
  57805. + .mmap = mmap_unix_file,
  57806. + .release = release_unix_file,
  57807. + /*
  57808. + * private f_ops
  57809. + */
  57810. + .readpage = readpage_unix_file,
  57811. + .readpages = readpages_unix_file,
  57812. + .writepages = writepages_unix_file,
  57813. + .write_begin = write_begin_unix_file,
  57814. + .write_end = write_end_unix_file,
  57815. + /*
  57816. + * private a_ops
  57817. + */
  57818. + .bmap = bmap_unix_file,
  57819. + /*
  57820. + * other private methods
  57821. + */
  57822. + .write_sd_by_inode = write_sd_by_inode_common,
  57823. + .flow_by_inode = flow_by_inode_unix_file,
  57824. + .key_by_inode = key_by_inode_and_offset_common,
  57825. + .set_plug_in_inode = set_plug_in_inode_common,
  57826. + .adjust_to_parent = adjust_to_parent_common,
  57827. + .create_object = reiser4_create_object_common,
  57828. + .delete_object = delete_object_unix_file,
  57829. + .add_link = reiser4_add_link_common,
  57830. + .rem_link = reiser4_rem_link_common,
  57831. + .owns_item = owns_item_unix_file,
  57832. + .can_add_link = can_add_link_common,
  57833. + .detach = dummyop,
  57834. + .bind = dummyop,
  57835. + .safelink = safelink_common,
  57836. + .estimate = {
  57837. + .create = estimate_create_common,
  57838. + .update = estimate_update_common,
  57839. + .unlink = estimate_unlink_common
  57840. + },
  57841. + .init_inode_data = init_inode_data_unix_file,
  57842. + .cut_tree_worker = cut_tree_worker_common,
  57843. + .wire = {
  57844. + .write = wire_write_common,
  57845. + .read = wire_read_common,
  57846. + .get = wire_get_common,
  57847. + .size = wire_size_common,
  57848. + .done = wire_done_common
  57849. + }
  57850. + },
  57851. + [DIRECTORY_FILE_PLUGIN_ID] = {
  57852. + .h = {
  57853. + .type_id = REISER4_FILE_PLUGIN_TYPE,
  57854. + .id = DIRECTORY_FILE_PLUGIN_ID,
  57855. + .groups = (1 << REISER4_DIRECTORY_FILE),
  57856. + .pops = &file_plugin_ops,
  57857. + .label = "dir",
  57858. + .desc = "directory",
  57859. + .linkage = {NULL, NULL}
  57860. + },
  57861. + .inode_ops = &null_i_ops,
  57862. + .file_ops = &null_f_ops,
  57863. + .as_ops = &null_a_ops,
  57864. +
  57865. + .write_sd_by_inode = write_sd_by_inode_common,
  57866. + .flow_by_inode = flow_by_inode_bugop,
  57867. + .key_by_inode = key_by_inode_bugop,
  57868. + .set_plug_in_inode = set_plug_in_inode_common,
  57869. + .adjust_to_parent = adjust_to_parent_common_dir,
  57870. + .create_object = reiser4_create_object_common,
  57871. + .delete_object = reiser4_delete_dir_common,
  57872. + .add_link = reiser4_add_link_common,
  57873. + .rem_link = rem_link_common_dir,
  57874. + .owns_item = owns_item_common_dir,
  57875. + .can_add_link = can_add_link_common,
  57876. + .can_rem_link = can_rem_link_common_dir,
  57877. + .detach = reiser4_detach_common_dir,
  57878. + .bind = reiser4_bind_common_dir,
  57879. + .safelink = safelink_common,
  57880. + .estimate = {
  57881. + .create = estimate_create_common_dir,
  57882. + .update = estimate_update_common,
  57883. + .unlink = estimate_unlink_common_dir
  57884. + },
  57885. + .wire = {
  57886. + .write = wire_write_common,
  57887. + .read = wire_read_common,
  57888. + .get = wire_get_common,
  57889. + .size = wire_size_common,
  57890. + .done = wire_done_common
  57891. + },
  57892. + .init_inode_data = init_inode_ordering,
  57893. + .cut_tree_worker = cut_tree_worker_common,
  57894. + },
  57895. + [SYMLINK_FILE_PLUGIN_ID] = {
  57896. + .h = {
  57897. + .type_id = REISER4_FILE_PLUGIN_TYPE,
  57898. + .id = SYMLINK_FILE_PLUGIN_ID,
  57899. + .groups = (1 << REISER4_SYMLINK_FILE),
  57900. + .pops = &file_plugin_ops,
  57901. + .label = "symlink",
  57902. + .desc = "symbolic link",
  57903. + .linkage = {NULL,NULL}
  57904. + },
  57905. + .inode_ops = &symlink_file_i_ops,
  57906. + /* inode->i_fop of symlink is initialized
  57907. + by NULL in setup_inode_ops */
  57908. + .file_ops = &null_f_ops,
  57909. + .as_ops = &null_a_ops,
  57910. +
  57911. + .write_sd_by_inode = write_sd_by_inode_common,
  57912. + .set_plug_in_inode = set_plug_in_inode_common,
  57913. + .adjust_to_parent = adjust_to_parent_common,
  57914. + .create_object = reiser4_create_symlink,
  57915. + .delete_object = reiser4_delete_object_common,
  57916. + .add_link = reiser4_add_link_common,
  57917. + .rem_link = reiser4_rem_link_common,
  57918. + .can_add_link = can_add_link_common,
  57919. + .detach = dummyop,
  57920. + .bind = dummyop,
  57921. + .safelink = safelink_common,
  57922. + .estimate = {
  57923. + .create = estimate_create_common,
  57924. + .update = estimate_update_common,
  57925. + .unlink = estimate_unlink_common
  57926. + },
  57927. + .init_inode_data = init_inode_ordering,
  57928. + .cut_tree_worker = cut_tree_worker_common,
  57929. + .destroy_inode = destroy_inode_symlink,
  57930. + .wire = {
  57931. + .write = wire_write_common,
  57932. + .read = wire_read_common,
  57933. + .get = wire_get_common,
  57934. + .size = wire_size_common,
  57935. + .done = wire_done_common
  57936. + }
  57937. + },
  57938. + [SPECIAL_FILE_PLUGIN_ID] = {
  57939. + .h = {
  57940. + .type_id = REISER4_FILE_PLUGIN_TYPE,
  57941. + .id = SPECIAL_FILE_PLUGIN_ID,
  57942. + .groups = (1 << REISER4_SPECIAL_FILE),
  57943. + .pops = &file_plugin_ops,
  57944. + .label = "special",
  57945. + .desc =
  57946. + "special: fifo, device or socket",
  57947. + .linkage = {NULL, NULL}
  57948. + },
  57949. + .inode_ops = &special_file_i_ops,
  57950. + /* file_ops of special files (sockets, block, char, fifo) are
  57951. + initialized by init_special_inode. */
  57952. + .file_ops = &null_f_ops,
  57953. + .as_ops = &null_a_ops,
  57954. +
  57955. + .write_sd_by_inode = write_sd_by_inode_common,
  57956. + .set_plug_in_inode = set_plug_in_inode_common,
  57957. + .adjust_to_parent = adjust_to_parent_common,
  57958. + .create_object = reiser4_create_object_common,
  57959. + .delete_object = reiser4_delete_object_common,
  57960. + .add_link = reiser4_add_link_common,
  57961. + .rem_link = reiser4_rem_link_common,
  57962. + .owns_item = owns_item_common,
  57963. + .can_add_link = can_add_link_common,
  57964. + .detach = dummyop,
  57965. + .bind = dummyop,
  57966. + .safelink = safelink_common,
  57967. + .estimate = {
  57968. + .create = estimate_create_common,
  57969. + .update = estimate_update_common,
  57970. + .unlink = estimate_unlink_common
  57971. + },
  57972. + .init_inode_data = init_inode_ordering,
  57973. + .cut_tree_worker = cut_tree_worker_common,
  57974. + .wire = {
  57975. + .write = wire_write_common,
  57976. + .read = wire_read_common,
  57977. + .get = wire_get_common,
  57978. + .size = wire_size_common,
  57979. + .done = wire_done_common
  57980. + }
  57981. + },
  57982. + [CRYPTCOMPRESS_FILE_PLUGIN_ID] = {
  57983. + .h = {
  57984. + .type_id = REISER4_FILE_PLUGIN_TYPE,
  57985. + .id = CRYPTCOMPRESS_FILE_PLUGIN_ID,
  57986. + .groups = (1 << REISER4_REGULAR_FILE),
  57987. + .pops = &file_plugin_ops,
  57988. + .label = "cryptcompress",
  57989. + .desc = "cryptcompress file",
  57990. + .linkage = {NULL, NULL}
  57991. + },
  57992. + .inode_ops = &regular_file_i_ops,
  57993. + .file_ops = &regular_file_f_ops,
  57994. + .as_ops = &regular_file_a_ops,
  57995. +
  57996. + .setattr = setattr_cryptcompress,
  57997. + .open = open_cryptcompress,
  57998. + .read = read_cryptcompress,
  57999. + .write = write_cryptcompress,
  58000. + .ioctl = ioctl_cryptcompress,
  58001. + .mmap = mmap_cryptcompress,
  58002. + .release = release_cryptcompress,
  58003. +
  58004. + .readpage = readpage_cryptcompress,
  58005. + .readpages = readpages_cryptcompress,
  58006. + .writepages = writepages_cryptcompress,
  58007. + .write_begin = write_begin_cryptcompress,
  58008. + .write_end = write_end_cryptcompress,
  58009. +
  58010. + .bmap = bmap_cryptcompress,
  58011. +
  58012. + .write_sd_by_inode = write_sd_by_inode_common,
  58013. + .flow_by_inode = flow_by_inode_cryptcompress,
  58014. + .key_by_inode = key_by_inode_cryptcompress,
  58015. + .set_plug_in_inode = set_plug_in_inode_common,
  58016. + .adjust_to_parent = adjust_to_parent_cryptcompress,
  58017. + .create_object = create_object_cryptcompress,
  58018. + .delete_object = delete_object_cryptcompress,
  58019. + .add_link = reiser4_add_link_common,
  58020. + .rem_link = reiser4_rem_link_common,
  58021. + .owns_item = owns_item_common,
  58022. + .can_add_link = can_add_link_common,
  58023. + .detach = dummyop,
  58024. + .bind = dummyop,
  58025. + .safelink = safelink_common,
  58026. + .estimate = {
  58027. + .create = estimate_create_common,
  58028. + .update = estimate_update_common,
  58029. + .unlink = estimate_unlink_common
  58030. + },
  58031. + .init_inode_data = init_inode_data_cryptcompress,
  58032. + .cut_tree_worker = cut_tree_worker_cryptcompress,
  58033. + .destroy_inode = destroy_inode_cryptcompress,
  58034. + .wire = {
  58035. + .write = wire_write_common,
  58036. + .read = wire_read_common,
  58037. + .get = wire_get_common,
  58038. + .size = wire_size_common,
  58039. + .done = wire_done_common
  58040. + }
  58041. + }
  58042. +};
  58043. +
  58044. +static int change_dir(struct inode *inode,
  58045. + reiser4_plugin * plugin,
  58046. + pset_member memb)
  58047. +{
  58048. + /* cannot change dir plugin of already existing object */
  58049. + return RETERR(-EINVAL);
  58050. +}
  58051. +
  58052. +static reiser4_plugin_ops dir_plugin_ops = {
  58053. + .change = change_dir
  58054. +};
  58055. +
  58056. +/*
  58057. + * definition of directory plugins
  58058. + */
  58059. +
  58060. +dir_plugin dir_plugins[LAST_DIR_ID] = {
  58061. + /* standard hashed directory plugin */
  58062. + [HASHED_DIR_PLUGIN_ID] = {
  58063. + .h = {
  58064. + .type_id = REISER4_DIR_PLUGIN_TYPE,
  58065. + .id = HASHED_DIR_PLUGIN_ID,
  58066. + .pops = &dir_plugin_ops,
  58067. + .label = "dir",
  58068. + .desc = "hashed directory",
  58069. + .linkage = {NULL, NULL}
  58070. + },
  58071. + .inode_ops = &directory_i_ops,
  58072. + .file_ops = &directory_f_ops,
  58073. + .as_ops = &directory_a_ops,
  58074. +
  58075. + .get_parent = get_parent_common,
  58076. + .is_name_acceptable = is_name_acceptable_common,
  58077. + .build_entry_key = build_entry_key_hashed,
  58078. + .build_readdir_key = build_readdir_key_common,
  58079. + .add_entry = reiser4_add_entry_common,
  58080. + .rem_entry = reiser4_rem_entry_common,
  58081. + .init = reiser4_dir_init_common,
  58082. + .done = reiser4_dir_done_common,
  58083. + .attach = reiser4_attach_common,
  58084. + .detach = reiser4_detach_common,
  58085. + .estimate = {
  58086. + .add_entry = estimate_add_entry_common,
  58087. + .rem_entry = estimate_rem_entry_common,
  58088. + .unlink = dir_estimate_unlink_common
  58089. + }
  58090. + },
  58091. + /* hashed directory for which seekdir/telldir are guaranteed to
  58092. + * work. Brain-damage. */
  58093. + [SEEKABLE_HASHED_DIR_PLUGIN_ID] = {
  58094. + .h = {
  58095. + .type_id = REISER4_DIR_PLUGIN_TYPE,
  58096. + .id = SEEKABLE_HASHED_DIR_PLUGIN_ID,
  58097. + .pops = &dir_plugin_ops,
  58098. + .label = "dir32",
  58099. + .desc = "directory hashed with 31 bit hash",
  58100. + .linkage = {NULL, NULL}
  58101. + },
  58102. + .inode_ops = &directory_i_ops,
  58103. + .file_ops = &directory_f_ops,
  58104. + .as_ops = &directory_a_ops,
  58105. +
  58106. + .get_parent = get_parent_common,
  58107. + .is_name_acceptable = is_name_acceptable_common,
  58108. + .build_entry_key = build_entry_key_seekable,
  58109. + .build_readdir_key = build_readdir_key_common,
  58110. + .add_entry = reiser4_add_entry_common,
  58111. + .rem_entry = reiser4_rem_entry_common,
  58112. + .init = reiser4_dir_init_common,
  58113. + .done = reiser4_dir_done_common,
  58114. + .attach = reiser4_attach_common,
  58115. + .detach = reiser4_detach_common,
  58116. + .estimate = {
  58117. + .add_entry = estimate_add_entry_common,
  58118. + .rem_entry = estimate_rem_entry_common,
  58119. + .unlink = dir_estimate_unlink_common
  58120. + }
  58121. + }
  58122. +};
  58123. +
  58124. +/* Make Linus happy.
  58125. + Local variables:
  58126. + c-indentation-style: "K&R"
  58127. + mode-name: "LC"
  58128. + c-basic-offset: 8
  58129. + tab-width: 8
  58130. + fill-column: 120
  58131. + End:
  58132. +*/
  58133. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/object.h linux-4.14.2/fs/reiser4/plugin/object.h
  58134. --- linux-4.14.2.orig/fs/reiser4/plugin/object.h 1970-01-01 01:00:00.000000000 +0100
  58135. +++ linux-4.14.2/fs/reiser4/plugin/object.h 2017-11-26 22:13:09.000000000 +0100
  58136. @@ -0,0 +1,117 @@
  58137. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by
  58138. + * reiser4/README */
  58139. +
  58140. +/* Declaration of object plugin functions. */
  58141. +
  58142. +#if !defined(__FS_REISER4_PLUGIN_OBJECT_H__)
  58143. +#define __FS_REISER4_PLUGIN_OBJECT_H__
  58144. +
  58145. +#include "../type_safe_hash.h"
  58146. +
  58147. +/* common implementations of inode operations */
  58148. +int reiser4_create_common(struct inode *parent, struct dentry *dentry,
  58149. + umode_t mode, bool);
  58150. +struct dentry *reiser4_lookup_common(struct inode *parent,
  58151. + struct dentry *dentry,
  58152. + unsigned int);
  58153. +int reiser4_link_common(struct dentry *existing, struct inode *parent,
  58154. + struct dentry *newname);
  58155. +int reiser4_unlink_common(struct inode *parent, struct dentry *victim);
  58156. +int reiser4_mkdir_common(struct inode *parent, struct dentry *dentry, umode_t mode);
  58157. +int reiser4_symlink_common(struct inode *parent, struct dentry *dentry,
  58158. + const char *linkname);
  58159. +int reiser4_mknod_common(struct inode *parent, struct dentry *dentry,
  58160. + umode_t mode, dev_t rdev);
  58161. +int reiser4_rename2_common(struct inode *old_dir, struct dentry *old_name,
  58162. + struct inode *new_dir, struct dentry *new_name,
  58163. + unsigned flags);
  58164. +const char *reiser4_get_link_common(struct dentry *, struct inode *inode,
  58165. + struct delayed_call *done);
  58166. +int reiser4_permission_common(struct inode *, int mask);
  58167. +int reiser4_setattr_common(struct dentry *, struct iattr *);
  58168. +int reiser4_getattr_common(const struct path *path, struct kstat *stat,
  58169. + u32 request_mask, unsigned int flags);
  58170. +
  58171. +/* common implementations of file operations */
  58172. +loff_t reiser4_llseek_dir_common(struct file *, loff_t off, int origin);
  58173. +int reiser4_iterate_common(struct file *, struct dir_context *context);
  58174. +int reiser4_release_dir_common(struct inode *, struct file *);
  58175. +int reiser4_sync_common(struct file *, loff_t, loff_t, int datasync);
  58176. +
  58177. +/* file plugin operations: common implementations */
  58178. +int write_sd_by_inode_common(struct inode *);
  58179. +int key_by_inode_and_offset_common(struct inode *, loff_t, reiser4_key *);
  58180. +int set_plug_in_inode_common(struct inode *object, struct inode *parent,
  58181. + reiser4_object_create_data *);
  58182. +int adjust_to_parent_common(struct inode *object, struct inode *parent,
  58183. + struct inode *root);
  58184. +int adjust_to_parent_common_dir(struct inode *object, struct inode *parent,
  58185. + struct inode *root);
  58186. +int adjust_to_parent_cryptcompress(struct inode *object, struct inode *parent,
  58187. + struct inode *root);
  58188. +int reiser4_create_object_common(struct inode *object, struct inode *parent,
  58189. + reiser4_object_create_data *);
  58190. +int reiser4_delete_object_common(struct inode *);
  58191. +int reiser4_delete_dir_common(struct inode *);
  58192. +int reiser4_add_link_common(struct inode *object, struct inode *parent);
  58193. +int reiser4_rem_link_common(struct inode *object, struct inode *parent);
  58194. +int rem_link_common_dir(struct inode *object, struct inode *parent);
  58195. +int owns_item_common(const struct inode *, const coord_t *);
  58196. +int owns_item_common_dir(const struct inode *, const coord_t *);
  58197. +int can_add_link_common(const struct inode *);
  58198. +int can_rem_link_common_dir(const struct inode *);
  58199. +int reiser4_detach_common_dir(struct inode *child, struct inode *parent);
  58200. +int reiser4_bind_common_dir(struct inode *child, struct inode *parent);
  58201. +int safelink_common(struct inode *, reiser4_safe_link_t, __u64 value);
  58202. +reiser4_block_nr estimate_create_common(const struct inode *);
  58203. +reiser4_block_nr estimate_create_common_dir(const struct inode *);
  58204. +reiser4_block_nr estimate_update_common(const struct inode *);
  58205. +reiser4_block_nr estimate_unlink_common(const struct inode *,
  58206. + const struct inode *);
  58207. +reiser4_block_nr estimate_unlink_common_dir(const struct inode *,
  58208. + const struct inode *);
  58209. +char *wire_write_common(struct inode *, char *start);
  58210. +char *wire_read_common(char *addr, reiser4_object_on_wire *);
  58211. +struct dentry *wire_get_common(struct super_block *, reiser4_object_on_wire *);
  58212. +int wire_size_common(struct inode *);
  58213. +void wire_done_common(reiser4_object_on_wire *);
  58214. +
  58215. +/* dir plugin operations: common implementations */
  58216. +struct dentry *get_parent_common(struct inode *child);
  58217. +int is_name_acceptable_common(const struct inode *, const char *name, int len);
  58218. +void build_entry_key_common(const struct inode *,
  58219. + const struct qstr *qname, reiser4_key *);
  58220. +int build_readdir_key_common(struct file *dir, reiser4_key *);
  58221. +int reiser4_add_entry_common(struct inode *object, struct dentry *where,
  58222. + reiser4_object_create_data * , reiser4_dir_entry_desc *);
  58223. +int reiser4_rem_entry_common(struct inode *object, struct dentry *where,
  58224. + reiser4_dir_entry_desc *);
  58225. +int reiser4_dir_init_common(struct inode *object, struct inode *parent,
  58226. + reiser4_object_create_data *);
  58227. +int reiser4_dir_done_common(struct inode *);
  58228. +int reiser4_attach_common(struct inode *child, struct inode *parent);
  58229. +int reiser4_detach_common(struct inode *object, struct inode *parent);
  58230. +reiser4_block_nr estimate_add_entry_common(const struct inode *);
  58231. +reiser4_block_nr estimate_rem_entry_common(const struct inode *);
  58232. +reiser4_block_nr dir_estimate_unlink_common(const struct inode *,
  58233. + const struct inode *);
  58234. +
  58235. +/* these are essential parts of common implementations, they are to make
  58236. + customized implementations easier */
  58237. +
  58238. +/* merely useful functions */
  58239. +int lookup_sd(struct inode *, znode_lock_mode, coord_t *, lock_handle * ,
  58240. + const reiser4_key * , int silent);
  58241. +
  58242. +/* __FS_REISER4_PLUGIN_OBJECT_H__ */
  58243. +#endif
  58244. +
  58245. +/* Make Linus happy.
  58246. + Local variables:
  58247. + c-indentation-style: "K&R"
  58248. + mode-name: "LC"
  58249. + c-basic-offset: 8
  58250. + tab-width: 8
  58251. + fill-column: 120
  58252. + End:
  58253. +*/
  58254. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/plugin.c linux-4.14.2/fs/reiser4/plugin/plugin.c
  58255. --- linux-4.14.2.orig/fs/reiser4/plugin/plugin.c 1970-01-01 01:00:00.000000000 +0100
  58256. +++ linux-4.14.2/fs/reiser4/plugin/plugin.c 2017-11-26 22:13:09.000000000 +0100
  58257. @@ -0,0 +1,569 @@
  58258. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  58259. + * reiser4/README */
  58260. +
  58261. +/* Basic plugin infrastructure, lookup etc. */
  58262. +
  58263. +/* PLUGINS:
  58264. +
  58265. + Plugins are internal Reiser4 "modules" or "objects" used to increase
  58266. + extensibility and allow external users to easily adapt reiser4 to
  58267. + their needs.
  58268. +
  58269. + Plugins are classified into several disjoint "types". Plugins
  58270. + belonging to the particular plugin type are termed "instances" of
  58271. + this type. Existing types are listed by enum reiser4_plugin_type
  58272. + (see plugin/plugin_header.h)
  58273. +
  58274. +NIKITA-FIXME-HANS: update this list, and review this entire comment for currency
  58275. +
  58276. + Object (file) plugin determines how given file-system object serves
  58277. + standard VFS requests for read, write, seek, mmap etc. Instances of
  58278. + file plugins are: regular file, directory, symlink. Another example
  58279. + of file plugin is audit plugin, that optionally records accesses to
  58280. + underlying object and forwards requests to it.
  58281. +
  58282. + Hash plugins compute hashes used by reiser4 to store and locate
  58283. + files within directories. Instances of hash plugin type are: r5,
  58284. + tea, rupasov.
  58285. +
  58286. + Tail plugins (or, more precisely, tail policy plugins) determine
  58287. + when last part of the file should be stored in a formatted item.
  58288. +
  58289. + Scope and lookup:
  58290. +
  58291. + Each plugin type and each plugin within it is assigned a
  58292. + pair is a globally persistent and user-visible plugin
  58293. + identifier. Internally kernel maintains plugins and plugin types in
  58294. + arrays using an index into those arrays as plugin and plugin type
  58295. + identifiers. File-system in turn, also maintains persistent
  58296. + "dictionary" which is mapping from plugin label to numerical
  58297. + identifier which is stored in file-system objects. That is, we
  58298. + store the offset into the plugin array for that plugin type as the
  58299. + plugin id in the stat data of the filesystem object.
  58300. +
  58301. + Internal kernel plugin type identifier (index in plugins[] array) is
  58302. + of type reiser4_plugin_type. Set of available plugin types is
  58303. + currently static, but dynamic loading doesn't seem to pose
  58304. + insurmountable problems.
  58305. +
  58306. + Within each type plugins are addressed by the identifiers of type
  58307. + reiser4_plugin_id (indices in reiser4_plugin_type_data.builtin[]).
  58308. + Such identifiers are only required to be unique within one type,
  58309. + not globally.
  58310. +
  58311. + Thus, plugin in memory is uniquely identified by the pair (type_id,
  58312. + id).
  58313. +
  58314. + Usage:
  58315. +
  58316. + There exists only one instance of each plugin, but this
  58317. + single instance can be associated with many entities (file-system
  58318. + objects, items, nodes, transactions, file-descriptors etc.). Entity
  58319. + to which a plugin of given type is attached is termed (due to the lack
  58320. + of imagination) "subject" of this plugin type and, by abuse of
  58321. + terminology, subject of particular instance of this type to which
  58322. + it's attached currently. For example, inode is subject of object
  58323. + plugin type. Inode representing directory is subject of directory
  58324. + plugin, hash plugin type and some particular instance of hash plugin
  58325. + type. Inode, representing regular file is subject of "regular file"
  58326. + plugin, tail-policy plugin type etc.
  58327. +
  58328. + With each subject the plugin possibly stores some state. For example,
  58329. + the state of a directory plugin (instance of object plugin type) is pointer
  58330. + to hash plugin (if directories always use hashing that is).
  58331. +
  58332. + Interface:
  58333. +
  58334. + In addition to a scalar identifier, each plugin type and plugin
  58335. + proper has a "label": short string and a "description"---longer
  58336. + descriptive string. Labels and descriptions of plugin types are
  58337. + hard-coded into plugins[] array, declared and defined in
  58338. + plugin.c. Label and description of plugin are stored in .label and
  58339. + .desc fields of reiser4_plugin_header respectively. It's possible to
  58340. + locate plugin by the pair of labels.
  58341. +
  58342. + Features (not implemented):
  58343. +
  58344. + . user-level plugin manipulations:
  58345. + + reiser4("filename/..file_plugin<='audit'");
  58346. + + write(open("filename/..file_plugin"), "audit", 8);
  58347. +
  58348. + . user level utilities lsplug and chplug to manipulate plugins.
  58349. + Utilities are not of primary priority. Possibly they will be not
  58350. + working on v4.0
  58351. +
  58352. + NIKITA-FIXME-HANS: this should be a mkreiserfs option not a mount
  58353. + option, do you agree? I don't think that specifying it at mount time,
  58354. + and then changing it with each mount, is a good model for usage.
  58355. +
  58356. + . mount option "plug" to set-up plugins of root-directory.
  58357. + "plug=foo:bar" will set "bar" as default plugin of type "foo".
  58358. +
  58359. + Limitations:
  58360. +
  58361. + . each plugin type has to provide at least one builtin
  58362. + plugin. This is technical limitation and it can be lifted in the
  58363. + future.
  58364. +
  58365. + TODO:
  58366. +
  58367. + New plugin types/plugins:
  58368. + Things we should be able to separately choose to inherit:
  58369. +
  58370. + security plugins
  58371. +
  58372. + stat data
  58373. +
  58374. + file bodies
  58375. +
  58376. + file plugins
  58377. +
  58378. + dir plugins
  58379. +
  58380. + . perm:acl
  58381. +
  58382. + . audi---audit plugin intercepting and possibly logging all
  58383. + accesses to object. Requires to put stub functions in file_operations
  58384. + instead of generic_file_*.
  58385. +
  58386. +NIKITA-FIXME-HANS: why make overflows a plugin?
  58387. + . over---handle hash overflows
  58388. +
  58389. + . sqnt---handle different access patterns and instruments read-ahead
  58390. +
  58391. +NIKITA-FIXME-HANS: describe the line below in more detail.
  58392. +
  58393. + . hier---handle inheritance of plugins along file-system hierarchy
  58394. +
  58395. + Different kinds of inheritance: on creation vs. on access.
  58396. + Compatible/incompatible plugins.
  58397. + Inheritance for multi-linked files.
  58398. + Layered plugins.
  58399. + Notion of plugin context is abandoned.
  58400. +
  58401. +Each file is associated
  58402. + with one plugin and dependent plugins (hash, etc.) are stored as
  58403. + main plugin state. Now, if we have plugins used for regular files
  58404. + but not for directories, how such plugins would be inherited?
  58405. + . always store them with directories also
  58406. +
  58407. +NIKITA-FIXME-HANS: Do the line above. It is not exclusive of doing
  58408. +the line below which is also useful.
  58409. +
  58410. + . use inheritance hierarchy, independent of file-system namespace
  58411. +*/
  58412. +
  58413. +#include "../debug.h"
  58414. +#include "../dformat.h"
  58415. +#include "plugin_header.h"
  58416. +#include "item/static_stat.h"
  58417. +#include "node/node.h"
  58418. +#include "security/perm.h"
  58419. +#include "space/space_allocator.h"
  58420. +#include "disk_format/disk_format.h"
  58421. +#include "plugin.h"
  58422. +#include "../reiser4.h"
  58423. +#include "../jnode.h"
  58424. +#include "../inode.h"
  58425. +
  58426. +#include <linux/fs.h> /* for struct super_block */
  58427. +
  58428. +/*
  58429. + * init_plugins - initialize plugin sub-system.
  58430. + * Just call this once on reiser4 startup.
  58431. + *
  58432. + * Initializes plugin sub-system. It is part of reiser4 module
  58433. + * initialization. For each plugin of each type init method is called and each
  58434. + * plugin is put into list of plugins.
  58435. + */
  58436. +int init_plugins(void)
  58437. +{
  58438. + reiser4_plugin_type type_id;
  58439. +
  58440. + for (type_id = 0; type_id < REISER4_PLUGIN_TYPES; ++type_id) {
  58441. + struct reiser4_plugin_type_data *ptype;
  58442. + int i;
  58443. +
  58444. + ptype = &plugins[type_id];
  58445. + assert("nikita-3508", ptype->label != NULL);
  58446. + assert("nikita-3509", ptype->type_id == type_id);
  58447. +
  58448. + INIT_LIST_HEAD(&ptype->plugins_list);
  58449. +/* NIKITA-FIXME-HANS: change builtin_num to some other name lacking the term
  58450. + * builtin. */
  58451. + for (i = 0; i < ptype->builtin_num; ++i) {
  58452. + reiser4_plugin *plugin;
  58453. +
  58454. + plugin = plugin_at(ptype, i);
  58455. +
  58456. + if (plugin->h.label == NULL)
  58457. + /* uninitialized slot encountered */
  58458. + continue;
  58459. + assert("nikita-3445", plugin->h.type_id == type_id);
  58460. + plugin->h.id = i;
  58461. + if (plugin->h.pops != NULL &&
  58462. + plugin->h.pops->init != NULL) {
  58463. + int result;
  58464. +
  58465. + result = plugin->h.pops->init(plugin);
  58466. + if (result != 0)
  58467. + return result;
  58468. + }
  58469. + INIT_LIST_HEAD(&plugin->h.linkage);
  58470. + list_add_tail(&plugin->h.linkage, &ptype->plugins_list);
  58471. + }
  58472. + }
  58473. + return 0;
  58474. +}
  58475. +
  58476. +/* true if plugin type id is valid */
  58477. +int is_plugin_type_valid(reiser4_plugin_type type)
  58478. +{
  58479. + /* "type" is unsigned, so no comparison with 0 is
  58480. + necessary */
  58481. + return (type < REISER4_PLUGIN_TYPES);
  58482. +}
  58483. +
  58484. +/* true if plugin id is valid */
  58485. +int is_plugin_id_valid(reiser4_plugin_type type, reiser4_plugin_id id)
  58486. +{
  58487. + assert("nikita-1653", is_plugin_type_valid(type));
  58488. + return id < plugins[type].builtin_num;
  58489. +}
  58490. +
  58491. +/* return plugin by its @type and @id.
  58492. +
  58493. + Both arguments are checked for validness: this is supposed to be called
  58494. + from user-level.
  58495. +
  58496. +NIKITA-FIXME-HANS: Do you instead mean that this checks ids created in
  58497. +user space, and passed to the filesystem by use of method files? Your
  58498. +comment really confused me on the first reading....
  58499. +
  58500. +*/
  58501. +reiser4_plugin *plugin_by_unsafe_id(reiser4_plugin_type type /* plugin type
  58502. + * unchecked */,
  58503. + reiser4_plugin_id id /* plugin id,
  58504. + * unchecked */)
  58505. +{
  58506. + if (is_plugin_type_valid(type)) {
  58507. + if (is_plugin_id_valid(type, id))
  58508. + return plugin_at(&plugins[type], id);
  58509. + else
  58510. + /* id out of bounds */
  58511. + warning("nikita-2913",
  58512. + "Invalid plugin id: [%i:%i]", type, id);
  58513. + } else
  58514. + /* type_id out of bounds */
  58515. + warning("nikita-2914", "Invalid type_id: %i", type);
  58516. + return NULL;
  58517. +}
  58518. +
  58519. +/**
  58520. + * save_plugin_id - store plugin id in disk format
  58521. + * @plugin: plugin to convert
  58522. + * @area: where to store result
  58523. + *
  58524. + * Puts id of @plugin in little endian format to address @area.
  58525. + */
  58526. +int save_plugin_id(reiser4_plugin *plugin /* plugin to convert */ ,
  58527. + d16 * area/* where to store result */)
  58528. +{
  58529. + assert("nikita-1261", plugin != NULL);
  58530. + assert("nikita-1262", area != NULL);
  58531. +
  58532. + put_unaligned(cpu_to_le16(plugin->h.id), area);
  58533. + return 0;
  58534. +}
  58535. +
  58536. +/* list of all plugins of given type */
  58537. +struct list_head *get_plugin_list(reiser4_plugin_type type)
  58538. +{
  58539. + assert("nikita-1056", is_plugin_type_valid(type));
  58540. + return &plugins[type].plugins_list;
  58541. +}
  58542. +
  58543. +static void update_pset_mask(reiser4_inode * info, pset_member memb)
  58544. +{
  58545. + struct dentry *rootdir;
  58546. + reiser4_inode *root;
  58547. +
  58548. + assert("edward-1443", memb != PSET_FILE);
  58549. +
  58550. + rootdir = inode_by_reiser4_inode(info)->i_sb->s_root;
  58551. + if (rootdir != NULL) {
  58552. + root = reiser4_inode_data(rootdir->d_inode);
  58553. + /*
  58554. + * if inode is different from the default one, or we are
  58555. + * changing plugin of root directory, update plugin_mask
  58556. + */
  58557. + if (aset_get(info->pset, memb) !=
  58558. + aset_get(root->pset, memb) ||
  58559. + info == root)
  58560. + info->plugin_mask |= (1 << memb);
  58561. + else
  58562. + info->plugin_mask &= ~(1 << memb);
  58563. + }
  58564. +}
  58565. +
  58566. +/* Get specified plugin set member from parent,
  58567. + or from fs-defaults (if no parent is given) and
  58568. + install the result to pset of @self */
  58569. +int grab_plugin_pset(struct inode *self,
  58570. + struct inode *ancestor,
  58571. + pset_member memb)
  58572. +{
  58573. + reiser4_plugin *plug;
  58574. + reiser4_inode *info;
  58575. + int result = 0;
  58576. +
  58577. + /* Do not grab if initialised already. */
  58578. + info = reiser4_inode_data(self);
  58579. + if (aset_get(info->pset, memb) != NULL)
  58580. + return 0;
  58581. + if (ancestor) {
  58582. + reiser4_inode *parent;
  58583. +
  58584. + parent = reiser4_inode_data(ancestor);
  58585. + plug = aset_get(parent->hset, memb) ? :
  58586. + aset_get(parent->pset, memb);
  58587. + } else
  58588. + plug = get_default_plugin(memb);
  58589. +
  58590. + result = set_plugin(&info->pset, memb, plug);
  58591. + if (result == 0) {
  58592. + if (!ancestor || self->i_sb->s_root->d_inode != self)
  58593. + update_pset_mask(info, memb);
  58594. + }
  58595. + return result;
  58596. +}
  58597. +
  58598. +/* Take missing pset members from root inode */
  58599. +int finish_pset(struct inode *inode)
  58600. +{
  58601. + reiser4_plugin *plug;
  58602. + reiser4_inode *root;
  58603. + reiser4_inode *info;
  58604. + pset_member memb;
  58605. + int result = 0;
  58606. +
  58607. + root = reiser4_inode_data(inode->i_sb->s_root->d_inode);
  58608. + info = reiser4_inode_data(inode);
  58609. +
  58610. + assert("edward-1455", root != NULL);
  58611. + assert("edward-1456", info != NULL);
  58612. +
  58613. + /* file and directory plugins are already initialized. */
  58614. + for (memb = PSET_DIR + 1; memb < PSET_LAST; ++memb) {
  58615. +
  58616. + /* Do not grab if initialised already. */
  58617. + if (aset_get(info->pset, memb) != NULL)
  58618. + continue;
  58619. +
  58620. + plug = aset_get(root->pset, memb);
  58621. + result = set_plugin(&info->pset, memb, plug);
  58622. + if (result != 0)
  58623. + break;
  58624. + }
  58625. + if (result != 0) {
  58626. + warning("nikita-3447",
  58627. + "Cannot set up plugins for %lli",
  58628. + (unsigned long long)
  58629. + get_inode_oid(inode));
  58630. + }
  58631. + return result;
  58632. +}
  58633. +
  58634. +int force_plugin_pset(struct inode *self, pset_member memb,
  58635. + reiser4_plugin * plug)
  58636. +{
  58637. + reiser4_inode *info;
  58638. + int result = 0;
  58639. +
  58640. + if (!self->i_sb->s_root || self->i_sb->s_root->d_inode == self) {
  58641. + /* Changing pset in the root object. */
  58642. + return RETERR(-EINVAL);
  58643. + }
  58644. +
  58645. + info = reiser4_inode_data(self);
  58646. + if (plug->h.pops != NULL && plug->h.pops->change != NULL)
  58647. + result = plug->h.pops->change(self, plug, memb);
  58648. + else
  58649. + result = aset_set_unsafe(&info->pset, memb, plug);
  58650. + if (result == 0) {
  58651. + __u16 oldmask = info->plugin_mask;
  58652. +
  58653. + update_pset_mask(info, memb);
  58654. + if (oldmask != info->plugin_mask)
  58655. + reiser4_inode_clr_flag(self, REISER4_SDLEN_KNOWN);
  58656. + }
  58657. + return result;
  58658. +}
  58659. +
  58660. +struct reiser4_plugin_type_data plugins[REISER4_PLUGIN_TYPES] = {
  58661. + /* C90 initializers */
  58662. + [REISER4_FILE_PLUGIN_TYPE] = {
  58663. + .type_id = REISER4_FILE_PLUGIN_TYPE,
  58664. + .label = "file",
  58665. + .desc = "Object plugins",
  58666. + .builtin_num = sizeof_array(file_plugins),
  58667. + .builtin = file_plugins,
  58668. + .plugins_list = {NULL, NULL},
  58669. + .size = sizeof(file_plugin)
  58670. + },
  58671. + [REISER4_DIR_PLUGIN_TYPE] = {
  58672. + .type_id = REISER4_DIR_PLUGIN_TYPE,
  58673. + .label = "dir",
  58674. + .desc = "Directory plugins",
  58675. + .builtin_num = sizeof_array(dir_plugins),
  58676. + .builtin = dir_plugins,
  58677. + .plugins_list = {NULL, NULL},
  58678. + .size = sizeof(dir_plugin)
  58679. + },
  58680. + [REISER4_HASH_PLUGIN_TYPE] = {
  58681. + .type_id = REISER4_HASH_PLUGIN_TYPE,
  58682. + .label = "hash",
  58683. + .desc = "Directory hashes",
  58684. + .builtin_num = sizeof_array(hash_plugins),
  58685. + .builtin = hash_plugins,
  58686. + .plugins_list = {NULL, NULL},
  58687. + .size = sizeof(hash_plugin)
  58688. + },
  58689. + [REISER4_FIBRATION_PLUGIN_TYPE] = {
  58690. + .type_id =
  58691. + REISER4_FIBRATION_PLUGIN_TYPE,
  58692. + .label = "fibration",
  58693. + .desc = "Directory fibrations",
  58694. + .builtin_num = sizeof_array(fibration_plugins),
  58695. + .builtin = fibration_plugins,
  58696. + .plugins_list = {NULL, NULL},
  58697. + .size = sizeof(fibration_plugin)
  58698. + },
  58699. + [REISER4_CIPHER_PLUGIN_TYPE] = {
  58700. + .type_id = REISER4_CIPHER_PLUGIN_TYPE,
  58701. + .label = "cipher",
  58702. + .desc = "Cipher plugins",
  58703. + .builtin_num = sizeof_array(cipher_plugins),
  58704. + .builtin = cipher_plugins,
  58705. + .plugins_list = {NULL, NULL},
  58706. + .size = sizeof(cipher_plugin)
  58707. + },
  58708. + [REISER4_DIGEST_PLUGIN_TYPE] = {
  58709. + .type_id = REISER4_DIGEST_PLUGIN_TYPE,
  58710. + .label = "digest",
  58711. + .desc = "Digest plugins",
  58712. + .builtin_num = sizeof_array(digest_plugins),
  58713. + .builtin = digest_plugins,
  58714. + .plugins_list = {NULL, NULL},
  58715. + .size = sizeof(digest_plugin)
  58716. + },
  58717. + [REISER4_COMPRESSION_PLUGIN_TYPE] = {
  58718. + .type_id = REISER4_COMPRESSION_PLUGIN_TYPE,
  58719. + .label = "compression",
  58720. + .desc = "Compression plugins",
  58721. + .builtin_num = sizeof_array(compression_plugins),
  58722. + .builtin = compression_plugins,
  58723. + .plugins_list = {NULL, NULL},
  58724. + .size = sizeof(compression_plugin)
  58725. + },
  58726. + [REISER4_FORMATTING_PLUGIN_TYPE] = {
  58727. + .type_id = REISER4_FORMATTING_PLUGIN_TYPE,
  58728. + .label = "formatting",
  58729. + .desc = "Tail inlining policies",
  58730. + .builtin_num = sizeof_array(formatting_plugins),
  58731. + .builtin = formatting_plugins,
  58732. + .plugins_list = {NULL, NULL},
  58733. + .size = sizeof(formatting_plugin)
  58734. + },
  58735. + [REISER4_PERM_PLUGIN_TYPE] = {
  58736. + .type_id = REISER4_PERM_PLUGIN_TYPE,
  58737. + .label = "perm",
  58738. + .desc = "Permission checks",
  58739. + .builtin_num = sizeof_array(perm_plugins),
  58740. + .builtin = perm_plugins,
  58741. + .plugins_list = {NULL, NULL},
  58742. + .size = sizeof(perm_plugin)
  58743. + },
  58744. + [REISER4_ITEM_PLUGIN_TYPE] = {
  58745. + .type_id = REISER4_ITEM_PLUGIN_TYPE,
  58746. + .label = "item",
  58747. + .desc = "Item handlers",
  58748. + .builtin_num = sizeof_array(item_plugins),
  58749. + .builtin = item_plugins,
  58750. + .plugins_list = {NULL, NULL},
  58751. + .size = sizeof(item_plugin)
  58752. + },
  58753. + [REISER4_NODE_PLUGIN_TYPE] = {
  58754. + .type_id = REISER4_NODE_PLUGIN_TYPE,
  58755. + .label = "node",
  58756. + .desc = "node layout handlers",
  58757. + .builtin_num = sizeof_array(node_plugins),
  58758. + .builtin = node_plugins,
  58759. + .plugins_list = {NULL, NULL},
  58760. + .size = sizeof(node_plugin)
  58761. + },
  58762. + [REISER4_SD_EXT_PLUGIN_TYPE] = {
  58763. + .type_id = REISER4_SD_EXT_PLUGIN_TYPE,
  58764. + .label = "sd_ext",
  58765. + .desc = "Parts of stat-data",
  58766. + .builtin_num = sizeof_array(sd_ext_plugins),
  58767. + .builtin = sd_ext_plugins,
  58768. + .plugins_list = {NULL, NULL},
  58769. + .size = sizeof(sd_ext_plugin)
  58770. + },
  58771. + [REISER4_FORMAT_PLUGIN_TYPE] = {
  58772. + .type_id = REISER4_FORMAT_PLUGIN_TYPE,
  58773. + .label = "disk_layout",
  58774. + .desc = "defines filesystem on disk layout",
  58775. + .builtin_num = sizeof_array(format_plugins),
  58776. + .builtin = format_plugins,
  58777. + .plugins_list = {NULL, NULL},
  58778. + .size = sizeof(disk_format_plugin)
  58779. + },
  58780. + [REISER4_JNODE_PLUGIN_TYPE] = {
  58781. + .type_id = REISER4_JNODE_PLUGIN_TYPE,
  58782. + .label = "jnode",
  58783. + .desc = "defines kind of jnode",
  58784. + .builtin_num = sizeof_array(jnode_plugins),
  58785. + .builtin = jnode_plugins,
  58786. + .plugins_list = {NULL, NULL},
  58787. + .size = sizeof(jnode_plugin)
  58788. + },
  58789. + [REISER4_COMPRESSION_MODE_PLUGIN_TYPE] = {
  58790. + .type_id = REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
  58791. + .label = "compression_mode",
  58792. + .desc = "Defines compression mode",
  58793. + .builtin_num = sizeof_array(compression_mode_plugins),
  58794. + .builtin = compression_mode_plugins,
  58795. + .plugins_list = {NULL, NULL},
  58796. + .size = sizeof(compression_mode_plugin)
  58797. + },
  58798. + [REISER4_CLUSTER_PLUGIN_TYPE] = {
  58799. + .type_id = REISER4_CLUSTER_PLUGIN_TYPE,
  58800. + .label = "cluster",
  58801. + .desc = "Defines cluster size",
  58802. + .builtin_num = sizeof_array(cluster_plugins),
  58803. + .builtin = cluster_plugins,
  58804. + .plugins_list = {NULL, NULL},
  58805. + .size = sizeof(cluster_plugin)
  58806. + },
  58807. + [REISER4_TXMOD_PLUGIN_TYPE] = {
  58808. + .type_id = REISER4_TXMOD_PLUGIN_TYPE,
  58809. + .label = "txmod",
  58810. + .desc = "Defines transaction model",
  58811. + .builtin_num = sizeof_array(txmod_plugins),
  58812. + .builtin = txmod_plugins,
  58813. + .plugins_list = {NULL, NULL},
  58814. + .size = sizeof(txmod_plugin)
  58815. + }
  58816. +};
  58817. +
  58818. +/*
  58819. + * Local variables:
  58820. + * c-indentation-style: "K&R"
  58821. + * mode-name: "LC"
  58822. + * c-basic-offset: 8
  58823. + * tab-width: 8
  58824. + * fill-column: 120
  58825. + * End:
  58826. + */
  58827. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/plugin.h linux-4.14.2/fs/reiser4/plugin/plugin.h
  58828. --- linux-4.14.2.orig/fs/reiser4/plugin/plugin.h 1970-01-01 01:00:00.000000000 +0100
  58829. +++ linux-4.14.2/fs/reiser4/plugin/plugin.h 2017-11-26 22:15:33.000000000 +0100
  58830. @@ -0,0 +1,999 @@
  58831. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  58832. + * reiser4/README */
  58833. +
  58834. +/* Basic plugin data-types.
  58835. + see fs/reiser4/plugin/plugin.c for details */
  58836. +
  58837. +#if !defined(__FS_REISER4_PLUGIN_TYPES_H__)
  58838. +#define __FS_REISER4_PLUGIN_TYPES_H__
  58839. +
  58840. +#include "../forward.h"
  58841. +#include "../debug.h"
  58842. +#include "../dformat.h"
  58843. +#include "../key.h"
  58844. +#include "compress/compress.h"
  58845. +#include "crypto/cipher.h"
  58846. +#include "plugin_header.h"
  58847. +#include "item/static_stat.h"
  58848. +#include "item/internal.h"
  58849. +#include "item/sde.h"
  58850. +#include "item/cde.h"
  58851. +#include "item/item.h"
  58852. +#include "node/node.h"
  58853. +#include "node/node41.h"
  58854. +#include "security/perm.h"
  58855. +#include "fibration.h"
  58856. +
  58857. +#include "space/bitmap.h"
  58858. +#include "space/space_allocator.h"
  58859. +
  58860. +#include "disk_format/disk_format40.h"
  58861. +#include "disk_format/disk_format.h"
  58862. +
  58863. +#include <linux/fs.h> /* for struct super_block, address_space */
  58864. +#include <linux/mm.h> /* for struct page */
  58865. +#include <linux/buffer_head.h> /* for struct buffer_head */
  58866. +#include <linux/dcache.h> /* for struct dentry */
  58867. +#include <linux/types.h>
  58868. +#include <linux/crypto.h>
  58869. +
  58870. +typedef struct reiser4_object_on_wire reiser4_object_on_wire;
  58871. +
  58872. +/*
  58873. + * File plugin. Defines the set of methods that file plugins implement, some
  58874. + * of which are optional.
  58875. + *
  58876. + * A file plugin offers to the caller an interface for IO ( writing to and/or
  58877. + * reading from) to what the caller sees as one sequence of bytes. An IO to it
  58878. + * may affect more than one physical sequence of bytes, or no physical sequence
  58879. + * of bytes, it may affect sequences of bytes offered by other file plugins to
  58880. + * the semantic layer, and the file plugin may invoke other plugins and
  58881. + * delegate work to them, but its interface is structured for offering the
  58882. + * caller the ability to read and/or write what the caller sees as being a
  58883. + * single sequence of bytes.
  58884. + *
  58885. + * The file plugin must present a sequence of bytes to the caller, but it does
  58886. + * not necessarily have to store a sequence of bytes, it does not necessarily
  58887. + * have to support efficient tree traversal to any offset in the sequence of
  58888. + * bytes (tail and extent items, whose keys contain offsets, do however provide
  58889. + * efficient non-sequential lookup of any offset in the sequence of bytes).
  58890. + *
  58891. + * Directory plugins provide methods for selecting file plugins by resolving a
  58892. + * name for them.
  58893. + *
  58894. + * The functionality other filesystems call an attribute, and rigidly tie
  58895. + * together, we decompose into orthogonal selectable features of files. Using
  58896. + * the terminology we will define next, an attribute is a perhaps constrained,
  58897. + * perhaps static length, file whose parent has a uni-count-intra-link to it,
  58898. + * which might be grandparent-major-packed, and whose parent has a deletion
  58899. + * method that deletes it.
  58900. + *
  58901. + * File plugins can implement constraints.
  58902. + *
  58903. + * Files can be of variable length (e.g. regular unix files), or of static
  58904. + * length (e.g. static sized attributes).
  58905. + *
  58906. + * An object may have many sequences of bytes, and many file plugins, but, it
  58907. + * has exactly one objectid. It is usually desirable that an object has a
  58908. + * deletion method which deletes every item with that objectid. Items cannot
  58909. + * in general be found by just their objectids. This means that an object must
  58910. + * have either a method built into its deletion plugin method for knowing what
  58911. + * items need to be deleted, or links stored with the object that provide the
  58912. + * plugin with a method for finding those items. Deleting a file within an
  58913. + * object may or may not have the effect of deleting the entire object,
  58914. + * depending on the file plugin's deletion method.
  58915. + *
  58916. + * LINK TAXONOMY:
  58917. + *
  58918. + * Many objects have a reference count, and when the reference count reaches 0
  58919. + * the object's deletion method is invoked. Some links embody a reference
  58920. + * count increase ("countlinks"), and others do not ("nocountlinks").
  58921. + *
  58922. + * Some links are bi-directional links ("bilinks"), and some are
  58923. + * uni-directional("unilinks").
  58924. + *
  58925. + * Some links are between parts of the same object ("intralinks"), and some are
  58926. + * between different objects ("interlinks").
  58927. + *
  58928. + * PACKING TAXONOMY:
  58929. + *
  58930. + * Some items of an object are stored with a major packing locality based on
  58931. + * their object's objectid (e.g. unix directory items in plan A), and these are
  58932. + * called "self-major-packed".
  58933. + *
  58934. + * Some items of an object are stored with a major packing locality based on
  58935. + * their semantic parent object's objectid (e.g. unix file bodies in plan A),
  58936. + * and these are called "parent-major-packed".
  58937. + *
  58938. + * Some items of an object are stored with a major packing locality based on
  58939. + * their semantic grandparent, and these are called "grandparent-major-packed".
  58940. + * Now carefully notice that we run into trouble with key length if we have to
  58941. + * store a 8 byte major+minor grandparent based packing locality, an 8 byte
  58942. + * parent objectid, an 8 byte attribute objectid, and an 8 byte offset, all in
  58943. + * a 24 byte key. One of these fields must be sacrificed if an item is to be
  58944. + * grandparent-major-packed, and which to sacrifice is left to the item author
  58945. + * choosing to make the item grandparent-major-packed. You cannot make tail
  58946. + * items and extent items grandparent-major-packed, though you could make them
  58947. + * self-major-packed (usually they are parent-major-packed).
  58948. + *
  58949. + * In the case of ACLs (which are composed of fixed length ACEs which consist
  58950. + * of {subject-type, subject, and permission bitmask} triples), it makes sense
  58951. + * to not have an offset field in the ACE item key, and to allow duplicate keys
  58952. + * for ACEs. Thus, the set of ACES for a given file is found by looking for a
  58953. + * key consisting of the objectid of the grandparent (thus grouping all ACLs in
  58954. + * a directory together), the minor packing locality of ACE, the objectid of
  58955. + * the file, and 0.
  58956. + *
  58957. + * IO involves moving data from one location to another, which means that two
  58958. + * locations must be specified, source and destination.
  58959. + *
  58960. + * This source and destination can be in the filesystem, or they can be a
  58961. + * pointer in the user process address space plus a byte count.
  58962. + *
  58963. + * If both source and destination are in the filesystem, then at least one of
  58964. + * them must be representable as a pure stream of bytes (which we call a flow,
  58965. + * and define as a struct containing a key, a data pointer, and a length).
  58966. + * This may mean converting one of them into a flow. We provide a generic
  58967. + * cast_into_flow() method, which will work for any plugin supporting
  58968. + * read_flow(), though it is inefficiently implemented in that it temporarily
  58969. + * stores the flow in a buffer (Question: what to do with huge flows that
  58970. + * cannot fit into memory? Answer: we must not convert them all at once. )
  58971. + *
  58972. + * Performing a write requires resolving the write request into a flow defining
  58973. + * the source, and a method that performs the write, and a key that defines
  58974. + * where in the tree the write is to go.
  58975. + *
  58976. + * Performing a read requires resolving the read request into a flow defining
  58977. + * the target, and a method that performs the read, and a key that defines
  58978. + * where in the tree the read is to come from.
  58979. + *
  58980. + * There will exist file plugins which have no pluginid stored on the disk for
  58981. + * them, and which are only invoked by other plugins.
  58982. + */
  58983. +
  58984. +/*
  58985. + * This should be incremented in every release which adds one
  58986. + * or more new plugins.
  58987. + * NOTE: Make sure that the respective macro is also incremented in
  58988. + * the new release of reiser4progs.
  58989. + */
  58990. +#define PLUGIN_LIBRARY_VERSION 2
  58991. +
  58992. + /* enumeration of fields within plugin_set */
  58993. +typedef enum {
  58994. + PSET_FILE,
  58995. + PSET_DIR, /* PSET_FILE and PSET_DIR should be first
  58996. + * elements: inode.c:read_inode() depends on
  58997. + * this. */
  58998. + PSET_PERM,
  58999. + PSET_FORMATTING,
  59000. + PSET_HASH,
  59001. + PSET_FIBRATION,
  59002. + PSET_SD,
  59003. + PSET_DIR_ITEM,
  59004. + PSET_CIPHER,
  59005. + PSET_DIGEST,
  59006. + PSET_COMPRESSION,
  59007. + PSET_COMPRESSION_MODE,
  59008. + PSET_CLUSTER,
  59009. + PSET_CREATE,
  59010. + PSET_LAST
  59011. +} pset_member;
  59012. +
  59013. +/* builtin file-plugins */
  59014. +typedef enum {
  59015. + /* regular file */
  59016. + UNIX_FILE_PLUGIN_ID,
  59017. + /* directory */
  59018. + DIRECTORY_FILE_PLUGIN_ID,
  59019. + /* symlink */
  59020. + SYMLINK_FILE_PLUGIN_ID,
  59021. + /* for objects completely handled by the VFS: fifos, devices,
  59022. + sockets */
  59023. + SPECIAL_FILE_PLUGIN_ID,
  59024. + /* regular cryptcompress file */
  59025. + CRYPTCOMPRESS_FILE_PLUGIN_ID,
  59026. + /* number of file plugins. Used as size of arrays to hold
  59027. + file plugins. */
  59028. + LAST_FILE_PLUGIN_ID
  59029. +} reiser4_file_id;
  59030. +
  59031. +typedef struct file_plugin {
  59032. +
  59033. + /* generic fields */
  59034. + plugin_header h;
  59035. +
  59036. + /* VFS methods */
  59037. + struct inode_operations * inode_ops;
  59038. + struct file_operations * file_ops;
  59039. + struct address_space_operations * as_ops;
  59040. + /**
  59041. + * Private methods. These are optional. If used they will allow you
  59042. + * to minimize the amount of code needed to implement a deviation
  59043. + * from some other method that also uses them.
  59044. + */
  59045. + /*
  59046. + * private inode_ops
  59047. + */
  59048. + int (*setattr)(struct dentry *, struct iattr *);
  59049. + /*
  59050. + * private file_ops
  59051. + */
  59052. + /* do whatever is necessary to do when object is opened */
  59053. + int (*open) (struct inode *inode, struct file *file);
  59054. + ssize_t (*read) (struct file *, char __user *buf, size_t read_amount,
  59055. + loff_t *off);
  59056. + /* write as many bytes as possible from nominated @write_amount
  59057. + * before plugin scheduling occurs. Save scheduling state
  59058. + * in @cont */
  59059. + ssize_t (*write) (struct file *, const char __user *buf,
  59060. + size_t write_amount, loff_t * off,
  59061. + struct dispatch_context * cont);
  59062. + int (*ioctl) (struct file *filp, unsigned int cmd, unsigned long arg);
  59063. + int (*mmap) (struct file *, struct vm_area_struct *);
  59064. + int (*release) (struct inode *, struct file *);
  59065. + /*
  59066. + * private a_ops
  59067. + */
  59068. + int (*readpage) (struct file *file, struct page *page);
  59069. + int (*readpages)(struct file *file, struct address_space *mapping,
  59070. + struct list_head *pages, unsigned nr_pages);
  59071. + int (*writepages)(struct address_space *mapping,
  59072. + struct writeback_control *wbc);
  59073. + int (*write_begin)(struct file *file, struct page *page,
  59074. + loff_t pos, unsigned len, void **fsdata);
  59075. + int (*write_end)(struct file *file, struct page *page,
  59076. + loff_t pos, unsigned copied, void *fsdata);
  59077. + sector_t (*bmap) (struct address_space * mapping, sector_t lblock);
  59078. + /* other private methods */
  59079. + /* save inode cached stat-data onto disk. It was called
  59080. + reiserfs_update_sd() in 3.x */
  59081. + int (*write_sd_by_inode) (struct inode *);
  59082. + /*
  59083. + * Construct flow into @flow according to user-supplied data.
  59084. + *
  59085. + * This is used by read/write methods to construct a flow to
  59086. + * write/read. ->flow_by_inode() is plugin method, rather than single
  59087. + * global implementation, because key in a flow used by plugin may
  59088. + * depend on data in a @buf.
  59089. + *
  59090. + * NIKITA-FIXME-HANS: please create statistics on what functions are
  59091. + * dereferenced how often for the mongo benchmark. You can supervise
  59092. + * Elena doing this for you if that helps. Email me the list of the
  59093. + * top 10, with their counts, and an estimate of the total number of
  59094. + * CPU cycles spent dereferencing as a percentage of CPU cycles spent
  59095. + * processing (non-idle processing). If the total percent is, say,
  59096. + * less than 1%, it will make our coding discussions much easier, and
  59097. + * keep me from questioning whether functions like the below are too
  59098. + * frequently called to be dereferenced. If the total percent is more
  59099. + * than 1%, perhaps private methods should be listed in a "required"
  59100. + * comment at the top of each plugin (with stern language about how if
  59101. + * the comment is missing it will not be accepted by the maintainer),
  59102. + * and implemented using macros not dereferenced functions. How about
  59103. + * replacing this whole private methods part of the struct with a
  59104. + * thorough documentation of what the standard helper functions are for
  59105. + * use in constructing plugins? I think users have been asking for
  59106. + * that, though not in so many words.
  59107. + */
  59108. + int (*flow_by_inode) (struct inode *, const char __user *buf,
  59109. + int user, loff_t size,
  59110. + loff_t off, rw_op op, flow_t *);
  59111. + /*
  59112. + * Return the key used to retrieve an offset of a file. It is used by
  59113. + * default implementation of ->flow_by_inode() method
  59114. + * (common_build_flow()) and, among other things, to get to the extent
  59115. + * from jnode of unformatted node.
  59116. + */
  59117. + int (*key_by_inode) (struct inode *, loff_t off, reiser4_key *);
  59118. +
  59119. + /* NIKITA-FIXME-HANS: this comment is not as clear to others as you
  59120. + * think.... */
  59121. + /*
  59122. + * set the plugin for a file. Called during file creation in creat()
  59123. + * but not reiser4() unless an inode already exists for the file.
  59124. + */
  59125. + int (*set_plug_in_inode) (struct inode *inode, struct inode *parent,
  59126. + reiser4_object_create_data *);
  59127. +
  59128. + /* NIKITA-FIXME-HANS: comment and name seem to say different things,
  59129. + * are you setting up the object itself also or just adjusting the
  59130. + * parent?.... */
  59131. + /* set up plugins for new @object created in @parent. @root is root
  59132. + directory. */
  59133. + int (*adjust_to_parent) (struct inode *object, struct inode *parent,
  59134. + struct inode *root);
  59135. + /*
  59136. + * this does whatever is necessary to do when object is created. For
  59137. + * instance, for unix files stat data is inserted. It is supposed to be
  59138. + * called by create of struct inode_operations.
  59139. + */
  59140. + int (*create_object) (struct inode *object, struct inode *parent,
  59141. + reiser4_object_create_data *);
  59142. + /*
  59143. + * this method should check REISER4_NO_SD and set REISER4_NO_SD on
  59144. + * success. Deletion of an object usually includes removal of items
  59145. + * building file body (for directories this is removal of "." and "..")
  59146. + * and removal of stat-data item.
  59147. + */
  59148. + int (*delete_object) (struct inode *);
  59149. +
  59150. + /* add link from @parent to @object */
  59151. + int (*add_link) (struct inode *object, struct inode *parent);
  59152. +
  59153. + /* remove link from @parent to @object */
  59154. + int (*rem_link) (struct inode *object, struct inode *parent);
  59155. +
  59156. + /*
  59157. + * return true if item addressed by @coord belongs to @inode. This is
  59158. + * used by read/write to properly slice flow into items in presence of
  59159. + * multiple key assignment policies, because items of a file are not
  59160. + * necessarily contiguous in a key space, for example, in a plan-b.
  59161. + */
  59162. + int (*owns_item) (const struct inode *, const coord_t *);
  59163. +
  59164. + /* checks whether yet another hard link to this object can be
  59165. + added */
  59166. + int (*can_add_link) (const struct inode *);
  59167. +
  59168. + /* checks whether hard links to this object can be removed */
  59169. + int (*can_rem_link) (const struct inode *);
  59170. +
  59171. + /* not empty for DIRECTORY_FILE_PLUGIN_ID only currently. It calls
  59172. + detach of directory plugin to remove ".." */
  59173. + int (*detach) (struct inode *child, struct inode *parent);
  59174. +
  59175. + /* called when @child was just looked up in the @parent. It is not
  59176. + empty for DIRECTORY_FILE_PLUGIN_ID only where it calls attach of
  59177. + directory plugin */
  59178. + int (*bind) (struct inode *child, struct inode *parent);
  59179. +
  59180. + /* process safe-link during mount */
  59181. + int (*safelink) (struct inode *object, reiser4_safe_link_t link,
  59182. + __u64 value);
  59183. +
  59184. + /* The couple of estimate methods for all file operations */
  59185. + struct {
  59186. + reiser4_block_nr(*create) (const struct inode *);
  59187. + reiser4_block_nr(*update) (const struct inode *);
  59188. + reiser4_block_nr(*unlink) (const struct inode *,
  59189. + const struct inode *);
  59190. + } estimate;
  59191. +
  59192. + /*
  59193. + * reiser4 specific part of inode has a union of structures which are
  59194. + * specific to a plugin. This method is called when inode is read
  59195. + * (read_inode) and when file is created (common_create_child) so that
  59196. + * file plugin could initialize its inode data
  59197. + */
  59198. + void (*init_inode_data) (struct inode *, reiser4_object_create_data * ,
  59199. + int);
  59200. +
  59201. + /*
  59202. + * This method performs progressive deletion of items and whole nodes
  59203. + * from right to left.
  59204. + *
  59205. + * @tap: the point deletion process begins from,
  59206. + * @from_key: the beginning of the deleted key range,
  59207. + * @to_key: the end of the deleted key range,
  59208. + * @smallest_removed: the smallest removed key,
  59209. + *
  59210. + * @return: 0 if success, error code otherwise, -E_REPEAT means that
  59211. + * long cut_tree operation was interrupted to allow atom commit.
  59212. + */
  59213. + int (*cut_tree_worker) (tap_t *, const reiser4_key * from_key,
  59214. + const reiser4_key * to_key,
  59215. + reiser4_key * smallest_removed, struct inode *,
  59216. + int, int *);
  59217. +
  59218. + /* called from ->destroy_inode() */
  59219. + void (*destroy_inode) (struct inode *);
  59220. +
  59221. + /*
  59222. + * methods to serialize object identity. This is used, for example, by
  59223. + * reiser4_{en,de}code_fh().
  59224. + */
  59225. + struct {
  59226. + /* store object's identity at @area */
  59227. + char *(*write) (struct inode *inode, char *area);
  59228. + /* parse object from wire to the @obj */
  59229. + char *(*read) (char *area, reiser4_object_on_wire * obj);
  59230. + /* given object identity in @obj, find or create its dentry */
  59231. + struct dentry *(*get) (struct super_block *s,
  59232. + reiser4_object_on_wire * obj);
  59233. + /* how many bytes ->wire.write() consumes */
  59234. + int (*size) (struct inode *inode);
  59235. + /* finish with object identity */
  59236. + void (*done) (reiser4_object_on_wire * obj);
  59237. + } wire;
  59238. +} file_plugin;
  59239. +
  59240. +extern file_plugin file_plugins[LAST_FILE_PLUGIN_ID];
  59241. +
  59242. +struct reiser4_object_on_wire {
  59243. + file_plugin *plugin;
  59244. + union {
  59245. + struct {
  59246. + obj_key_id key_id;
  59247. + } std;
  59248. + void *generic;
  59249. + } u;
  59250. +};
  59251. +
  59252. +/* builtin dir-plugins */
  59253. +typedef enum {
  59254. + HASHED_DIR_PLUGIN_ID,
  59255. + SEEKABLE_HASHED_DIR_PLUGIN_ID,
  59256. + LAST_DIR_ID
  59257. +} reiser4_dir_id;
  59258. +
  59259. +typedef struct dir_plugin {
  59260. + /* generic fields */
  59261. + plugin_header h;
  59262. +
  59263. + struct inode_operations * inode_ops;
  59264. + struct file_operations * file_ops;
  59265. + struct address_space_operations * as_ops;
  59266. +
  59267. + /*
  59268. + * private methods: These are optional. If used they will allow you to
  59269. + * minimize the amount of code needed to implement a deviation from
  59270. + * some other method that uses them. You could logically argue that
  59271. + * they should be a separate type of plugin.
  59272. + */
  59273. +
  59274. + struct dentry *(*get_parent) (struct inode *childdir);
  59275. +
  59276. + /*
  59277. + * check whether "name" is acceptable name to be inserted into this
  59278. + * object. Optionally implemented by directory-like objects. Can check
  59279. + * for maximal length, reserved symbols etc
  59280. + */
  59281. + int (*is_name_acceptable) (const struct inode *inode, const char *name,
  59282. + int len);
  59283. +
  59284. + void (*build_entry_key) (const struct inode *dir /* directory where
  59285. + * entry is (or will
  59286. + * be) in.*/ ,
  59287. + const struct qstr *name /* name of file
  59288. + * referenced by this
  59289. + * entry */ ,
  59290. + reiser4_key * result /* resulting key of
  59291. + * directory entry */ );
  59292. + int (*build_readdir_key) (struct file *dir, reiser4_key * result);
  59293. + int (*add_entry) (struct inode *object, struct dentry *where,
  59294. + reiser4_object_create_data * data,
  59295. + reiser4_dir_entry_desc * entry);
  59296. + int (*rem_entry) (struct inode *object, struct dentry *where,
  59297. + reiser4_dir_entry_desc * entry);
  59298. +
  59299. + /*
  59300. + * initialize directory structure for newly created object. For normal
  59301. + * unix directories, insert dot and dotdot.
  59302. + */
  59303. + int (*init) (struct inode *object, struct inode *parent,
  59304. + reiser4_object_create_data * data);
  59305. +
  59306. + /* destroy directory */
  59307. + int (*done) (struct inode *child);
  59308. +
  59309. + /* called when @subdir was just looked up in the @dir */
  59310. + int (*attach) (struct inode *subdir, struct inode *dir);
  59311. + int (*detach) (struct inode *subdir, struct inode *dir);
  59312. +
  59313. + struct {
  59314. + reiser4_block_nr(*add_entry) (const struct inode *);
  59315. + reiser4_block_nr(*rem_entry) (const struct inode *);
  59316. + reiser4_block_nr(*unlink) (const struct inode *,
  59317. + const struct inode *);
  59318. + } estimate;
  59319. +} dir_plugin;
  59320. +
  59321. +extern dir_plugin dir_plugins[LAST_DIR_ID];
  59322. +
  59323. +typedef struct formatting_plugin {
  59324. + /* generic fields */
  59325. + plugin_header h;
  59326. + /* returns non-zero iff file's tail has to be stored
  59327. + in a direct item. */
  59328. + int (*have_tail) (const struct inode *inode, loff_t size);
  59329. +} formatting_plugin;
  59330. +
  59331. +/**
  59332. + * Plugins of this interface implement different transaction models.
  59333. + * Transaction model is a high-level block allocator, which assigns block
  59334. + * numbers to dirty nodes, and, thereby, decides, how individual dirty
  59335. + * nodes of an atom will be committed.
  59336. + */
  59337. +typedef struct txmod_plugin {
  59338. + /* generic fields */
  59339. + plugin_header h;
  59340. + /**
  59341. + * allocate blocks in the FORWARD PARENT-FIRST context
  59342. + * for formatted nodes
  59343. + */
  59344. + int (*forward_alloc_formatted)(znode *node, const coord_t *parent_coord,
  59345. + flush_pos_t *pos); //was allocate_znode_loaded
  59346. + /**
  59347. + * allocate blocks in the REVERSE PARENT-FIRST context
  59348. + * for formatted nodes
  59349. + */
  59350. + int (*reverse_alloc_formatted)(jnode * node,
  59351. + const coord_t *parent_coord,
  59352. + flush_pos_t *pos); // was reverse_relocate_test
  59353. + /**
  59354. + * allocate blocks in the FORWARD PARENT-FIRST context
  59355. + * for unformatted nodes.
  59356. + *
  59357. + * This is called by handle_pos_on_twig to proceed extent unit
  59358. + * flush_pos->coord is set to. It is to prepare for flushing
  59359. + * sequence of not flushprepped nodes (slum). It supposes that
  59360. + * slum starts at flush_pos->pos_in_unit position within the extent
  59361. + */
  59362. + int (*forward_alloc_unformatted)(flush_pos_t *flush_pos); //was reiser4_alloc_extent
  59363. + /**
  59364. + * allocate blocks for unformatted nodes in squeeze_right_twig().
  59365. + * @coord is set to extent unit
  59366. + */
  59367. + squeeze_result (*squeeze_alloc_unformatted)(znode *left,
  59368. + const coord_t *coord,
  59369. + flush_pos_t *flush_pos,
  59370. + reiser4_key *stop_key); // was_squalloc_extent
  59371. +} txmod_plugin;
  59372. +
  59373. +typedef struct hash_plugin {
  59374. + /* generic fields */
  59375. + plugin_header h;
  59376. + /* computes hash of the given name */
  59377. + __u64(*hash) (const unsigned char *name, int len);
  59378. +} hash_plugin;
  59379. +
  59380. +typedef struct cipher_plugin {
  59381. + /* generic fields */
  59382. + plugin_header h;
  59383. + struct crypto_blkcipher * (*alloc) (void);
  59384. + void (*free) (struct crypto_blkcipher *tfm);
  59385. + /* Offset translator. For each offset this returns (k * offset), where
  59386. + k (k >= 1) is an expansion factor of the cipher algorithm.
  59387. + For all symmetric algorithms k == 1. For asymmetric algorithms (which
  59388. + inflate data) offset translation guarantees that all disk cluster's
  59389. + units will have keys smaller than the next cluster's one.
  59390. + */
  59391. + loff_t(*scale) (struct inode *inode, size_t blocksize, loff_t src);
  59392. + /* Cipher algorithms can accept data only by chunks of cipher block
  59393. + size. This method is to align any flow up to cipher block size when
  59394. + we pass it to cipher algorithm. To align means to append padding of
  59395. + special format specific to the cipher algorithm */
  59396. + int (*align_stream) (__u8 *tail, int clust_size, int blocksize);
  59397. + /* low-level key manager (check, install, etc..) */
  59398. + int (*setkey) (struct crypto_tfm *tfm, const __u8 *key,
  59399. + unsigned int keylen);
  59400. + /* main text processing procedures */
  59401. + void (*encrypt) (__u32 *expkey, __u8 *dst, const __u8 *src);
  59402. + void (*decrypt) (__u32 *expkey, __u8 *dst, const __u8 *src);
  59403. +} cipher_plugin;
  59404. +
  59405. +typedef struct digest_plugin {
  59406. + /* generic fields */
  59407. + plugin_header h;
  59408. + /* fingerprint size in bytes */
  59409. + int fipsize;
  59410. + struct crypto_hash * (*alloc) (void);
  59411. + void (*free) (struct crypto_hash *tfm);
  59412. +} digest_plugin;
  59413. +
  59414. +typedef struct compression_plugin {
  59415. + /* generic fields */
  59416. + plugin_header h;
  59417. + int (*init) (void);
  59418. + /* the maximum number of bytes by which the size of the "compressed"
  59419. + * data can exceed the size of the uncompressed data. */
  59420. + int (*overrun) (unsigned src_len);
  59421. + coa_t(*alloc) (tfm_action act);
  59422. + void (*free) (coa_t coa, tfm_action act);
  59423. + /* minimal size of the flow we still try to compress */
  59424. + int (*min_size_deflate) (void);
  59425. + __u32(*checksum) (char *data, __u32 length);
  59426. + /* main transform procedures */
  59427. + void (*compress) (coa_t coa, __u8 *src_first, size_t src_len,
  59428. + __u8 *dst_first, size_t *dst_len);
  59429. + void (*decompress) (coa_t coa, __u8 *src_first, size_t src_len,
  59430. + __u8 *dst_first, size_t *dst_len);
  59431. +} compression_plugin;
  59432. +
  59433. +typedef struct compression_mode_plugin {
  59434. + /* generic fields */
  59435. + plugin_header h;
  59436. + /* this is called when estimating compressibility
  59437. + of a logical cluster by its content */
  59438. + int (*should_deflate) (struct inode *inode, cloff_t index);
  59439. + /* this is called when results of compression should be saved */
  59440. + int (*accept_hook) (struct inode *inode, cloff_t index);
  59441. + /* this is called when results of compression should be discarded */
  59442. + int (*discard_hook) (struct inode *inode, cloff_t index);
  59443. +} compression_mode_plugin;
  59444. +
  59445. +typedef struct cluster_plugin {
  59446. + /* generic fields */
  59447. + plugin_header h;
  59448. + int shift;
  59449. +} cluster_plugin;
  59450. +
  59451. +typedef struct sd_ext_plugin {
  59452. + /* generic fields */
  59453. + plugin_header h;
  59454. + int (*present) (struct inode *inode, char **area, int *len);
  59455. + int (*absent) (struct inode *inode);
  59456. + int (*save_len) (struct inode *inode);
  59457. + int (*save) (struct inode *inode, char **area);
  59458. + /* alignment requirement for this stat-data part */
  59459. + int alignment;
  59460. +} sd_ext_plugin;
  59461. +
  59462. +/* this plugin contains methods to allocate objectid for newly created files,
  59463. + to deallocate objectid when file gets removed, to report number of used and
  59464. + free objectids */
  59465. +typedef struct oid_allocator_plugin {
  59466. + /* generic fields */
  59467. + plugin_header h;
  59468. + int (*init_oid_allocator) (reiser4_oid_allocator * map, __u64 nr_files,
  59469. + __u64 oids);
  59470. + /* used to report statfs->f_files */
  59471. + __u64(*oids_used) (reiser4_oid_allocator * map);
  59472. + /* get next oid to use */
  59473. + __u64(*next_oid) (reiser4_oid_allocator * map);
  59474. + /* used to report statfs->f_ffree */
  59475. + __u64(*oids_free) (reiser4_oid_allocator * map);
  59476. + /* allocate new objectid */
  59477. + int (*allocate_oid) (reiser4_oid_allocator * map, oid_t *);
  59478. + /* release objectid */
  59479. + int (*release_oid) (reiser4_oid_allocator * map, oid_t);
  59480. + /* how many pages to reserve in transaction for allocation of new
  59481. + objectid */
  59482. + int (*oid_reserve_allocate) (reiser4_oid_allocator * map);
  59483. + /* how many pages to reserve in transaction for freeing of an
  59484. + objectid */
  59485. + int (*oid_reserve_release) (reiser4_oid_allocator * map);
  59486. + void (*print_info) (const char *, reiser4_oid_allocator *);
  59487. +} oid_allocator_plugin;
  59488. +
  59489. +/* disk layout plugin: this specifies super block, journal, bitmap (if there
  59490. + are any) locations, etc */
  59491. +typedef struct disk_format_plugin {
  59492. + /* generic fields */
  59493. + plugin_header h;
  59494. + /* replay journal, initialize super_info_data, etc */
  59495. + int (*init_format) (struct super_block *, void *data);
  59496. +
  59497. + /* key of root directory stat data */
  59498. + const reiser4_key * (*root_dir_key) (const struct super_block *);
  59499. +
  59500. + int (*release) (struct super_block *);
  59501. + jnode * (*log_super) (struct super_block *);
  59502. + int (*check_open) (const struct inode *object);
  59503. + int (*version_update) (struct super_block *);
  59504. +} disk_format_plugin;
  59505. +
  59506. +struct jnode_plugin {
  59507. + /* generic fields */
  59508. + plugin_header h;
  59509. + int (*init) (jnode * node);
  59510. + int (*parse) (jnode * node);
  59511. + struct address_space *(*mapping) (const jnode * node);
  59512. + unsigned long (*index) (const jnode * node);
  59513. + jnode * (*clone) (jnode * node);
  59514. +};
  59515. +
  59516. +/* plugin instance. */
  59517. +/* */
  59518. +/* This is "wrapper" union for all types of plugins. Most of the code uses */
  59519. +/* plugins of particular type (file_plugin, dir_plugin, etc.) rather than */
  59520. +/* operates with pointers to reiser4_plugin. This union is only used in */
  59521. +/* some generic code in plugin/plugin.c that operates on all */
  59522. +/* plugins. Technically speaking purpose of this union is to add type */
  59523. +/* safety to said generic code: each plugin type (file_plugin, for */
  59524. +/* example), contains plugin_header as its first member.  This first member */
  59525. +/* is located at the same place in memory as .h member of */
  59526. +/* reiser4_plugin. Generic code, obtains pointer to reiser4_plugin and */
  59527. +/* looks in the .h which is header of plugin type located in union. This */
  59528. +/* allows to avoid type-casts. */
  59529. +union reiser4_plugin {
  59530. + /* generic fields */
  59531. + plugin_header h;
  59532. + /* file plugin */
  59533. + file_plugin file;
  59534. + /* directory plugin */
  59535. + dir_plugin dir;
  59536. + /* hash plugin, used by directory plugin */
  59537. + hash_plugin hash;
  59538. + /* fibration plugin used by directory plugin */
  59539. + fibration_plugin fibration;
  59540. + /* cipher transform plugin, used by file plugin */
  59541. + cipher_plugin cipher;
  59542. + /* digest transform plugin, used by file plugin */
  59543. + digest_plugin digest;
  59544. + /* compression transform plugin, used by file plugin */
  59545. + compression_plugin compression;
  59546. + /* tail plugin, used by file plugin */
  59547. + formatting_plugin formatting;
  59548. + /* permission plugin */
  59549. + perm_plugin perm;
  59550. + /* node plugin */
  59551. + node_plugin node;
  59552. + /* item plugin */
  59553. + item_plugin item;
  59554. + /* stat-data extension plugin */
  59555. + sd_ext_plugin sd_ext;
  59556. + /* disk layout plugin */
  59557. + disk_format_plugin format;
  59558. + /* object id allocator plugin */
  59559. + oid_allocator_plugin oid_allocator;
  59560. + /* plugin for different jnode types */
  59561. + jnode_plugin jnode;
  59562. + /* compression mode plugin, used by object plugin */
  59563. + compression_mode_plugin compression_mode;
  59564. + /* cluster plugin, used by object plugin */
  59565. + cluster_plugin clust;
  59566. + /* transaction mode plugin */
  59567. + txmod_plugin txmod;
  59568. + /* place-holder for new plugin types that can be registered
  59569. + dynamically, and used by other dynamically loaded plugins. */
  59570. + void *generic;
  59571. +};
  59572. +
  59573. +struct reiser4_plugin_ops {
  59574. + /* called when plugin is initialized */
  59575. + int (*init) (reiser4_plugin * plugin);
  59576. + /* called when plugin is unloaded */
  59577. + int (*done) (reiser4_plugin * plugin);
  59578. + /* load given plugin from disk */
  59579. + int (*load) (struct inode *inode,
  59580. + reiser4_plugin * plugin, char **area, int *len);
  59581. + /* how much space is required to store this plugin's state
  59582. + in stat-data */
  59583. + int (*save_len) (struct inode *inode, reiser4_plugin * plugin);
  59584. + /* save persistent plugin-data to disk */
  59585. + int (*save) (struct inode *inode, reiser4_plugin * plugin,
  59586. + char **area);
  59587. + /* alignment requirement for on-disk state of this plugin
  59588. + in number of bytes */
  59589. + int alignment;
  59590. + /* install itself into given inode. This can return error
  59591. + (e.g., you cannot change hash of non-empty directory). */
  59592. + int (*change) (struct inode *inode, reiser4_plugin * plugin,
  59593. + pset_member memb);
  59594. + /* install itself into given inode. This can return error
  59595. + (e.g., you cannot change hash of non-empty directory). */
  59596. + int (*inherit) (struct inode *inode, struct inode *parent,
  59597. + reiser4_plugin * plugin);
  59598. +};
  59599. +
  59600. +/* functions implemented in fs/reiser4/plugin/plugin.c */
  59601. +
  59602. +/* stores plugin reference in reiser4-specific part of inode */
  59603. +extern int set_object_plugin(struct inode *inode, reiser4_plugin_id id);
  59604. +extern int init_plugins(void);
  59605. +
  59606. +/* builtin plugins */
  59607. +
  59608. +/* builtin hash-plugins */
  59609. +
  59610. +typedef enum {
  59611. + RUPASOV_HASH_ID,
  59612. + R5_HASH_ID,
  59613. + TEA_HASH_ID,
  59614. + FNV1_HASH_ID,
  59615. + DEGENERATE_HASH_ID,
  59616. + LAST_HASH_ID
  59617. +} reiser4_hash_id;
  59618. +
  59619. +/* builtin cipher plugins */
  59620. +
  59621. +typedef enum {
  59622. + NONE_CIPHER_ID,
  59623. + LAST_CIPHER_ID
  59624. +} reiser4_cipher_id;
  59625. +
  59626. +/* builtin digest plugins */
  59627. +
  59628. +typedef enum {
  59629. + SHA256_32_DIGEST_ID,
  59630. + LAST_DIGEST_ID
  59631. +} reiser4_digest_id;
  59632. +
  59633. +/* builtin compression mode plugins */
  59634. +typedef enum {
  59635. + NONE_COMPRESSION_MODE_ID,
  59636. + LATTD_COMPRESSION_MODE_ID,
  59637. + ULTIM_COMPRESSION_MODE_ID,
  59638. + FORCE_COMPRESSION_MODE_ID,
  59639. + CONVX_COMPRESSION_MODE_ID,
  59640. + LAST_COMPRESSION_MODE_ID
  59641. +} reiser4_compression_mode_id;
  59642. +
  59643. +/* builtin cluster plugins */
  59644. +typedef enum {
  59645. + CLUSTER_64K_ID,
  59646. + CLUSTER_32K_ID,
  59647. + CLUSTER_16K_ID,
  59648. + CLUSTER_8K_ID,
  59649. + CLUSTER_4K_ID,
  59650. + LAST_CLUSTER_ID
  59651. +} reiser4_cluster_id;
  59652. +
  59653. +/* builtin tail packing policies */
  59654. +typedef enum {
  59655. + NEVER_TAILS_FORMATTING_ID,
  59656. + ALWAYS_TAILS_FORMATTING_ID,
  59657. + SMALL_FILE_FORMATTING_ID,
  59658. + LAST_TAIL_FORMATTING_ID
  59659. +} reiser4_formatting_id;
  59660. +
  59661. +/* builtin transaction models */
  59662. +typedef enum {
  59663. + HYBRID_TXMOD_ID,
  59664. + JOURNAL_TXMOD_ID,
  59665. + WA_TXMOD_ID,
  59666. + LAST_TXMOD_ID
  59667. +} reiser4_txmod_id;
  59668. +
  59669. +
  59670. +/* data type used to pack parameters that we pass to vfs object creation
  59671. + function create_object() */
  59672. +struct reiser4_object_create_data {
  59673. + /* plugin to control created object */
  59674. + reiser4_file_id id;
  59675. + /* mode of regular file, directory or special file */
  59676. +/* what happens if some other sort of perm plugin is in use? */
  59677. + umode_t mode;
  59678. + /* rdev of special file */
  59679. + dev_t rdev;
  59680. + /* symlink target */
  59681. + const char *name;
  59682. + /* add here something for non-standard objects you invent, like
  59683. + query for interpolation file etc. */
  59684. +
  59685. + struct reiser4_crypto_info *crypto;
  59686. +
  59687. + struct inode *parent;
  59688. + struct dentry *dentry;
  59689. +};
  59690. +
  59691. +/* description of directory entry being created/destroyed/sought for
  59692. +
  59693. + It is passed down to the directory plugin and further to the
  59694. + directory item plugin methods. Creation of a new directory entry is done in
  59695. + several stages: first we search for an entry with the same name, then
  59696. + create a new one. reiser4_dir_entry_desc is used to store some information
  59697. + collected at some stage of this process and required later: key of
  59698. + item that we want to insert/delete and pointer to an object that will
  59699. + be bound by the new directory entry. Probably some more fields will
  59700. + be added there.
  59701. +
  59702. +*/
  59703. +struct reiser4_dir_entry_desc {
  59704. + /* key of directory entry */
  59705. + reiser4_key key;
  59706. + /* object bound by this entry. */
  59707. + struct inode *obj;
  59708. +};
  59709. +
  59710. +#define MAX_PLUGIN_TYPE_LABEL_LEN 32
  59711. +#define MAX_PLUGIN_PLUG_LABEL_LEN 32
  59712. +
  59713. +#define PLUGIN_BY_ID(TYPE, ID, FIELD) \
  59714. +static inline TYPE *TYPE ## _by_id(reiser4_plugin_id id) \
  59715. +{ \
  59716. + reiser4_plugin *plugin = plugin_by_id(ID, id); \
  59717. + return plugin ? &plugin->FIELD : NULL; \
  59718. +} \
  59719. +static inline TYPE *TYPE ## _by_disk_id(reiser4_tree * tree, d16 *id) \
  59720. +{ \
  59721. + reiser4_plugin *plugin = plugin_by_disk_id(tree, ID, id); \
  59722. + return plugin ? &plugin->FIELD : NULL; \
  59723. +} \
  59724. +static inline TYPE *TYPE ## _by_unsafe_id(reiser4_plugin_id id) \
  59725. +{ \
  59726. + reiser4_plugin *plugin = plugin_by_unsafe_id(ID, id); \
  59727. + return plugin ? &plugin->FIELD : NULL; \
  59728. +} \
  59729. +static inline reiser4_plugin* TYPE ## _to_plugin(TYPE* plugin) \
  59730. +{ \
  59731. + return (reiser4_plugin *) plugin; \
  59732. +} \
  59733. +static inline reiser4_plugin_id TYPE ## _id(TYPE* plugin) \
  59734. +{ \
  59735. + return TYPE ## _to_plugin(plugin)->h.id; \
  59736. +} \
  59737. +typedef struct { int foo; } TYPE ## _plugin_dummy
  59738. +
  59739. +static inline int get_release_number_major(void)
  59740. +{
  59741. + return LAST_FORMAT_ID - 1;
  59742. +}
  59743. +
  59744. +static inline int get_release_number_minor(void)
  59745. +{
  59746. + return PLUGIN_LIBRARY_VERSION;
  59747. +}
  59748. +
  59749. +PLUGIN_BY_ID(item_plugin, REISER4_ITEM_PLUGIN_TYPE, item);
  59750. +PLUGIN_BY_ID(file_plugin, REISER4_FILE_PLUGIN_TYPE, file);
  59751. +PLUGIN_BY_ID(dir_plugin, REISER4_DIR_PLUGIN_TYPE, dir);
  59752. +PLUGIN_BY_ID(node_plugin, REISER4_NODE_PLUGIN_TYPE, node);
  59753. +PLUGIN_BY_ID(sd_ext_plugin, REISER4_SD_EXT_PLUGIN_TYPE, sd_ext);
  59754. +PLUGIN_BY_ID(perm_plugin, REISER4_PERM_PLUGIN_TYPE, perm);
  59755. +PLUGIN_BY_ID(hash_plugin, REISER4_HASH_PLUGIN_TYPE, hash);
  59756. +PLUGIN_BY_ID(fibration_plugin, REISER4_FIBRATION_PLUGIN_TYPE, fibration);
  59757. +PLUGIN_BY_ID(cipher_plugin, REISER4_CIPHER_PLUGIN_TYPE, cipher);
  59758. +PLUGIN_BY_ID(digest_plugin, REISER4_DIGEST_PLUGIN_TYPE, digest);
  59759. +PLUGIN_BY_ID(compression_plugin, REISER4_COMPRESSION_PLUGIN_TYPE, compression);
  59760. +PLUGIN_BY_ID(formatting_plugin, REISER4_FORMATTING_PLUGIN_TYPE, formatting);
  59761. +PLUGIN_BY_ID(disk_format_plugin, REISER4_FORMAT_PLUGIN_TYPE, format);
  59762. +PLUGIN_BY_ID(jnode_plugin, REISER4_JNODE_PLUGIN_TYPE, jnode);
  59763. +PLUGIN_BY_ID(compression_mode_plugin, REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
  59764. + compression_mode);
  59765. +PLUGIN_BY_ID(cluster_plugin, REISER4_CLUSTER_PLUGIN_TYPE, clust);
  59766. +PLUGIN_BY_ID(txmod_plugin, REISER4_TXMOD_PLUGIN_TYPE, txmod);
  59767. +
  59768. +extern int save_plugin_id(reiser4_plugin * plugin, d16 * area);
  59769. +
  59770. +extern struct list_head *get_plugin_list(reiser4_plugin_type type_id);
  59771. +
  59772. +#define for_all_plugins(ptype, plugin) \
  59773. +for (plugin = list_entry(get_plugin_list(ptype)->next, reiser4_plugin, h.linkage); \
  59774. + get_plugin_list(ptype) != &plugin->h.linkage; \
  59775. + plugin = list_entry(plugin->h.linkage.next, reiser4_plugin, h.linkage))
  59776. +
  59777. +
  59778. +extern int grab_plugin_pset(struct inode *self, struct inode *ancestor,
  59779. + pset_member memb);
  59780. +extern int force_plugin_pset(struct inode *self, pset_member memb,
  59781. + reiser4_plugin *plug);
  59782. +extern int finish_pset(struct inode *inode);
  59783. +
  59784. +/* defined in fs/reiser4/plugin/object.c */
  59785. +extern file_plugin file_plugins[LAST_FILE_PLUGIN_ID];
  59786. +/* defined in fs/reiser4/plugin/object.c */
  59787. +extern dir_plugin dir_plugins[LAST_DIR_ID];
  59788. +/* defined in fs/reiser4/plugin/item/static_stat.c */
  59789. +extern sd_ext_plugin sd_ext_plugins[LAST_SD_EXTENSION];
  59790. +/* defined in fs/reiser4/plugin/hash.c */
  59791. +extern hash_plugin hash_plugins[LAST_HASH_ID];
  59792. +/* defined in fs/reiser4/plugin/fibration.c */
  59793. +extern fibration_plugin fibration_plugins[LAST_FIBRATION_ID];
  59794. +/* defined in fs/reiser4/plugin/txmod.c */
  59795. +extern txmod_plugin txmod_plugins[LAST_TXMOD_ID];
  59796. +/* defined in fs/reiser4/plugin/crypt.c */
  59797. +extern cipher_plugin cipher_plugins[LAST_CIPHER_ID];
  59798. +/* defined in fs/reiser4/plugin/digest.c */
  59799. +extern digest_plugin digest_plugins[LAST_DIGEST_ID];
  59800. +/* defined in fs/reiser4/plugin/compress/compress.c */
  59801. +extern compression_plugin compression_plugins[LAST_COMPRESSION_ID];
  59802. +/* defined in fs/reiser4/plugin/compress/compression_mode.c */
  59803. +extern compression_mode_plugin
  59804. +compression_mode_plugins[LAST_COMPRESSION_MODE_ID];
  59805. +/* defined in fs/reiser4/plugin/cluster.c */
  59806. +extern cluster_plugin cluster_plugins[LAST_CLUSTER_ID];
  59807. +/* defined in fs/reiser4/plugin/tail.c */
  59808. +extern formatting_plugin formatting_plugins[LAST_TAIL_FORMATTING_ID];
  59809. +/* defined in fs/reiser4/plugin/security/security.c */
  59810. +extern perm_plugin perm_plugins[LAST_PERM_ID];
  59811. +/* defined in fs/reiser4/plugin/item/item.c */
  59812. +extern item_plugin item_plugins[LAST_ITEM_ID];
  59813. +/* defined in fs/reiser4/plugin/node/node.c */
  59814. +extern node_plugin node_plugins[LAST_NODE_ID];
  59815. +/* defined in fs/reiser4/plugin/disk_format/disk_format.c */
  59816. +extern disk_format_plugin format_plugins[LAST_FORMAT_ID];
  59817. +
  59818. +/* __FS_REISER4_PLUGIN_TYPES_H__ */
  59819. +#endif
  59820. +
  59821. +/* Make Linus happy.
  59822. + Local variables:
  59823. + c-indentation-style: "K&R"
  59824. + mode-name: "LC"
  59825. + c-basic-offset: 8
  59826. + tab-width: 8
  59827. + fill-column: 120
  59828. + End:
  59829. +*/
  59830. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/plugin_header.h linux-4.14.2/fs/reiser4/plugin/plugin_header.h
  59831. --- linux-4.14.2.orig/fs/reiser4/plugin/plugin_header.h 1970-01-01 01:00:00.000000000 +0100
  59832. +++ linux-4.14.2/fs/reiser4/plugin/plugin_header.h 2017-11-26 22:13:09.000000000 +0100
  59833. @@ -0,0 +1,150 @@
  59834. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  59835. +
  59836. +/* plugin header. Data structures required by all plugin types. */
  59837. +
  59838. +#if !defined(__PLUGIN_HEADER_H__)
  59839. +#define __PLUGIN_HEADER_H__
  59840. +
  59841. +/* plugin data-types and constants */
  59842. +
  59843. +#include "../debug.h"
  59844. +#include "../dformat.h"
  59845. +
  59846. +/* The list of Reiser4 interfaces */
  59847. +typedef enum {
  59848. + REISER4_FILE_PLUGIN_TYPE, /* manage VFS objects */
  59849. + REISER4_DIR_PLUGIN_TYPE, /* manage directories */
  59850. + REISER4_ITEM_PLUGIN_TYPE, /* manage items */
  59851. + REISER4_NODE_PLUGIN_TYPE, /* manage formatted nodes */
  59852. + REISER4_HASH_PLUGIN_TYPE, /* hash methods */
  59853. + REISER4_FIBRATION_PLUGIN_TYPE, /* directory fibrations */
  59854. + REISER4_FORMATTING_PLUGIN_TYPE, /* dispatching policy */
  59855. + REISER4_PERM_PLUGIN_TYPE, /* stub (vacancy) */
  59856. + REISER4_SD_EXT_PLUGIN_TYPE, /* manage stat-data extensions */
  59857. + REISER4_FORMAT_PLUGIN_TYPE, /* disk format specifications */
  59858. + REISER4_JNODE_PLUGIN_TYPE, /* manage in-memory headers */
  59859. + REISER4_CIPHER_PLUGIN_TYPE, /* cipher transform methods */
  59860. + REISER4_DIGEST_PLUGIN_TYPE, /* digest transform methods */
  59861. + REISER4_COMPRESSION_PLUGIN_TYPE, /* compression methods */
  59862. + REISER4_COMPRESSION_MODE_PLUGIN_TYPE, /* dispatching policies */
  59863. + REISER4_CLUSTER_PLUGIN_TYPE, /* manage logical clusters */
  59864. + REISER4_TXMOD_PLUGIN_TYPE, /* transaction models */
  59865. + REISER4_PLUGIN_TYPES
  59866. +} reiser4_plugin_type;
  59867. +
  59868. +/* Supported plugin groups */
  59869. +typedef enum {
  59870. + REISER4_DIRECTORY_FILE,
  59871. + REISER4_REGULAR_FILE,
  59872. + REISER4_SYMLINK_FILE,
  59873. + REISER4_SPECIAL_FILE,
  59874. +} file_plugin_group;
  59875. +
  59876. +struct reiser4_plugin_ops;
  59877. +/* generic plugin operations, supported by each
  59878. + plugin type. */
  59879. +typedef struct reiser4_plugin_ops reiser4_plugin_ops;
  59880. +
  59881. +/* the common part of all plugin instances. */
  59882. +typedef struct plugin_header {
  59883. + /* plugin type */
  59884. + reiser4_plugin_type type_id;
  59885. + /* id of this plugin */
  59886. + reiser4_plugin_id id;
  59887. + /* bitmask of groups the plugin belongs to. */
  59888. + reiser4_plugin_groups groups;
  59889. + /* plugin operations */
  59890. + reiser4_plugin_ops *pops;
  59891. +/* NIKITA-FIXME-HANS: usage of and access to label and desc is not commented and
  59892. + * defined. */
  59893. + /* short label of this plugin */
  59894. + const char *label;
  59895. + /* descriptive string. */
  59896. + const char *desc;
  59897. + /* list linkage */
  59898. + struct list_head linkage;
  59899. +} plugin_header;
  59900. +
  59901. +#define plugin_of_group(plug, group) (plug->h.groups & (1 << group))
  59902. +
  59903. +/* PRIVATE INTERFACES */
  59904. +/* NIKITA-FIXME-HANS: what is this for and why does it duplicate what is in
  59905. + * plugin_header? */
  59906. +/* plugin type representation. */
  59907. +struct reiser4_plugin_type_data {
  59908. + /* internal plugin type identifier. Should coincide with
  59909. + index of this item in plugins[] array. */
  59910. + reiser4_plugin_type type_id;
  59911. + /* short symbolic label of this plugin type. Should be no longer
  59912. + than MAX_PLUGIN_TYPE_LABEL_LEN characters including '\0'. */
  59913. + const char *label;
  59914. + /* plugin type description longer than .label */
  59915. + const char *desc;
  59916. +
  59917. +/* NIKITA-FIXME-HANS: define built-in */
  59918. + /* number of built-in plugin instances of this type */
  59919. + int builtin_num;
  59920. + /* array of built-in plugins */
  59921. + void *builtin;
  59922. + struct list_head plugins_list;
  59923. + size_t size;
  59924. +};
  59925. +
  59926. +extern struct reiser4_plugin_type_data plugins[REISER4_PLUGIN_TYPES];
  59927. +
  59928. +int is_plugin_type_valid(reiser4_plugin_type type);
  59929. +int is_plugin_id_valid(reiser4_plugin_type type, reiser4_plugin_id id);
  59930. +
  59931. +static inline reiser4_plugin *plugin_at(struct reiser4_plugin_type_data *ptype,
  59932. + int i)
  59933. +{
  59934. + char *builtin;
  59935. +
  59936. + builtin = ptype->builtin;
  59937. + return (reiser4_plugin *) (builtin + i * ptype->size);
  59938. +}
  59939. +
  59940. +/* return plugin by its @type_id and @id */
  59941. +static inline reiser4_plugin *plugin_by_id(reiser4_plugin_type type,
  59942. + reiser4_plugin_id id)
  59943. +{
  59944. + assert("nikita-1651", is_plugin_type_valid(type));
  59945. + assert("nikita-1652", is_plugin_id_valid(type, id));
  59946. + return plugin_at(&plugins[type], id);
  59947. +}
  59948. +
  59949. +extern reiser4_plugin *plugin_by_unsafe_id(reiser4_plugin_type type_id,
  59950. + reiser4_plugin_id id);
  59951. +
  59952. +/**
  59953. + * plugin_by_disk_id - get reiser4_plugin
  59954. + * @type_id: plugin type id
  59955. + * @plugin_id: plugin id in disk format
  59956. + *
  59957. + * Returns reiser4_plugin by plugin type id and plugin id.
  59958. + */
  59959. +static inline reiser4_plugin *plugin_by_disk_id(reiser4_tree * tree UNUSED_ARG,
  59960. + reiser4_plugin_type type_id,
  59961. + __le16 *plugin_id)
  59962. +{
  59963. + /*
  59964. + * what we should do properly is to maintain within each file-system a
  59965. + * dictionary that maps on-disk plugin ids to "universal" ids. This
  59966. + * dictionary will be resolved on mount time, so that this function
  59967. + * will perform just one additional array lookup.
  59968. + */
  59969. + return plugin_by_unsafe_id(type_id, le16_to_cpu(*plugin_id));
  59970. +}
  59971. +
  59972. +/* __PLUGIN_HEADER_H__ */
  59973. +#endif
  59974. +
  59975. +/*
  59976. + * Local variables:
  59977. + * c-indentation-style: "K&R"
  59978. + * mode-name: "LC"
  59979. + * c-basic-offset: 8
  59980. + * tab-width: 8
  59981. + * fill-column: 79
  59982. + * End:
  59983. + */
  59984. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/plugin_set.c linux-4.14.2/fs/reiser4/plugin/plugin_set.c
  59985. --- linux-4.14.2.orig/fs/reiser4/plugin/plugin_set.c 1970-01-01 01:00:00.000000000 +0100
  59986. +++ linux-4.14.2/fs/reiser4/plugin/plugin_set.c 2017-11-26 22:13:09.000000000 +0100
  59987. @@ -0,0 +1,387 @@
  59988. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  59989. + * reiser4/README */
  59990. +/* This file contains Reiser4 plugin set operations */
  59991. +
  59992. +/* plugin sets
  59993. + *
  59994. + * Each file in reiser4 is controlled by a whole set of plugins (file plugin,
  59995. + * directory plugin, hash plugin, tail policy plugin, security plugin, etc.)
  59996. + * assigned (inherited, deduced from mode bits, etc.) at creation time. This
  59997. + * set of plugins (so called pset) is described by structure plugin_set (see
  59998. + * plugin/plugin_set.h), which contains pointers to all required plugins.
  59999. + *
  60000. + * Children can inherit some pset members from their parent, however sometimes
  60001. + * it is useful to specify members different from parent ones. Since object's
  60002. + * pset can not be easily changed without fatal consequences, we use for this
  60003. + * purpose another special plugin table (so called hset, or heir set) described
  60004. + * by the same structure.
  60005. + *
  60006. + * Inode only stores pointers to pset and hset. Different inodes with the
  60007. + * same set of pset (hset) members point to the same pset (hset). This is
  60008. + * achieved by storing psets and hsets in a global hash table. Races are avoided
  60009. + * by simple (and efficient so far) solution of never recycling psets, even
  60010. + * when last inode pointing to it is destroyed.
  60011. + */
  60012. +
  60013. +#include "../debug.h"
  60014. +#include "../super.h"
  60015. +#include "plugin_set.h"
  60016. +
  60017. +#include <linux/slab.h>
  60018. +#include <linux/stddef.h>
  60019. +
  60020. +/* slab for plugin sets */
  60021. +static struct kmem_cache *plugin_set_slab;
  60022. +
  60023. +static spinlock_t plugin_set_lock[8] __cacheline_aligned_in_smp = {
  60024. + __SPIN_LOCK_UNLOCKED(plugin_set_lock[0]),
  60025. + __SPIN_LOCK_UNLOCKED(plugin_set_lock[1]),
  60026. + __SPIN_LOCK_UNLOCKED(plugin_set_lock[2]),
  60027. + __SPIN_LOCK_UNLOCKED(plugin_set_lock[3]),
  60028. + __SPIN_LOCK_UNLOCKED(plugin_set_lock[4]),
  60029. + __SPIN_LOCK_UNLOCKED(plugin_set_lock[5]),
  60030. + __SPIN_LOCK_UNLOCKED(plugin_set_lock[6]),
  60031. + __SPIN_LOCK_UNLOCKED(plugin_set_lock[7])
  60032. +};
  60033. +
  60034. +/* hash table support */
  60035. +
  60036. +#define PS_TABLE_SIZE (32)
  60037. +
  60038. +static inline plugin_set *cast_to(const unsigned long *a)
  60039. +{
  60040. + return container_of(a, plugin_set, hashval);
  60041. +}
  60042. +
  60043. +static inline int pseq(const unsigned long *a1, const unsigned long *a2)
  60044. +{
  60045. + plugin_set *set1;
  60046. + plugin_set *set2;
  60047. +
  60048. + /* make sure fields are not missed in the code below */
  60049. + cassert(sizeof *set1 ==
  60050. + sizeof set1->hashval +
  60051. + sizeof set1->link +
  60052. + sizeof set1->file +
  60053. + sizeof set1->dir +
  60054. + sizeof set1->perm +
  60055. + sizeof set1->formatting +
  60056. + sizeof set1->hash +
  60057. + sizeof set1->fibration +
  60058. + sizeof set1->sd +
  60059. + sizeof set1->dir_item +
  60060. + sizeof set1->cipher +
  60061. + sizeof set1->digest +
  60062. + sizeof set1->compression +
  60063. + sizeof set1->compression_mode +
  60064. + sizeof set1->cluster +
  60065. + sizeof set1->create);
  60066. +
  60067. + set1 = cast_to(a1);
  60068. + set2 = cast_to(a2);
  60069. + return
  60070. + set1->hashval == set2->hashval &&
  60071. + set1->file == set2->file &&
  60072. + set1->dir == set2->dir &&
  60073. + set1->perm == set2->perm &&
  60074. + set1->formatting == set2->formatting &&
  60075. + set1->hash == set2->hash &&
  60076. + set1->fibration == set2->fibration &&
  60077. + set1->sd == set2->sd &&
  60078. + set1->dir_item == set2->dir_item &&
  60079. + set1->cipher == set2->cipher &&
  60080. + set1->digest == set2->digest &&
  60081. + set1->compression == set2->compression &&
  60082. + set1->compression_mode == set2->compression_mode &&
  60083. + set1->cluster == set2->cluster &&
  60084. + set1->create == set2->create;
  60085. +}
  60086. +
  60087. +#define HASH_FIELD(hash, set, field) \
  60088. +({ \
  60089. + (hash) += (unsigned long)(set)->field >> 2; \
  60090. +})
  60091. +
  60092. +static inline unsigned long calculate_hash(const plugin_set * set)
  60093. +{
  60094. + unsigned long result;
  60095. +
  60096. + result = 0;
  60097. + HASH_FIELD(result, set, file);
  60098. + HASH_FIELD(result, set, dir);
  60099. + HASH_FIELD(result, set, perm);
  60100. + HASH_FIELD(result, set, formatting);
  60101. + HASH_FIELD(result, set, hash);
  60102. + HASH_FIELD(result, set, fibration);
  60103. + HASH_FIELD(result, set, sd);
  60104. + HASH_FIELD(result, set, dir_item);
  60105. + HASH_FIELD(result, set, cipher);
  60106. + HASH_FIELD(result, set, digest);
  60107. + HASH_FIELD(result, set, compression);
  60108. + HASH_FIELD(result, set, compression_mode);
  60109. + HASH_FIELD(result, set, cluster);
  60110. + HASH_FIELD(result, set, create);
  60111. + return result & (PS_TABLE_SIZE - 1);
  60112. +}
  60113. +
  60114. +static inline unsigned long
  60115. +pshash(ps_hash_table * table, const unsigned long *a)
  60116. +{
  60117. + return *a;
  60118. +}
  60119. +
  60120. +/* The hash table definition */
  60121. +#define KMALLOC(size) kmalloc((size), reiser4_ctx_gfp_mask_get())
  60122. +#define KFREE(ptr, size) kfree(ptr)
  60123. +TYPE_SAFE_HASH_DEFINE(ps, plugin_set, unsigned long, hashval, link, pshash,
  60124. + pseq);
  60125. +#undef KFREE
  60126. +#undef KMALLOC
  60127. +
  60128. +static ps_hash_table ps_table;
  60129. +static plugin_set empty_set = {
  60130. + .hashval = 0,
  60131. + .file = NULL,
  60132. + .dir = NULL,
  60133. + .perm = NULL,
  60134. + .formatting = NULL,
  60135. + .hash = NULL,
  60136. + .fibration = NULL,
  60137. + .sd = NULL,
  60138. + .dir_item = NULL,
  60139. + .cipher = NULL,
  60140. + .digest = NULL,
  60141. + .compression = NULL,
  60142. + .compression_mode = NULL,
  60143. + .cluster = NULL,
  60144. + .create = NULL,
  60145. + .link = {NULL}
  60146. +};
  60147. +
  60148. +plugin_set *plugin_set_get_empty(void)
  60149. +{
  60150. + return &empty_set;
  60151. +}
  60152. +
  60153. +void plugin_set_put(plugin_set * set)
  60154. +{
  60155. +}
  60156. +
  60157. +static inline unsigned long *pset_field(plugin_set * set, int offset)
  60158. +{
  60159. + return (unsigned long *)(((char *)set) + offset);
  60160. +}
  60161. +
  60162. +static int plugin_set_field(plugin_set ** set, const unsigned long val,
  60163. + const int offset)
  60164. +{
  60165. + unsigned long *spot;
  60166. + spinlock_t *lock;
  60167. + plugin_set replica;
  60168. + plugin_set *twin;
  60169. + plugin_set *psal;
  60170. + plugin_set *orig;
  60171. +
  60172. + assert("nikita-2902", set != NULL);
  60173. + assert("nikita-2904", *set != NULL);
  60174. +
  60175. + spot = pset_field(*set, offset);
  60176. + if (unlikely(*spot == val))
  60177. + return 0;
  60178. +
  60179. + replica = *(orig = *set);
  60180. + *pset_field(&replica, offset) = val;
  60181. + replica.hashval = calculate_hash(&replica);
  60182. + rcu_read_lock();
  60183. + twin = ps_hash_find(&ps_table, &replica.hashval);
  60184. + if (unlikely(twin == NULL)) {
  60185. + rcu_read_unlock();
  60186. + psal = kmem_cache_alloc(plugin_set_slab,
  60187. + reiser4_ctx_gfp_mask_get());
  60188. + if (psal == NULL)
  60189. + return RETERR(-ENOMEM);
  60190. + *psal = replica;
  60191. + lock = &plugin_set_lock[replica.hashval & 7];
  60192. + spin_lock(lock);
  60193. + twin = ps_hash_find(&ps_table, &replica.hashval);
  60194. + if (likely(twin == NULL)) {
  60195. + *set = psal;
  60196. + ps_hash_insert_rcu(&ps_table, psal);
  60197. + } else {
  60198. + *set = twin;
  60199. + kmem_cache_free(plugin_set_slab, psal);
  60200. + }
  60201. + spin_unlock(lock);
  60202. + } else {
  60203. + rcu_read_unlock();
  60204. + *set = twin;
  60205. + }
  60206. + return 0;
  60207. +}
  60208. +
  60209. +static struct {
  60210. + int offset;
  60211. + reiser4_plugin_groups groups;
  60212. + reiser4_plugin_type type;
  60213. +} pset_descr[PSET_LAST] = {
  60214. + [PSET_FILE] = {
  60215. + .offset = offsetof(plugin_set, file),
  60216. + .type = REISER4_FILE_PLUGIN_TYPE,
  60217. + .groups = 0
  60218. + },
  60219. + [PSET_DIR] = {
  60220. + .offset = offsetof(plugin_set, dir),
  60221. + .type = REISER4_DIR_PLUGIN_TYPE,
  60222. + .groups = 0
  60223. + },
  60224. + [PSET_PERM] = {
  60225. + .offset = offsetof(plugin_set, perm),
  60226. + .type = REISER4_PERM_PLUGIN_TYPE,
  60227. + .groups = 0
  60228. + },
  60229. + [PSET_FORMATTING] = {
  60230. + .offset = offsetof(plugin_set, formatting),
  60231. + .type = REISER4_FORMATTING_PLUGIN_TYPE,
  60232. + .groups = 0
  60233. + },
  60234. + [PSET_HASH] = {
  60235. + .offset = offsetof(plugin_set, hash),
  60236. + .type = REISER4_HASH_PLUGIN_TYPE,
  60237. + .groups = 0
  60238. + },
  60239. + [PSET_FIBRATION] = {
  60240. + .offset = offsetof(plugin_set, fibration),
  60241. + .type = REISER4_FIBRATION_PLUGIN_TYPE,
  60242. + .groups = 0
  60243. + },
  60244. + [PSET_SD] = {
  60245. + .offset = offsetof(plugin_set, sd),
  60246. + .type = REISER4_ITEM_PLUGIN_TYPE,
  60247. + .groups = (1 << STAT_DATA_ITEM_TYPE)
  60248. + },
  60249. + [PSET_DIR_ITEM] = {
  60250. + .offset = offsetof(plugin_set, dir_item),
  60251. + .type = REISER4_ITEM_PLUGIN_TYPE,
  60252. + .groups = (1 << DIR_ENTRY_ITEM_TYPE)
  60253. + },
  60254. + [PSET_CIPHER] = {
  60255. + .offset = offsetof(plugin_set, cipher),
  60256. + .type = REISER4_CIPHER_PLUGIN_TYPE,
  60257. + .groups = 0
  60258. + },
  60259. + [PSET_DIGEST] = {
  60260. + .offset = offsetof(plugin_set, digest),
  60261. + .type = REISER4_DIGEST_PLUGIN_TYPE,
  60262. + .groups = 0
  60263. + },
  60264. + [PSET_COMPRESSION] = {
  60265. + .offset = offsetof(plugin_set, compression),
  60266. + .type = REISER4_COMPRESSION_PLUGIN_TYPE,
  60267. + .groups = 0
  60268. + },
  60269. + [PSET_COMPRESSION_MODE] = {
  60270. + .offset = offsetof(plugin_set, compression_mode),
  60271. + .type = REISER4_COMPRESSION_MODE_PLUGIN_TYPE,
  60272. + .groups = 0
  60273. + },
  60274. + [PSET_CLUSTER] = {
  60275. + .offset = offsetof(plugin_set, cluster),
  60276. + .type = REISER4_CLUSTER_PLUGIN_TYPE,
  60277. + .groups = 0
  60278. + },
  60279. + [PSET_CREATE] = {
  60280. + .offset = offsetof(plugin_set, create),
  60281. + .type = REISER4_FILE_PLUGIN_TYPE,
  60282. + .groups = (1 << REISER4_REGULAR_FILE)
  60283. + }
  60284. +};
  60285. +
  60286. +#define DEFINE_PSET_OPS(PREFIX) \
  60287. + reiser4_plugin_type PREFIX##_member_to_type_unsafe(pset_member memb) \
  60288. +{ \
  60289. + if (memb > PSET_LAST) \
  60290. + return REISER4_PLUGIN_TYPES; \
  60291. + return pset_descr[memb].type; \
  60292. +} \
  60293. + \
  60294. +int PREFIX##_set_unsafe(plugin_set ** set, pset_member memb, \
  60295. + reiser4_plugin * plugin) \
  60296. +{ \
  60297. + assert("nikita-3492", set != NULL); \
  60298. + assert("nikita-3493", *set != NULL); \
  60299. + assert("nikita-3494", plugin != NULL); \
  60300. + assert("nikita-3495", 0 <= memb && memb < PSET_LAST); \
  60301. + assert("nikita-3496", plugin->h.type_id == pset_descr[memb].type); \
  60302. + \
  60303. + if (pset_descr[memb].groups) \
  60304. + if (!(pset_descr[memb].groups & plugin->h.groups)) \
  60305. + return -EINVAL; \
  60306. + \
  60307. + return plugin_set_field(set, \
  60308. + (unsigned long)plugin, pset_descr[memb].offset); \
  60309. +} \
  60310. + \
  60311. +reiser4_plugin *PREFIX##_get(plugin_set * set, pset_member memb) \
  60312. +{ \
  60313. + assert("nikita-3497", set != NULL); \
  60314. + assert("nikita-3498", 0 <= memb && memb < PSET_LAST); \
  60315. + \
  60316. + return *(reiser4_plugin **) (((char *)set) + pset_descr[memb].offset); \
  60317. +}
  60318. +
  60319. +DEFINE_PSET_OPS(aset);
  60320. +
  60321. +int set_plugin(plugin_set ** set, pset_member memb, reiser4_plugin * plugin)
  60322. +{
  60323. + return plugin_set_field(set,
  60324. + (unsigned long)plugin, pset_descr[memb].offset);
  60325. +}
  60326. +
  60327. +/**
  60328. + * init_plugin_set - create plugin set cache and hash table
  60329. + *
  60330. + * Initializes slab cache of plugin_set-s and their hash table. It is part of
  60331. + * reiser4 module initialization.
  60332. + */
  60333. +int init_plugin_set(void)
  60334. +{
  60335. + int result;
  60336. +
  60337. + result = ps_hash_init(&ps_table, PS_TABLE_SIZE);
  60338. + if (result == 0) {
  60339. + plugin_set_slab = kmem_cache_create("plugin_set",
  60340. + sizeof(plugin_set), 0,
  60341. + SLAB_HWCACHE_ALIGN,
  60342. + NULL);
  60343. + if (plugin_set_slab == NULL)
  60344. + result = RETERR(-ENOMEM);
  60345. + }
  60346. + return result;
  60347. +}
  60348. +
  60349. +/**
  60350. + * done_plugin_set - delete plugin_set cache and plugin_set hash table
  60351. + *
  60352. + * This is called on reiser4 module unloading or system shutdown.
  60353. + */
  60354. +void done_plugin_set(void)
  60355. +{
  60356. + plugin_set *cur, *next;
  60357. +
  60358. + for_all_in_htable(&ps_table, ps, cur, next) {
  60359. + ps_hash_remove(&ps_table, cur);
  60360. + kmem_cache_free(plugin_set_slab, cur);
  60361. + }
  60362. + destroy_reiser4_cache(&plugin_set_slab);
  60363. + ps_hash_done(&ps_table);
  60364. +}
  60365. +
  60366. +/*
  60367. + * Local variables:
  60368. + * c-indentation-style: "K&R"
  60369. + * mode-name: "LC"
  60370. + * c-basic-offset: 8
  60371. + * tab-width: 8
  60372. + * fill-column: 120
  60373. + * End:
  60374. + */
  60375. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/plugin_set.h linux-4.14.2/fs/reiser4/plugin/plugin_set.h
  60376. --- linux-4.14.2.orig/fs/reiser4/plugin/plugin_set.h 1970-01-01 01:00:00.000000000 +0100
  60377. +++ linux-4.14.2/fs/reiser4/plugin/plugin_set.h 2017-11-26 22:13:09.000000000 +0100
  60378. @@ -0,0 +1,78 @@
  60379. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  60380. + * reiser4/README */
  60381. +
  60382. +/* Reiser4 plugin set definition.
  60383. + See fs/reiser4/plugin/plugin_set.c for details */
  60384. +
  60385. +#if !defined(__PLUGIN_SET_H__)
  60386. +#define __PLUGIN_SET_H__
  60387. +
  60388. +#include "../type_safe_hash.h"
  60389. +#include "plugin.h"
  60390. +
  60391. +#include <linux/rcupdate.h>
  60392. +
  60393. +struct plugin_set;
  60394. +typedef struct plugin_set plugin_set;
  60395. +
  60396. +TYPE_SAFE_HASH_DECLARE(ps, plugin_set);
  60397. +
  60398. +struct plugin_set {
  60399. + unsigned long hashval;
  60400. + /* plugin of file */
  60401. + file_plugin *file;
  60402. + /* plugin of dir */
  60403. + dir_plugin *dir;
  60404. + /* perm plugin for this file */
  60405. + perm_plugin *perm;
  60406. + /* tail policy plugin. Only meaningful for regular files */
  60407. + formatting_plugin *formatting;
  60408. + /* hash plugin. Only meaningful for directories. */
  60409. + hash_plugin *hash;
  60410. + /* fibration plugin. Only meaningful for directories. */
  60411. + fibration_plugin *fibration;
  60412. + /* plugin of stat-data */
  60413. + item_plugin *sd;
  60414. + /* plugin of items a directory is built of */
  60415. + item_plugin *dir_item;
  60416. + /* cipher plugin */
  60417. + cipher_plugin *cipher;
  60418. + /* digest plugin */
  60419. + digest_plugin *digest;
  60420. + /* compression plugin */
  60421. + compression_plugin *compression;
  60422. + /* compression mode plugin */
  60423. + compression_mode_plugin *compression_mode;
  60424. + /* cluster plugin */
  60425. + cluster_plugin *cluster;
  60426. + /* this specifies file plugin of regular children.
  60427. + only meaningful for directories */
  60428. + file_plugin *create;
  60429. + ps_hash_link link;
  60430. +};
  60431. +
  60432. +extern plugin_set *plugin_set_get_empty(void);
  60433. +extern void plugin_set_put(plugin_set * set);
  60434. +
  60435. +extern int init_plugin_set(void);
  60436. +extern void done_plugin_set(void);
  60437. +
  60438. +extern reiser4_plugin *aset_get(plugin_set * set, pset_member memb);
  60439. +extern int set_plugin(plugin_set ** set, pset_member memb,
  60440. + reiser4_plugin * plugin);
  60441. +extern int aset_set_unsafe(plugin_set ** set, pset_member memb,
  60442. + reiser4_plugin * plugin);
  60443. +extern reiser4_plugin_type aset_member_to_type_unsafe(pset_member memb);
  60444. +
  60445. +/* __PLUGIN_SET_H__ */
  60446. +#endif
  60447. +
  60448. +/* Make Linus happy.
  60449. + Local variables:
  60450. + c-indentation-style: "K&R"
  60451. + mode-name: "LC"
  60452. + c-basic-offset: 8
  60453. + tab-width: 8
  60454. + fill-column: 120
  60455. + End:
  60456. +*/
  60457. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/security/Makefile linux-4.14.2/fs/reiser4/plugin/security/Makefile
  60458. --- linux-4.14.2.orig/fs/reiser4/plugin/security/Makefile 1970-01-01 01:00:00.000000000 +0100
  60459. +++ linux-4.14.2/fs/reiser4/plugin/security/Makefile 2017-11-26 22:13:09.000000000 +0100
  60460. @@ -0,0 +1,4 @@
  60461. +obj-$(CONFIG_REISER4_FS) += security_plugins.o
  60462. +
  60463. +security_plugins-objs := \
  60464. + perm.o
  60465. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/security/perm.c linux-4.14.2/fs/reiser4/plugin/security/perm.c
  60466. --- linux-4.14.2.orig/fs/reiser4/plugin/security/perm.c 1970-01-01 01:00:00.000000000 +0100
  60467. +++ linux-4.14.2/fs/reiser4/plugin/security/perm.c 2017-11-26 22:13:09.000000000 +0100
  60468. @@ -0,0 +1,33 @@
  60469. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  60470. +
  60471. +/*
  60472. + * This file contains implementation of permission plugins.
  60473. + * See the comments in perm.h
  60474. + */
  60475. +
  60476. +#include "../plugin.h"
  60477. +#include "../plugin_header.h"
  60478. +#include "../../debug.h"
  60479. +
  60480. +perm_plugin perm_plugins[LAST_PERM_ID] = {
  60481. + [NULL_PERM_ID] = {
  60482. + .h = {
  60483. + .type_id = REISER4_PERM_PLUGIN_TYPE,
  60484. + .id = NULL_PERM_ID,
  60485. + .pops = NULL,
  60486. + .label = "null",
  60487. + .desc = "stub permission plugin",
  60488. + .linkage = {NULL, NULL}
  60489. + }
  60490. + }
  60491. +};
  60492. +
  60493. +/*
  60494. + * Local variables:
  60495. + * c-indentation-style: "K&R"
  60496. + * mode-name: "LC"
  60497. + * c-basic-offset: 8
  60498. + * tab-width: 8
  60499. + * fill-column: 79
  60500. + * End:
  60501. + */
  60502. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/security/perm.h linux-4.14.2/fs/reiser4/plugin/security/perm.h
  60503. --- linux-4.14.2.orig/fs/reiser4/plugin/security/perm.h 1970-01-01 01:00:00.000000000 +0100
  60504. +++ linux-4.14.2/fs/reiser4/plugin/security/perm.h 2017-11-26 22:13:09.000000000 +0100
  60505. @@ -0,0 +1,38 @@
  60506. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  60507. +
  60508. +/* Perm (short for "permissions") plugins common stuff. */
  60509. +
  60510. +#if !defined( __REISER4_PERM_H__ )
  60511. +#define __REISER4_PERM_H__
  60512. +
  60513. +#include "../../forward.h"
  60514. +#include "../plugin_header.h"
  60515. +
  60516. +#include <linux/types.h>
  60517. +
  60518. +/* Definition of permission plugin */
  60519. +/* NIKITA-FIXME-HANS: define what this is targeted for.
  60520. + It does not seem to be intended for use with sys_reiser4. Explain. */
  60521. +
  60522. +/* NOTE-EDWARD: This seems to be intended for deprecated sys_reiser4.
  60523. + Consider it like a temporary "seam" and reserved pset member.
  60524. + If you have something useful to add, then rename this plugin and add here */
  60525. +typedef struct perm_plugin {
  60526. + /* generic plugin fields */
  60527. + plugin_header h;
  60528. +} perm_plugin;
  60529. +
  60530. +typedef enum { NULL_PERM_ID, LAST_PERM_ID } reiser4_perm_id;
  60531. +
  60532. +/* __REISER4_PERM_H__ */
  60533. +#endif
  60534. +
  60535. +/* Make Linus happy.
  60536. + Local variables:
  60537. + c-indentation-style: "K&R"
  60538. + mode-name: "LC"
  60539. + c-basic-offset: 8
  60540. + tab-width: 8
  60541. + fill-column: 120
  60542. + End:
  60543. +*/
  60544. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/space/bitmap.c linux-4.14.2/fs/reiser4/plugin/space/bitmap.c
  60545. --- linux-4.14.2.orig/fs/reiser4/plugin/space/bitmap.c 1970-01-01 01:00:00.000000000 +0100
  60546. +++ linux-4.14.2/fs/reiser4/plugin/space/bitmap.c 2017-11-26 22:13:09.000000000 +0100
  60547. @@ -0,0 +1,1609 @@
  60548. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  60549. +
  60550. +#include "../../debug.h"
  60551. +#include "../../dformat.h"
  60552. +#include "../../txnmgr.h"
  60553. +#include "../../jnode.h"
  60554. +#include "../../block_alloc.h"
  60555. +#include "../../tree.h"
  60556. +#include "../../super.h"
  60557. +#include "../plugin.h"
  60558. +#include "space_allocator.h"
  60559. +#include "bitmap.h"
  60560. +
  60561. +#include <linux/types.h>
  60562. +#include <linux/fs.h> /* for struct super_block */
  60563. +#include <linux/mutex.h>
  60564. +#include <asm/div64.h>
  60565. +
  60566. +/* Proposed (but discarded) optimization: dynamic loading/unloading of bitmap
  60567. + * blocks
  60568. +
  60569. + A useful optimization of reiser4 bitmap handling would be dynamic bitmap
  60570. + blocks loading/unloading which is different from v3.x where all bitmap
  60571. + blocks are loaded at mount time.
  60572. +
  60573. + To implement bitmap blocks unloading we need to count bitmap block usage
  60574. + and detect currently unused blocks allowing them to be unloaded. It is not
  60575. + a simple task since we allow several threads to modify one bitmap block
  60576. + simultaneously.
  60577. +
  60578. + Briefly speaking, the following schema is proposed: we count in special
  60579. + variable associated with each bitmap block. That is for counting of block
  60580. + alloc/dealloc operations on that bitmap block. With a deferred block
  60581. + deallocation feature of reiser4 all those operation will be represented in
  60582. + atom dirty/deleted lists as jnodes for freshly allocated or deleted
  60583. + nodes.
  60584. +
  60585. + So, we increment usage counter for each new node allocated or deleted, and
  60586. + decrement it at atom commit one time for each node from the dirty/deleted
  60587. + atom's list. Of course, freshly allocated node deletion and node reusing
  60588. + from atom deleted (if we do so) list should decrement bitmap usage counter
  60589. + also.
  60590. +
  60591. + This schema seems to be working but that reference counting is
  60592. + not easy to debug. I think we should agree with Hans and do not implement
  60593. + it in v4.0. Current code implements "on-demand" bitmap blocks loading only.
  60594. +
  60595. + For simplicity all bitmap nodes (both commit and working bitmap blocks) are
  60596. + loaded into memory on fs mount time or each bitmap node is loaded at the
  60597. + first access to it, the "dont_load_bitmap" mount option controls whether
  60598. + bitmap nodes should be loaded at mount time. Dynamic unloading of bitmap
  60599. + nodes currently is not supported. */
  60600. +
  60601. +#define CHECKSUM_SIZE 4
  60602. +
  60603. +#define BYTES_PER_LONG (sizeof(long))
  60604. +
  60605. +#if BITS_PER_LONG == 64
  60606. +# define LONG_INT_SHIFT (6)
  60607. +#else
  60608. +# define LONG_INT_SHIFT (5)
  60609. +#endif
  60610. +
  60611. +#define LONG_INT_MASK (BITS_PER_LONG - 1UL)
  60612. +
  60613. +typedef unsigned long ulong_t;
  60614. +
  60615. +#define bmap_size(blocksize) ((blocksize) - CHECKSUM_SIZE)
  60616. +#define bmap_bit_count(blocksize) (bmap_size(blocksize) << 3)
  60617. +
  60618. +/* Block allocation/deallocation are done through special bitmap objects which
  60619. + are allocated in an array at fs mount. */
  60620. +struct bitmap_node {
  60621. + struct mutex mutex; /* long term lock object */
  60622. +
  60623. + jnode *wjnode; /* j-nodes for WORKING ... */
  60624. + jnode *cjnode; /* ... and COMMIT bitmap blocks */
  60625. +
  60626. + bmap_off_t first_zero_bit; /* for skip_busy option implementation */
  60627. +
  60628. + atomic_t loaded; /* a flag which shows that bnode is loaded
  60629. + * already */
  60630. +};
  60631. +
  60632. +static inline char *bnode_working_data(struct bitmap_node *bnode)
  60633. +{
  60634. + char *data;
  60635. +
  60636. + data = jdata(bnode->wjnode);
  60637. + assert("zam-429", data != NULL);
  60638. +
  60639. + return data + CHECKSUM_SIZE;
  60640. +}
  60641. +
  60642. +static inline char *bnode_commit_data(const struct bitmap_node *bnode)
  60643. +{
  60644. + char *data;
  60645. +
  60646. + data = jdata(bnode->cjnode);
  60647. + assert("zam-430", data != NULL);
  60648. +
  60649. + return data + CHECKSUM_SIZE;
  60650. +}
  60651. +
  60652. +static inline __u32 bnode_commit_crc(const struct bitmap_node *bnode)
  60653. +{
  60654. + char *data;
  60655. +
  60656. + data = jdata(bnode->cjnode);
  60657. + assert("vpf-261", data != NULL);
  60658. +
  60659. + return le32_to_cpu(get_unaligned((d32 *)data));
  60660. +}
  60661. +
  60662. +static inline void bnode_set_commit_crc(struct bitmap_node *bnode, __u32 crc)
  60663. +{
  60664. + char *data;
  60665. +
  60666. + data = jdata(bnode->cjnode);
  60667. + assert("vpf-261", data != NULL);
  60668. +
  60669. + put_unaligned(cpu_to_le32(crc), (d32 *)data);
  60670. +}
  60671. +
  60672. +/* ZAM-FIXME-HANS: is the idea that this might be a union someday? having
  60673. + * written the code, does this added abstraction still have */
  60674. +/* ANSWER(Zam): No, the abstractions is in the level above (exact place is the
  60675. + * reiser4_space_allocator structure) */
  60676. +/* ZAM-FIXME-HANS: I don't understand your english in comment above. */
  60677. +/* FIXME-HANS(Zam): I don't understand the questions like "might be a union
  60678. + * someday?". What they about? If there is a reason to have a union, it should
  60679. + * be a union, if not, it should not be a union. "..might be someday" means no
  60680. + * reason. */
  60681. +struct bitmap_allocator_data {
  60682. + /* an array for bitmap blocks direct access */
  60683. + struct bitmap_node *bitmap;
  60684. +};
  60685. +
  60686. +#define get_barray(super) \
  60687. +(((struct bitmap_allocator_data *)(get_super_private(super)->space_allocator.u.generic)) -> bitmap)
  60688. +
  60689. +#define get_bnode(super, i) (get_barray(super) + i)
  60690. +
  60691. +/* allocate and initialize jnode with JNODE_BITMAP type */
  60692. +static jnode *bnew(void)
  60693. +{
  60694. + jnode *jal = jalloc();
  60695. +
  60696. + if (jal)
  60697. + jnode_init(jal, current_tree, JNODE_BITMAP);
  60698. +
  60699. + return jal;
  60700. +}
  60701. +
  60702. +/* this file contains:
  60703. + - bitmap based implementation of space allocation plugin
  60704. + - all the helper functions like set bit, find_first_zero_bit, etc */
  60705. +
  60706. +/* Audited by: green(2002.06.12) */
  60707. +static int find_next_zero_bit_in_word(ulong_t word, int start_bit)
  60708. +{
  60709. + ulong_t mask = 1UL << start_bit;
  60710. + int i = start_bit;
  60711. +
  60712. + while ((word & mask) != 0) {
  60713. + mask <<= 1;
  60714. + if (++i >= BITS_PER_LONG)
  60715. + break;
  60716. + }
  60717. +
  60718. + return i;
  60719. +}
  60720. +
  60721. +#include <linux/bitops.h>
  60722. +
  60723. +#if BITS_PER_LONG == 64
  60724. +
  60725. +#define OFF(addr) (((ulong_t)(addr) & (BYTES_PER_LONG - 1)) << 3)
  60726. +#define BASE(addr) ((ulong_t*) ((ulong_t)(addr) & ~(BYTES_PER_LONG - 1)))
  60727. +
  60728. +static inline void reiser4_set_bit(int nr, void *addr)
  60729. +{
  60730. + __test_and_set_bit_le(nr + OFF(addr), BASE(addr));
  60731. +}
  60732. +
  60733. +static inline void reiser4_clear_bit(int nr, void *addr)
  60734. +{
  60735. + __test_and_clear_bit_le(nr + OFF(addr), BASE(addr));
  60736. +}
  60737. +
  60738. +static inline int reiser4_test_bit(int nr, void *addr)
  60739. +{
  60740. + return test_bit_le(nr + OFF(addr), BASE(addr));
  60741. +}
  60742. +static inline int reiser4_find_next_zero_bit(void *addr, int maxoffset,
  60743. + int offset)
  60744. +{
  60745. + int off = OFF(addr);
  60746. +
  60747. + return find_next_zero_bit_le(BASE(addr), maxoffset + off,
  60748. + offset + off) - off;
  60749. +}
  60750. +
  60751. +#else
  60752. +
  60753. +#define reiser4_set_bit(nr, addr) __test_and_set_bit_le(nr, addr)
  60754. +#define reiser4_clear_bit(nr, addr) __test_and_clear_bit_le(nr, addr)
  60755. +#define reiser4_test_bit(nr, addr) test_bit_le(nr, addr)
  60756. +
  60757. +#define reiser4_find_next_zero_bit(addr, maxoffset, offset) \
  60758. +find_next_zero_bit_le(addr, maxoffset, offset)
  60759. +#endif
  60760. +
  60761. +/* Search for a set bit in the bit array [@start_offset, @max_offset), offsets
  60762. + * are counted from @addr, return the offset of the first bit if it is found,
  60763. + * @maxoffset otherwise. */
  60764. +static bmap_off_t __reiser4_find_next_set_bit(void *addr, bmap_off_t max_offset,
  60765. + bmap_off_t start_offset)
  60766. +{
  60767. + ulong_t *base = addr;
  60768. + /* start_offset is in bits, convert it to byte offset within bitmap. */
  60769. + int word_nr = start_offset >> LONG_INT_SHIFT;
  60770. + /* bit number within the byte. */
  60771. + int bit_nr = start_offset & LONG_INT_MASK;
  60772. + int max_word_nr = (max_offset - 1) >> LONG_INT_SHIFT;
  60773. +
  60774. + assert("zam-387", max_offset != 0);
  60775. +
  60776. + /* Unaligned @start_offset case. */
  60777. + if (bit_nr != 0) {
  60778. + bmap_nr_t nr;
  60779. +
  60780. + nr = find_next_zero_bit_in_word(~(base[word_nr]), bit_nr);
  60781. +
  60782. + if (nr < BITS_PER_LONG)
  60783. + return (word_nr << LONG_INT_SHIFT) + nr;
  60784. +
  60785. + ++word_nr;
  60786. + }
  60787. +
  60788. + /* Fast scan through aligned words. */
  60789. + while (word_nr <= max_word_nr) {
  60790. + if (base[word_nr] != 0) {
  60791. + return (word_nr << LONG_INT_SHIFT)
  60792. + + find_next_zero_bit_in_word(~(base[word_nr]), 0);
  60793. + }
  60794. +
  60795. + ++word_nr;
  60796. + }
  60797. +
  60798. + return max_offset;
  60799. +}
  60800. +
  60801. +#if BITS_PER_LONG == 64
  60802. +
  60803. +static bmap_off_t reiser4_find_next_set_bit(void *addr, bmap_off_t max_offset,
  60804. + bmap_off_t start_offset)
  60805. +{
  60806. + bmap_off_t off = OFF(addr);
  60807. +
  60808. + return __reiser4_find_next_set_bit(BASE(addr), max_offset + off,
  60809. + start_offset + off) - off;
  60810. +}
  60811. +
  60812. +#else
  60813. +#define reiser4_find_next_set_bit(addr, max_offset, start_offset) \
  60814. + __reiser4_find_next_set_bit(addr, max_offset, start_offset)
  60815. +#endif
  60816. +
  60817. +/* search for the last set bit in a single word, scanning down from @start_bit. */
  60818. +static int find_last_set_bit_in_word(ulong_t word, int start_bit)
  60819. +{
  60820. + ulong_t bit_mask;
  60821. + int nr = start_bit;
  60822. +
  60823. + assert("zam-965", start_bit < BITS_PER_LONG);
  60824. + assert("zam-966", start_bit >= 0);
  60825. +
  60826. + bit_mask = (1UL << nr);
  60827. +
  60828. + while (bit_mask != 0) {
  60829. + if (bit_mask & word)
  60830. + return nr;
  60831. + bit_mask >>= 1;
  60832. + nr--;
  60833. + }
  60834. + return BITS_PER_LONG;
  60835. +}
  60836. +
  60837. +/* Search bitmap for a set bit in backward direction from the end to the
  60838. + * beginning of given region
  60839. + *
  60840. + * @result: result offset of the last set bit
  60841. + * @addr: base memory address,
  60842. + * @low_off: low end of the search region, edge bit included into the region,
  60843. + * @high_off: high end of the search region, edge bit included into the region,
  60844. + *
  60845. + * @return: 0 - set bit was found, -1 otherwise.
  60846. + */
  60847. +static int
  60848. +reiser4_find_last_set_bit(bmap_off_t * result, void *addr, bmap_off_t low_off,
  60849. + bmap_off_t high_off)
  60850. +{
  60851. + ulong_t *base = addr;
  60852. + int last_word;
  60853. + int first_word;
  60854. + int last_bit;
  60855. + int nr;
  60856. +
  60857. + assert("zam-962", high_off >= low_off);
  60858. +
  60859. + last_word = high_off >> LONG_INT_SHIFT;
  60860. + last_bit = high_off & LONG_INT_MASK;
  60861. + first_word = low_off >> LONG_INT_SHIFT;
  60862. +
  60863. + if (last_bit < BITS_PER_LONG) {
  60864. + nr = find_last_set_bit_in_word(base[last_word], last_bit);
  60865. + if (nr < BITS_PER_LONG) {
  60866. + *result = (last_word << LONG_INT_SHIFT) + nr;
  60867. + return 0;
  60868. + }
  60869. + --last_word;
  60870. + }
  60871. + while (last_word >= first_word) {
  60872. + if (base[last_word] != 0x0) {
  60873. + last_bit =
  60874. + find_last_set_bit_in_word(base[last_word],
  60875. + BITS_PER_LONG - 1);
  60876. + assert("zam-972", last_bit < BITS_PER_LONG);
  60877. + *result = (last_word << LONG_INT_SHIFT) + last_bit;
  60878. + return 0;
  60879. + }
  60880. + --last_word;
  60881. + }
  60882. +
  60883. + return -1; /* set bit not found */
  60884. +}
  60885. +
  60886. +/* Search bitmap for a clear bit in backward direction from the end to the
  60887. + * beginning of given region */
  60888. +static int
  60889. +reiser4_find_last_zero_bit(bmap_off_t * result, void *addr, bmap_off_t low_off,
  60890. + bmap_off_t high_off)
  60891. +{
  60892. + ulong_t *base = addr;
  60893. + int last_word;
  60894. + int first_word;
  60895. + int last_bit;
  60896. + int nr;
  60897. +
  60898. + last_word = high_off >> LONG_INT_SHIFT;
  60899. + last_bit = high_off & LONG_INT_MASK;
  60900. + first_word = low_off >> LONG_INT_SHIFT;
  60901. +
  60902. + if (last_bit < BITS_PER_LONG) {
  60903. + nr = find_last_set_bit_in_word(~base[last_word], last_bit);
  60904. + if (nr < BITS_PER_LONG) {
  60905. + *result = (last_word << LONG_INT_SHIFT) + nr;
  60906. + return 0;
  60907. + }
  60908. + --last_word;
  60909. + }
  60910. + while (last_word >= first_word) {
  60911. + if (base[last_word] != (ulong_t) (-1)) {
  60912. + *result = (last_word << LONG_INT_SHIFT) +
  60913. + find_last_set_bit_in_word(~base[last_word],
  60914. + BITS_PER_LONG - 1);
  60915. + return 0;
  60916. + }
  60917. + --last_word;
  60918. + }
  60919. +
  60920. + return -1; /* zero bit not found */
  60921. +}
  60922. +
  60923. +/* Audited by: green(2002.06.12) */
  60924. +static void reiser4_clear_bits(char *addr, bmap_off_t start, bmap_off_t end)
  60925. +{
  60926. + int first_byte;
  60927. + int last_byte;
  60928. +
  60929. + unsigned char first_byte_mask = 0xFF;
  60930. + unsigned char last_byte_mask = 0xFF;
  60931. +
  60932. + assert("zam-410", start < end);
  60933. +
  60934. + first_byte = start >> 3;
  60935. + last_byte = (end - 1) >> 3;
  60936. +
  60937. + if (last_byte > first_byte + 1)
  60938. + memset(addr + first_byte + 1, 0,
  60939. + (size_t) (last_byte - first_byte - 1));
  60940. +
  60941. + first_byte_mask >>= 8 - (start & 0x7);
  60942. + last_byte_mask <<= ((end - 1) & 0x7) + 1;
  60943. +
  60944. + if (first_byte == last_byte) {
  60945. + addr[first_byte] &= (first_byte_mask | last_byte_mask);
  60946. + } else {
  60947. + addr[first_byte] &= first_byte_mask;
  60948. + addr[last_byte] &= last_byte_mask;
  60949. + }
  60950. +}
  60951. +
  60952. +/* Audited by: green(2002.06.12) */
  60953. +/* ZAM-FIXME-HANS: comment this */
  60954. +static void reiser4_set_bits(char *addr, bmap_off_t start, bmap_off_t end)
  60955. +{
  60956. + int first_byte;
  60957. + int last_byte;
  60958. +
  60959. + unsigned char first_byte_mask = 0xFF;
  60960. + unsigned char last_byte_mask = 0xFF;
  60961. +
  60962. + assert("zam-386", start < end);
  60963. +
  60964. + first_byte = start >> 3;
  60965. + last_byte = (end - 1) >> 3;
  60966. +
  60967. + if (last_byte > first_byte + 1)
  60968. + memset(addr + first_byte + 1, 0xFF,
  60969. + (size_t) (last_byte - first_byte - 1));
  60970. +
  60971. + first_byte_mask <<= start & 0x7;
  60972. + last_byte_mask >>= 7 - ((end - 1) & 0x7);
  60973. +
  60974. + if (first_byte == last_byte) {
  60975. + addr[first_byte] |= (first_byte_mask & last_byte_mask);
  60976. + } else {
  60977. + addr[first_byte] |= first_byte_mask;
  60978. + addr[last_byte] |= last_byte_mask;
  60979. + }
  60980. +}
  60981. +
  60982. +#define ADLER_BASE 65521
  60983. +#define ADLER_NMAX 5552
  60984. +
  60985. +/* Calculates the adler32 checksum for the data pointed by `data` of the
  60986. + length `len`. This function was originally taken from zlib, version 1.1.3,
  60987. + July 9th, 1998.
  60988. +
  60989. + Copyright (C) 1995-1998 Jean-loup Gailly and Mark Adler
  60990. +
  60991. + This software is provided 'as-is', without any express or implied
  60992. + warranty. In no event will the authors be held liable for any damages
  60993. + arising from the use of this software.
  60994. +
  60995. + Permission is granted to anyone to use this software for any purpose,
  60996. + including commercial applications, and to alter it and redistribute it
  60997. + freely, subject to the following restrictions:
  60998. +
  60999. + 1. The origin of this software must not be misrepresented; you must not
  61000. + claim that you wrote the original software. If you use this software
  61001. + in a product, an acknowledgment in the product documentation would be
  61002. + appreciated but is not required.
  61003. + 2. Altered source versions must be plainly marked as such, and must not be
  61004. + misrepresented as being the original software.
  61005. + 3. This notice may not be removed or altered from any source distribution.
  61006. +
  61007. + Jean-loup Gailly Mark Adler
  61008. + jloup@gzip.org madler@alumni.caltech.edu
  61009. +
  61010. + The above comment applies only to the reiser4_adler32 function.
  61011. +*/
  61012. +
  61013. +__u32 reiser4_adler32(char *data, __u32 len)
  61014. +{
  61015. + unsigned char *t = data;
  61016. + __u32 s1 = 1;
  61017. + __u32 s2 = 0;
  61018. + int k;
  61019. +
  61020. + while (len > 0) {
  61021. + k = len < ADLER_NMAX ? len : ADLER_NMAX;
  61022. + len -= k;
  61023. +
  61024. + while (k--) {
  61025. + s1 += *t++;
  61026. + s2 += s1;
  61027. + }
  61028. +
  61029. + s1 %= ADLER_BASE;
  61030. + s2 %= ADLER_BASE;
  61031. + }
  61032. + return (s2 << 16) | s1;
  61033. +}
  61034. +
  61035. +#define sb_by_bnode(bnode) \
  61036. + ((struct super_block *)jnode_get_tree(bnode->wjnode)->super)
  61037. +
  61038. +static __u32 bnode_calc_crc(const struct bitmap_node *bnode, unsigned long size)
  61039. +{
  61040. + return reiser4_adler32(bnode_commit_data(bnode), bmap_size(size));
  61041. +}
  61042. +
  61043. +static int
  61044. +bnode_check_adler32(const struct bitmap_node *bnode, unsigned long size)
  61045. +{
  61046. + if (bnode_calc_crc(bnode, size) != bnode_commit_crc(bnode)) {
  61047. + bmap_nr_t bmap;
  61048. +
  61049. + bmap = bnode - get_bnode(sb_by_bnode(bnode), 0);
  61050. +
  61051. + warning("vpf-263",
  61052. + "Checksum for the bitmap block %llu is incorrect",
  61053. + bmap);
  61054. +
  61055. + return RETERR(-EIO);
  61056. + }
  61057. +
  61058. + return 0;
  61059. +}
  61060. +
  61061. +#define REISER4_CHECK_BMAP_CRC (0)
  61062. +
  61063. +#if REISER4_CHECK_BMAP_CRC
  61064. +static int bnode_check_crc(const struct bitmap_node *bnode)
  61065. +{
  61066. + return bnode_check_adler32(bnode,
  61067. + bmap_size(sb_by_bnode(bnode)->s_blocksize));
  61068. +}
  61069. +
  61070. +/* REISER4_CHECK_BMAP_CRC */
  61071. +#else
  61072. +
  61073. +#define bnode_check_crc(bnode) (0)
  61074. +
  61075. +/* REISER4_CHECK_BMAP_CRC */
  61076. +#endif
  61077. +
  61078. +/* Recalculates the adler32 checksum for only 1 byte change.
  61079. + adler - previous adler checksum
  61080. + old_data, data - old, new byte values.
  61081. + tail == (chunk - offset) : length, checksum was calculated for, - offset of
  61082. + the changed byte within this chunk.
  61083. + This function can be used for checksum calculation optimisation.
  61084. +*/
  61085. +
  61086. +static __u32
  61087. +adler32_recalc(__u32 adler, unsigned char old_data, unsigned char data,
  61088. + __u32 tail)
  61089. +{
  61090. + __u32 delta = data - old_data + 2 * ADLER_BASE;
  61091. + __u32 s1 = adler & 0xffff;
  61092. + __u32 s2 = (adler >> 16) & 0xffff;
  61093. +
  61094. + s1 = (delta + s1) % ADLER_BASE;
  61095. + s2 = (delta * tail + s2) % ADLER_BASE;
  61096. +
  61097. + return (s2 << 16) | s1;
  61098. +}
  61099. +
  61100. +#define LIMIT(val, boundary) ((val) > (boundary) ? (boundary) : (val))
  61101. +
  61102. +/**
  61103. + * get_nr_bmap - calculate number of bitmap blocks
  61104. + * @super: super block with initialized blocksize and block count
  61105. + *
  61106. + * Calculates number of bitmap blocks of a filesystem which uses bitmaps to
  61107. + * maintain free disk space. It assumes that each bitmap addresses the same
  61108. + * number of blocks which is calculated by bmap_block_count macro defined in
  61109. + * above. Number of blocks in the filesystem has to be initialized in reiser4
  61110. + * private data of super block already so that it can be obtained via
  61111. + * reiser4_block_count(). Unfortunately, number of blocks addressed by a bitmap
  61112. + * is not power of 2 because 4 bytes are used for checksum. Therefore, we have
  61113. + * to use special function to divide and modulo 64bits filesystem block
  61114. + * counters.
  61115. + *
  61116. + * Example: suppose filesystem have 32768 blocks. Blocksize is 4096. Each bitmap
  61117. + * block addresses (4096 - 4) * 8 = 32736 blocks. Number of bitmaps to address
  61118. + * all 32768 blocks is calculated as (32768 - 1) / 32736 + 1 = 2.
  61119. + */
  61120. +static bmap_nr_t get_nr_bmap(const struct super_block *super)
  61121. +{
  61122. + u64 quotient;
  61123. +
  61124. + assert("zam-393", reiser4_block_count(super) != 0);
  61125. +
  61126. + quotient = reiser4_block_count(super) - 1;
  61127. + do_div(quotient, bmap_bit_count(super->s_blocksize));
  61128. + return quotient + 1;
  61129. +}
  61130. +
  61131. +/**
  61132. + * parse_blocknr - calculate bitmap number and offset in it by block number
  61133. + * @block: pointer to block number to calculate location in bitmap of
  61134. + * @bmap: pointer where to store bitmap block number
  61135. + * @offset: pointer where to store offset within bitmap block
  61136. + *
  61137. + * Calculates location of bit which is responsible for allocation/freeing of
  61138. + * block @*block. That location is represented by bitmap block number and offset
  61139. + * within that bitmap block.
  61140. + */
  61141. +static void
  61142. +parse_blocknr(const reiser4_block_nr *block, bmap_nr_t *bmap,
  61143. + bmap_off_t *offset)
  61144. +{
  61145. + struct super_block *super = get_current_context()->super;
  61146. + u64 quotient = *block;
  61147. +
  61148. + *offset = do_div(quotient, bmap_bit_count(super->s_blocksize));
  61149. + *bmap = quotient;
  61150. +
  61151. + assert("zam-433", *bmap < get_nr_bmap(super));
  61152. + assert("", *offset < bmap_bit_count(super->s_blocksize));
  61153. +}
  61154. +
  61155. +#if REISER4_DEBUG
  61156. +/* Audited by: green(2002.06.12) */
  61157. +static void
  61158. +check_block_range(const reiser4_block_nr * start, const reiser4_block_nr * len)
  61159. +{
  61160. + struct super_block *sb = reiser4_get_current_sb();
  61161. +
  61162. + assert("zam-436", sb != NULL);
  61163. +
  61164. + assert("zam-455", start != NULL);
  61165. + assert("zam-437", *start != 0);
  61166. + assert("zam-541", !reiser4_blocknr_is_fake(start));
  61167. + assert("zam-441", *start < reiser4_block_count(sb));
  61168. +
  61169. + if (len != NULL) {
  61170. + assert("zam-438", *len != 0);
  61171. + assert("zam-442", *start + *len <= reiser4_block_count(sb));
  61172. + }
  61173. +}
  61174. +
  61175. +static void check_bnode_loaded(const struct bitmap_node *bnode)
  61176. +{
  61177. + assert("zam-485", bnode != NULL);
  61178. + assert("zam-483", jnode_page(bnode->wjnode) != NULL);
  61179. + assert("zam-484", jnode_page(bnode->cjnode) != NULL);
  61180. + assert("nikita-2820", jnode_is_loaded(bnode->wjnode));
  61181. + assert("nikita-2821", jnode_is_loaded(bnode->cjnode));
  61182. +}
  61183. +
  61184. +#else
  61185. +
  61186. +# define check_block_range(start, len) do { /* nothing */} while(0)
  61187. +# define check_bnode_loaded(bnode) do { /* nothing */} while(0)
  61188. +
  61189. +#endif
  61190. +
  61191. +/* modify bnode->first_zero_bit (if we free bits before); bnode should be
  61192. + spin-locked */
  61193. +static inline void
  61194. +adjust_first_zero_bit(struct bitmap_node *bnode, bmap_off_t offset)
  61195. +{
  61196. + if (offset < bnode->first_zero_bit)
  61197. + bnode->first_zero_bit = offset;
  61198. +}
  61199. +
  61200. +/* return a physical disk address for logical bitmap number @bmap */
  61201. +/* FIXME-VS: this is somehow related to disk layout? */
  61202. +/* ZAM-FIXME-HANS: your answer is? Use not more than one function dereference
  61203. + * per block allocation so that performance is not affected. Probably this
  61204. + * whole file should be considered part of the disk layout plugin, and other
  61205. + * disk layouts can use other defines and efficiency will not be significantly
  61206. + * affected. */
  61207. +
  61208. +#define REISER4_FIRST_BITMAP_BLOCK \
  61209. + ((REISER4_MASTER_OFFSET / PAGE_SIZE) + 2)
  61210. +
  61211. +/* Audited by: green(2002.06.12) */
  61212. +static void
  61213. +get_bitmap_blocknr(struct super_block *super, bmap_nr_t bmap,
  61214. + reiser4_block_nr * bnr)
  61215. +{
  61216. +
  61217. + assert("zam-390", bmap < get_nr_bmap(super));
  61218. +
  61219. +#ifdef CONFIG_REISER4_BADBLOCKS
  61220. +#define BITMAP_PLUGIN_DISKMAP_ID ((0xc0e1<<16) | (0xe0ff))
  61221. + /* Check if the diskmap have this already, first. */
  61222. + if (reiser4_get_diskmap_value(BITMAP_PLUGIN_DISKMAP_ID, bmap, bnr) == 0)
  61223. + return; /* Found it in diskmap */
  61224. +#endif
  61225. + /* FIXME_ZAM: before discussing of disk layouts and disk format
  61226. + plugins I implement bitmap location scheme which is close to scheme
  61227. + used in reiser 3.6 */
  61228. + if (bmap == 0) {
  61229. + *bnr = REISER4_FIRST_BITMAP_BLOCK;
  61230. + } else {
  61231. + *bnr = bmap * bmap_bit_count(super->s_blocksize);
  61232. + }
  61233. +}
  61234. +
  61235. +/* construct a fake block number for shadow bitmap (WORKING BITMAP) block */
  61236. +/* Audited by: green(2002.06.12) */
  61237. +static void get_working_bitmap_blocknr(bmap_nr_t bmap, reiser4_block_nr * bnr)
  61238. +{
  61239. + *bnr =
  61240. + (reiser4_block_nr) ((bmap & ~REISER4_BLOCKNR_STATUS_BIT_MASK) |
  61241. + REISER4_BITMAP_BLOCKS_STATUS_VALUE);
  61242. +}
  61243. +
  61244. +/* bnode structure initialization */
  61245. +static void
  61246. +init_bnode(struct bitmap_node *bnode,
  61247. + struct super_block *super UNUSED_ARG, bmap_nr_t bmap UNUSED_ARG)
  61248. +{
  61249. + memset(bnode, 0, sizeof(struct bitmap_node));
  61250. +
  61251. + mutex_init(&bnode->mutex);
  61252. + atomic_set(&bnode->loaded, 0);
  61253. +}
  61254. +
  61255. +static void release(jnode * node)
  61256. +{
  61257. + jrelse(node);
  61258. + JF_SET(node, JNODE_HEARD_BANSHEE);
  61259. + jput(node);
  61260. +}
  61261. +
  61262. +/* This function is for internal bitmap.c use because it assumes that jnode is
  61263. + in under full control of this thread */
  61264. +static void done_bnode(struct bitmap_node *bnode)
  61265. +{
  61266. + if (bnode) {
  61267. + atomic_set(&bnode->loaded, 0);
  61268. + if (bnode->wjnode != NULL)
  61269. + release(bnode->wjnode);
  61270. + if (bnode->cjnode != NULL)
  61271. + release(bnode->cjnode);
  61272. + bnode->wjnode = bnode->cjnode = NULL;
  61273. + }
  61274. +}
  61275. +
  61276. +/* ZAM-FIXME-HANS: comment this. Called only by load_and_lock_bnode()*/
  61277. +static int prepare_bnode(struct bitmap_node *bnode, jnode **cjnode_ret,
  61278. + jnode **wjnode_ret)
  61279. +{
  61280. + struct super_block *super;
  61281. + jnode *cjnode;
  61282. + jnode *wjnode;
  61283. + bmap_nr_t bmap;
  61284. + int ret;
  61285. +
  61286. + super = reiser4_get_current_sb();
  61287. +
  61288. + *wjnode_ret = wjnode = bnew();
  61289. + if (wjnode == NULL) {
  61290. + *cjnode_ret = NULL;
  61291. + return RETERR(-ENOMEM);
  61292. + }
  61293. +
  61294. + *cjnode_ret = cjnode = bnew();
  61295. + if (cjnode == NULL)
  61296. + return RETERR(-ENOMEM);
  61297. +
  61298. + bmap = bnode - get_bnode(super, 0);
  61299. +
  61300. + get_working_bitmap_blocknr(bmap, &wjnode->blocknr);
  61301. + get_bitmap_blocknr(super, bmap, &cjnode->blocknr);
  61302. +
  61303. + jref(cjnode);
  61304. + jref(wjnode);
  61305. +
  61306. + /* load commit bitmap */
  61307. + ret = jload_gfp(cjnode, GFP_NOFS, 1);
  61308. +
  61309. + if (ret)
  61310. + goto error;
  61311. +
  61312. + /* allocate memory for working bitmap block. Note that for
  61313. + * bitmaps jinit_new() doesn't actually modifies node content,
  61314. + * so parallel calls to this are ok. */
  61315. + ret = jinit_new(wjnode, GFP_NOFS);
  61316. +
  61317. + if (ret != 0) {
  61318. + jrelse(cjnode);
  61319. + goto error;
  61320. + }
  61321. +
  61322. + return 0;
  61323. +
  61324. + error:
  61325. + jput(cjnode);
  61326. + jput(wjnode);
  61327. + *wjnode_ret = *cjnode_ret = NULL;
  61328. + return ret;
  61329. +
  61330. +}
  61331. +
  61332. +/* Check the bnode data on read. */
  61333. +static int check_struct_bnode(struct bitmap_node *bnode, __u32 blksize)
  61334. +{
  61335. + void *data;
  61336. + int ret;
  61337. +
  61338. + /* Check CRC */
  61339. + ret = bnode_check_adler32(bnode, blksize);
  61340. +
  61341. + if (ret) {
  61342. + return ret;
  61343. + }
  61344. +
  61345. + data = jdata(bnode->cjnode) + CHECKSUM_SIZE;
  61346. +
  61347. + /* Check the very first bit -- it must be busy. */
  61348. + if (!reiser4_test_bit(0, data)) {
  61349. + warning("vpf-1362", "The allocator block %llu is not marked "
  61350. + "as used.", (unsigned long long)bnode->cjnode->blocknr);
  61351. +
  61352. + return -EINVAL;
  61353. + }
  61354. +
  61355. + return 0;
  61356. +}
  61357. +
  61358. +/* load bitmap blocks "on-demand" */
  61359. +static int load_and_lock_bnode(struct bitmap_node *bnode)
  61360. +{
  61361. + int ret;
  61362. +
  61363. + jnode *cjnode;
  61364. + jnode *wjnode;
  61365. +
  61366. + assert("nikita-3040", reiser4_schedulable());
  61367. +
  61368. +/* ZAM-FIXME-HANS: since bitmaps are never unloaded, this does not
  61369. + * need to be atomic, right? Just leave a comment that if bitmaps were
  61370. + * unloadable, this would need to be atomic. */
  61371. + if (atomic_read(&bnode->loaded)) {
  61372. + /* bitmap is already loaded, nothing to do */
  61373. + check_bnode_loaded(bnode);
  61374. + mutex_lock(&bnode->mutex);
  61375. + assert("nikita-2827", atomic_read(&bnode->loaded));
  61376. + return 0;
  61377. + }
  61378. +
  61379. + ret = prepare_bnode(bnode, &cjnode, &wjnode);
  61380. + if (ret)
  61381. + return ret;
  61382. +
  61383. + mutex_lock(&bnode->mutex);
  61384. +
  61385. + if (!atomic_read(&bnode->loaded)) {
  61386. + assert("nikita-2822", cjnode != NULL);
  61387. + assert("nikita-2823", wjnode != NULL);
  61388. + assert("nikita-2824", jnode_is_loaded(cjnode));
  61389. + assert("nikita-2825", jnode_is_loaded(wjnode));
  61390. +
  61391. + bnode->wjnode = wjnode;
  61392. + bnode->cjnode = cjnode;
  61393. +
  61394. + ret = check_struct_bnode(bnode, current_blocksize);
  61395. + if (unlikely(ret != 0))
  61396. + goto error;
  61397. +
  61398. + atomic_set(&bnode->loaded, 1);
  61399. + /* working bitmap is initialized by on-disk
  61400. + * commit bitmap. This should be performed
  61401. + * under mutex. */
  61402. + memcpy(bnode_working_data(bnode),
  61403. + bnode_commit_data(bnode),
  61404. + bmap_size(current_blocksize));
  61405. + } else
  61406. + /* race: someone already loaded this bitmap; NOTE(review): the
  61407. + * cjnode/wjnode prepared above appear to be leaked here -- confirm */
  61408. + check_bnode_loaded(bnode);
  61409. + return 0;
  61410. +
  61411. + error:
  61412. + release(wjnode);
  61413. + release(cjnode);
  61414. + bnode->wjnode = NULL;
  61415. + bnode->cjnode = NULL;
  61416. + mutex_unlock(&bnode->mutex);
  61417. + return ret;
  61418. +}
  61419. +
  61420. +static void release_and_unlock_bnode(struct bitmap_node *bnode)
  61421. +{
  61422. + check_bnode_loaded(bnode);
  61423. + mutex_unlock(&bnode->mutex);
  61424. +}
  61425. +
  61426. +/* This function does all block allocation work but only for one bitmap
  61427. + block.*/
  61428. +/* FIXME_ZAM: It does not allow us to allocate block ranges across bitmap
  61429. + block responsibility zone boundaries. This had no sense in v3.6 but may
  61430. + have it in v4.x */
  61431. +/* ZAM-FIXME-HANS: do you mean search one bitmap block forward? */
  61432. +static int
  61433. +search_one_bitmap_forward(bmap_nr_t bmap, bmap_off_t * offset,
  61434. + bmap_off_t max_offset, int min_len, int max_len)
  61435. +{
  61436. + struct super_block *super = get_current_context()->super;
  61437. + struct bitmap_node *bnode = get_bnode(super, bmap);
  61438. +
  61439. + char *data;
  61440. +
  61441. + bmap_off_t search_end;
  61442. + bmap_off_t start;
  61443. + bmap_off_t end;
  61444. +
  61445. + int set_first_zero_bit = 0;
  61446. +
  61447. + int ret;
  61448. +
  61449. + assert("zam-364", min_len > 0);
  61450. + assert("zam-365", max_len >= min_len);
  61451. + assert("zam-366", *offset <= max_offset);
  61452. +
  61453. + ret = load_and_lock_bnode(bnode);
  61454. +
  61455. + if (ret)
  61456. + return ret;
  61457. +
  61458. + data = bnode_working_data(bnode);
  61459. +
  61460. + start = *offset;
  61461. +
  61462. + if (bnode->first_zero_bit >= start) {
  61463. + start = bnode->first_zero_bit;
  61464. + set_first_zero_bit = 1;
  61465. + }
  61466. +
  61467. + while (start + min_len < max_offset) {
  61468. +
  61469. + start =
  61470. + reiser4_find_next_zero_bit((long *)data, max_offset, start);
  61471. + if (set_first_zero_bit) {
  61472. + bnode->first_zero_bit = start;
  61473. + set_first_zero_bit = 0;
  61474. + }
  61475. + if (start >= max_offset)
  61476. + break;
  61477. +
  61478. + search_end = LIMIT(start + max_len, max_offset);
  61479. + end =
  61480. + reiser4_find_next_set_bit((long *)data, search_end, start);
  61481. + if (end >= start + min_len) {
  61482. + /* we can't trust find_next_set_bit result if set bit
  61483. + was not found, result may be bigger than
  61484. + max_offset */
  61485. + if (end > search_end)
  61486. + end = search_end;
  61487. +
  61488. + ret = end - start;
  61489. + *offset = start;
  61490. +
  61491. + reiser4_set_bits(data, start, end);
  61492. +
  61493. + /* FIXME: we may advance first_zero_bit if [start,
  61494. + end] region overlaps the first_zero_bit point */
  61495. +
  61496. + break;
  61497. + }
  61498. +
  61499. + start = end + 1;
  61500. + }
  61501. +
  61502. + release_and_unlock_bnode(bnode);
  61503. +
  61504. + return ret;
  61505. +}
  61506. +
  61507. +static int
  61508. +search_one_bitmap_backward(bmap_nr_t bmap, bmap_off_t * start_offset,
  61509. + bmap_off_t end_offset, int min_len, int max_len)
  61510. +{
  61511. + struct super_block *super = get_current_context()->super;
  61512. + struct bitmap_node *bnode = get_bnode(super, bmap);
  61513. + char *data;
  61514. + bmap_off_t start;
  61515. + int ret;
  61516. +
  61517. + assert("zam-958", min_len > 0);
  61518. + assert("zam-959", max_len >= min_len);
  61519. + assert("zam-960", *start_offset >= end_offset);
  61520. +
  61521. + ret = load_and_lock_bnode(bnode);
  61522. + if (ret)
  61523. + return ret;
  61524. +
  61525. + data = bnode_working_data(bnode);
  61526. + start = *start_offset;
  61527. +
  61528. + while (1) {
  61529. + bmap_off_t end, search_end;
  61530. +
  61531. + /* Find the beginning of the zero filled region */
  61532. + if (reiser4_find_last_zero_bit(&start, data, end_offset, start))
  61533. + break;
  61534. + /* Is there more than `min_len' bits from `start' to
  61535. + * `end_offset'? */
  61536. + if (start < end_offset + min_len - 1)
  61537. + break;
  61538. +
  61539. + /* Do not search to `end_offset' if we need to find less than
  61540. + * `max_len' zero bits. */
  61541. + if (end_offset + max_len - 1 < start)
  61542. + search_end = start - max_len + 1;
  61543. + else
  61544. + search_end = end_offset;
  61545. +
  61546. + if (reiser4_find_last_set_bit(&end, data, search_end, start))
  61547. + end = search_end;
  61548. + else
  61549. + end++;
  61550. +
  61551. + if (end + min_len <= start + 1) {
  61552. + if (end < search_end)
  61553. + end = search_end;
  61554. + ret = start - end + 1;
  61555. + *start_offset = end; /* `end' is lowest offset */
  61556. + assert("zam-987",
  61557. + reiser4_find_next_set_bit(data, start + 1,
  61558. + end) >= start + 1);
  61559. + reiser4_set_bits(data, end, start + 1);
  61560. + break;
  61561. + }
  61562. +
  61563. + if (end <= end_offset)
  61564. + /* left search boundary reached. */
  61565. + break;
  61566. + start = end - 1;
  61567. + }
  61568. +
  61569. + release_and_unlock_bnode(bnode);
  61570. + return ret;
  61571. +}
  61572. +
  61573. +/* allocate contiguous range of blocks in bitmap */
  61574. +static int bitmap_alloc_forward(reiser4_block_nr * start,
  61575. + const reiser4_block_nr * end, int min_len,
  61576. + int max_len)
  61577. +{
  61578. + bmap_nr_t bmap, end_bmap;
  61579. + bmap_off_t offset, end_offset;
  61580. + int len;
  61581. +
  61582. + reiser4_block_nr tmp;
  61583. +
  61584. + struct super_block *super = get_current_context()->super;
  61585. + const bmap_off_t max_offset = bmap_bit_count(super->s_blocksize);
  61586. +
  61587. + parse_blocknr(start, &bmap, &offset);
  61588. +
  61589. + tmp = *end - 1;
  61590. + parse_blocknr(&tmp, &end_bmap, &end_offset);
  61591. + ++end_offset;
  61592. +
  61593. + assert("zam-358", end_bmap >= bmap);
  61594. + assert("zam-359", ergo(end_bmap == bmap, end_offset >= offset));
  61595. +
  61596. + for (; bmap < end_bmap; bmap++, offset = 0) {
  61597. + len =
  61598. + search_one_bitmap_forward(bmap, &offset, max_offset,
  61599. + min_len, max_len);
  61600. + if (len != 0)
  61601. + goto out;
  61602. + }
  61603. +
  61604. + len =
  61605. + search_one_bitmap_forward(bmap, &offset, end_offset, min_len,
  61606. + max_len);
  61607. + out:
  61608. + *start = bmap * max_offset + offset;
  61609. + return len;
  61610. +}
  61611. +
  61612. +/* allocate contiguous range of blocks in bitmap (from @start to @end in
  61613. + * backward direction) */
  61614. +static int bitmap_alloc_backward(reiser4_block_nr * start,
  61615. + const reiser4_block_nr * end, int min_len,
  61616. + int max_len)
  61617. +{
  61618. + bmap_nr_t bmap, end_bmap;
  61619. + bmap_off_t offset, end_offset;
  61620. + int len;
  61621. + struct super_block *super = get_current_context()->super;
  61622. + const bmap_off_t max_offset = bmap_bit_count(super->s_blocksize);
  61623. +
  61624. + parse_blocknr(start, &bmap, &offset);
  61625. + parse_blocknr(end, &end_bmap, &end_offset);
  61626. +
  61627. + assert("zam-961", end_bmap <= bmap);
  61628. + assert("zam-962", ergo(end_bmap == bmap, end_offset <= offset));
  61629. +
  61630. + for (; bmap > end_bmap; bmap--, offset = max_offset - 1) {
  61631. + len =
  61632. + search_one_bitmap_backward(bmap, &offset, 0, min_len,
  61633. + max_len);
  61634. + if (len != 0)
  61635. + goto out;
  61636. + }
  61637. +
  61638. + len =
  61639. + search_one_bitmap_backward(bmap, &offset, end_offset, min_len,
  61640. + max_len);
  61641. + out:
  61642. + *start = bmap * max_offset + offset;
  61643. + return len;
  61644. +}
  61645. +
  61646. +/* plugin->u.space_allocator.alloc_blocks() */
  61647. +static int alloc_blocks_forward(reiser4_blocknr_hint *hint, int needed,
  61648. + reiser4_block_nr *start, reiser4_block_nr *len)
  61649. +{
  61650. + struct super_block *super = get_current_context()->super;
  61651. + int actual_len;
  61652. +
  61653. + reiser4_block_nr search_start;
  61654. + reiser4_block_nr search_end;
  61655. +
  61656. + assert("zam-398", super != NULL);
  61657. + assert("zam-412", hint != NULL);
  61658. + assert("zam-397", hint->blk <= reiser4_block_count(super));
  61659. +
  61660. + if (hint->max_dist == 0)
  61661. + search_end = reiser4_block_count(super);
  61662. + else
  61663. + search_end =
  61664. + LIMIT(hint->blk + hint->max_dist,
  61665. + reiser4_block_count(super));
  61666. +
  61667. + /* We use @hint -> blk as a search start and search from it to the end
  61668. + of the disk or in given region if @hint -> max_dist is not zero */
  61669. + search_start = hint->blk;
  61670. +
  61671. + actual_len =
  61672. + bitmap_alloc_forward(&search_start, &search_end, 1, needed);
  61673. +
  61674. + /* There is only one bitmap search if max_dist was specified or first
  61675. + pass was from the beginning of the bitmap. We also do one pass for
  61676. + scanning bitmap in backward direction. */
  61677. + if (!(actual_len != 0 || hint->max_dist != 0 || search_start == 0)) {
  61678. + /* next step is a scanning from 0 to search_start */
  61679. + search_end = search_start;
  61680. + search_start = 0;
  61681. + actual_len =
  61682. + bitmap_alloc_forward(&search_start, &search_end, 1, needed);
  61683. + }
  61684. + if (actual_len == 0)
  61685. + return RETERR(-ENOSPC);
  61686. + if (actual_len < 0)
  61687. + return RETERR(actual_len);
  61688. + *len = actual_len;
  61689. + *start = search_start;
  61690. + return 0;
  61691. +}
  61692. +
  61693. +static int alloc_blocks_backward(reiser4_blocknr_hint * hint, int needed,
  61694. + reiser4_block_nr * start,
  61695. + reiser4_block_nr * len)
  61696. +{
  61697. + reiser4_block_nr search_start;
  61698. + reiser4_block_nr search_end;
  61699. + int actual_len;
  61700. +
  61701. + ON_DEBUG(struct super_block *super = reiser4_get_current_sb());
  61702. +
  61703. + assert("zam-969", super != NULL);
  61704. + assert("zam-970", hint != NULL);
  61705. + assert("zam-971", hint->blk <= reiser4_block_count(super));
  61706. +
  61707. + search_start = hint->blk;
  61708. + if (hint->max_dist == 0 || search_start <= hint->max_dist)
  61709. + search_end = 0;
  61710. + else
  61711. + search_end = search_start - hint->max_dist;
  61712. +
  61713. + actual_len =
  61714. + bitmap_alloc_backward(&search_start, &search_end, 1, needed);
  61715. + if (actual_len == 0)
  61716. + return RETERR(-ENOSPC);
  61717. + if (actual_len < 0)
  61718. + return RETERR(actual_len);
  61719. + *len = actual_len;
  61720. + *start = search_start;
  61721. + return 0;
  61722. +}
  61723. +
  61724. +/* plugin->u.space_allocator.alloc_blocks() */
  61725. +int reiser4_alloc_blocks_bitmap(reiser4_space_allocator * allocator,
  61726. + reiser4_blocknr_hint * hint, int needed,
  61727. + reiser4_block_nr * start, reiser4_block_nr * len)
  61728. +{
  61729. + if (hint->backward)
  61730. + return alloc_blocks_backward(hint, needed, start, len);
  61731. + return alloc_blocks_forward(hint, needed, start, len);
  61732. +}
  61733. +
  61734. +/* plugin->u.space_allocator.dealloc_blocks(). */
  61735. +/* It just frees blocks in WORKING BITMAP. Usually formatted and unformatted
  61736. + nodes deletion is deferred until transaction commit. However, deallocation
  61737. + of temporary objects like wandered blocks and transaction commit records
  61738. + requires immediate node deletion from WORKING BITMAP.*/
  61739. +void reiser4_dealloc_blocks_bitmap(reiser4_space_allocator * allocator,
  61740. + reiser4_block_nr start, reiser4_block_nr len)
  61741. +{
  61742. + struct super_block *super = reiser4_get_current_sb();
  61743. +
  61744. + bmap_nr_t bmap;
  61745. + bmap_off_t offset;
  61746. +
  61747. + struct bitmap_node *bnode;
  61748. + int ret;
  61749. +
  61750. + assert("zam-468", len != 0);
  61751. + check_block_range(&start, &len);
  61752. +
  61753. + parse_blocknr(&start, &bmap, &offset);
  61754. +
  61755. + assert("zam-469", offset + len <= bmap_bit_count(super->s_blocksize));
  61756. +
  61757. + bnode = get_bnode(super, bmap);
  61758. +
  61759. + assert("zam-470", bnode != NULL);
  61760. +
  61761. + ret = load_and_lock_bnode(bnode);
  61762. + assert("zam-481", ret == 0);
  61763. +
  61764. + reiser4_clear_bits(bnode_working_data(bnode), offset,
  61765. + (bmap_off_t) (offset + len));
  61766. +
  61767. + adjust_first_zero_bit(bnode, offset);
  61768. +
  61769. + release_and_unlock_bnode(bnode);
  61770. +}
  61771. +
  61772. +static int check_blocks_one_bitmap(bmap_nr_t bmap, bmap_off_t start_offset,
  61773. + bmap_off_t end_offset, int desired)
  61774. +{
  61775. + struct super_block *super = reiser4_get_current_sb();
  61776. + struct bitmap_node *bnode = get_bnode(super, bmap);
  61777. + int ret;
  61778. +
  61779. + assert("nikita-2215", bnode != NULL);
  61780. +
  61781. + ret = load_and_lock_bnode(bnode);
  61782. + assert("zam-626", ret == 0);
  61783. +
  61784. + assert("nikita-2216", jnode_is_loaded(bnode->wjnode));
  61785. +
  61786. + if (desired) {
  61787. + ret = reiser4_find_next_zero_bit(bnode_working_data(bnode),
  61788. + end_offset, start_offset)
  61789. + >= end_offset;
  61790. + } else {
  61791. + ret = reiser4_find_next_set_bit(bnode_working_data(bnode),
  61792. + end_offset, start_offset)
  61793. + >= end_offset;
  61794. + }
  61795. +
  61796. + release_and_unlock_bnode(bnode);
  61797. +
  61798. + return ret;
  61799. +}
  61800. +
  61801. +/* plugin->u.space_allocator.check_blocks(). */
  61802. +int reiser4_check_blocks_bitmap(const reiser4_block_nr * start,
  61803. + const reiser4_block_nr * len, int desired)
  61804. +{
  61805. + struct super_block *super = reiser4_get_current_sb();
  61806. +
  61807. + reiser4_block_nr end;
  61808. + bmap_nr_t bmap, end_bmap;
  61809. + bmap_off_t offset, end_offset;
  61810. + const bmap_off_t max_offset = bmap_bit_count(super->s_blocksize);
  61811. +
  61812. + assert("intelfx-9", start != NULL);
  61813. + assert("intelfx-10", ergo(len != NULL, *len > 0));
  61814. +
  61815. + if (len != NULL) {
  61816. + check_block_range(start, len);
  61817. + end = *start + *len - 1;
  61818. + } else {
  61819. + /* on next line, end is used as temporary len for check_block_range() */
  61820. + end = 1; check_block_range(start, &end);
  61821. + end = *start;
  61822. + }
  61823. +
  61824. + parse_blocknr(start, &bmap, &offset);
  61825. +
  61826. + if (end == *start) {
  61827. + end_bmap = bmap;
  61828. + end_offset = offset;
  61829. + } else {
  61830. + parse_blocknr(&end, &end_bmap, &end_offset);
  61831. + }
  61832. + ++end_offset;
  61833. +
  61834. + assert("intelfx-4", end_bmap >= bmap);
  61835. + assert("intelfx-5", ergo(end_bmap == bmap, end_offset >= offset));
  61836. +
  61837. + for (; bmap < end_bmap; bmap++, offset = 0) {
  61838. + if (!check_blocks_one_bitmap(bmap, offset, max_offset, desired)) {
  61839. + return 0;
  61840. + }
  61841. + }
  61842. + return check_blocks_one_bitmap(bmap, offset, end_offset, desired);
  61843. +}
  61844. +
  61845. +/* conditional insertion of @node into atom's overwrite set if it was not there */
  61846. +static void cond_add_to_overwrite_set(txn_atom * atom, jnode * node)
  61847. +{
  61848. + assert("zam-546", atom != NULL);
  61849. + assert("zam-547", atom->stage == ASTAGE_PRE_COMMIT);
  61850. + assert("zam-548", node != NULL);
  61851. +
  61852. + spin_lock_atom(atom);
  61853. + spin_lock_jnode(node);
  61854. +
  61855. + if (node->atom == NULL) {
  61856. + JF_SET(node, JNODE_OVRWR);
  61857. + insert_into_atom_ovrwr_list(atom, node);
  61858. + } else {
  61859. + assert("zam-549", node->atom == atom);
  61860. + }
  61861. +
  61862. + spin_unlock_jnode(node);
  61863. + spin_unlock_atom(atom);
  61864. +}
  61865. +
  61866. +/* an actor which applies delete set to COMMIT bitmap pages and link modified
  61867. + pages in a single-linked list */
  61868. +static int
  61869. +apply_dset_to_commit_bmap(txn_atom * atom, const reiser4_block_nr * start,
  61870. + const reiser4_block_nr * len, void *data)
  61871. +{
  61872. + /* clears bits for one freed extent in the COMMIT bitmap; @data is a freed-blocks counter */
  61873. + bmap_nr_t bmap;
  61874. + bmap_off_t offset;
  61875. + int ret;
  61876. +
  61877. + long long *blocks_freed_p = data;
  61878. +
  61879. + struct bitmap_node *bnode;
  61880. +
  61881. + struct super_block *sb = reiser4_get_current_sb();
  61882. +
  61883. + check_block_range(start, len);
  61884. +
  61885. + parse_blocknr(start, &bmap, &offset);
  61886. +
  61887. + /* FIXME-ZAM: we assume that all block ranges are allocated by this
  61888. + bitmap-based allocator and each block range can't go over a zone of
  61889. + responsibility of one bitmap block; same assumption is used in
  61890. + other journal hooks in bitmap code. */
  61891. + bnode = get_bnode(sb, bmap);
  61892. + assert("zam-448", bnode != NULL);
  61893. +
  61894. + /* it is safe to unlock atom which is in ASTAGE_PRE_COMMIT */
  61895. + assert("zam-767", atom->stage == ASTAGE_PRE_COMMIT);
  61896. + ret = load_and_lock_bnode(bnode);
  61897. + if (ret)
  61898. + return ret;
  61899. +
  61900. + /* put bnode into atom's overwrite set */
  61901. + cond_add_to_overwrite_set(atom, bnode->cjnode);
  61902. +
  61903. + data = bnode_commit_data(bnode);
  61904. +
  61905. + ret = bnode_check_crc(bnode);
  61906. + if (ret != 0) /* unlock bnode on CRC error: do not leak the mutex */
  61907. + { release_and_unlock_bnode(bnode); return ret; }
  61908. +
  61909. + if (len != NULL) {
  61910. + /* FIXME-ZAM: a check that all bits are set should be there */
  61911. + assert("zam-443",
  61912. + offset + *len <= bmap_bit_count(sb->s_blocksize));
  61913. + reiser4_clear_bits(data, offset, (bmap_off_t) (offset + *len));
  61914. +
  61915. + (*blocks_freed_p) += *len;
  61916. + } else {
  61917. + reiser4_clear_bit(offset, data);
  61918. + (*blocks_freed_p)++;
  61919. + }
  61920. +
  61921. + bnode_set_commit_crc(bnode, bnode_calc_crc(bnode, sb->s_blocksize));
  61922. +
  61923. + release_and_unlock_bnode(bnode);
  61924. +
  61925. + return 0;
  61926. +}
  61927. +
  61928. +/* plugin->u.space_allocator.pre_commit_hook(). */
  61929. +/* It just applies transaction changes to fs-wide COMMIT BITMAP, hoping the
  61930. + rest is done by transaction manager (allocate wandered locations for COMMIT
  61931. + BITMAP blocks, copy COMMIT BITMAP blocks data). */
  61932. +/* Only one instance of this function can be running at one given time, because
  61933. + only one transaction can be committed at a time, therefore it is safe to access
  61934. + some global variables without any locking */
  61935. +
  61936. +int reiser4_pre_commit_hook_bitmap(void)
  61937. +{
  61938. + struct super_block *super = reiser4_get_current_sb();
  61939. + txn_atom *atom;
  61940. +
  61941. + long long blocks_freed = 0;
  61942. +
  61943. + atom = get_current_atom_locked();
  61944. + assert("zam-876", atom->stage == ASTAGE_PRE_COMMIT);
  61945. + spin_unlock_atom(atom);
  61946. +
  61947. + { /* scan atom's captured list and find all freshly allocated nodes,
  61948. + * mark corresponding bits in COMMIT BITMAP as used */
  61949. + struct list_head *head = ATOM_CLEAN_LIST(atom);
  61950. + jnode *node = list_entry(head->next, jnode, capture_link);
  61951. +
  61952. + while (head != &node->capture_link) {
  61953. + /* we detect freshly allocated jnodes */
  61954. + if (JF_ISSET(node, JNODE_RELOC)) {
  61955. + int ret;
  61956. + bmap_nr_t bmap;
  61957. +
  61958. + bmap_off_t offset;
  61959. + bmap_off_t index;
  61960. + struct bitmap_node *bn;
  61961. + __u32 size = bmap_size(super->s_blocksize);
  61962. + __u32 crc;
  61963. + char byte;
  61964. +
  61965. + assert("zam-559", !JF_ISSET(node, JNODE_OVRWR));
  61966. + assert("zam-460",
  61967. + !reiser4_blocknr_is_fake(&node->blocknr));
  61968. +
  61969. + parse_blocknr(&node->blocknr, &bmap, &offset);
  61970. + bn = get_bnode(super, bmap);
  61971. +
  61972. + index = offset >> 3;
  61973. + assert("vpf-276", index < size);
  61974. +
  61975. + ret = bnode_check_crc(bn); /* was "bnode": not declared in this scope */
  61976. + if (ret != 0)
  61977. + return ret;
  61978. +
  61979. + check_bnode_loaded(bn);
  61980. + load_and_lock_bnode(bn); /* bn is loaded (asserted above); this only takes the mutex */
  61981. +
  61982. + byte = *(bnode_commit_data(bn) + index);
  61983. + reiser4_set_bit(offset, bnode_commit_data(bn));
  61984. +
  61985. + crc = adler32_recalc(bnode_commit_crc(bn), byte,
  61986. + *(bnode_commit_data(bn) +
  61987. + index),
  61988. + size - index);
  61989. + bnode_set_commit_crc(bn, crc);
  61990. +
  61991. + release_and_unlock_bnode(bn);
  61992. +
  61993. + ret = bnode_check_crc(bn);
  61994. + if (ret != 0)
  61995. + return ret;
  61996. +
  61997. + /* working of this depends on how it inserts
  61998. + new j-node into clean list, because we are
  61999. + scanning the same list now. It is OK, if
  62000. + insertion is done to the list front */
  62001. + cond_add_to_overwrite_set(atom, bn->cjnode);
  62002. + }
  62003. +
  62004. + node = list_entry(node->capture_link.next, jnode, capture_link);
  62005. + }
  62006. + }
  62007. +
  62008. + atom_dset_deferred_apply(atom, apply_dset_to_commit_bmap, &blocks_freed, 0);
  62009. +
  62010. + blocks_freed -= atom->nr_blocks_allocated;
  62011. +
  62012. + {
  62013. + reiser4_super_info_data *sbinfo;
  62014. +
  62015. + sbinfo = get_super_private(super);
  62016. +
  62017. + spin_lock_reiser4_super(sbinfo);
  62018. + sbinfo->blocks_free_committed += blocks_freed;
  62019. + spin_unlock_reiser4_super(sbinfo);
  62020. + }
  62021. +
  62022. + return 0;
  62023. +}
  62024. +
  62025. +/* plugin->u.space_allocator.init_allocator
  62026. + constructor of reiser4_space_allocator object. It is called on fs mount */
  62027. +int reiser4_init_allocator_bitmap(reiser4_space_allocator * allocator,
  62028. + struct super_block *super, void *arg)
  62029. +{
  62030. + struct bitmap_allocator_data *data = NULL;
  62031. + bmap_nr_t bitmap_blocks_nr;
  62032. + bmap_nr_t i;
  62033. +
  62034. + assert("nikita-3039", reiser4_schedulable());
  62035. +
  62036. + /* getting memory for bitmap allocator private data holder */
  62037. + data =
  62038. + kmalloc(sizeof(struct bitmap_allocator_data),
  62039. + reiser4_ctx_gfp_mask_get());
  62040. +
  62041. + if (data == NULL)
  62042. + return RETERR(-ENOMEM);
  62043. +
  62044. + /* allocation and initialization for the array of bnodes */
  62045. + bitmap_blocks_nr = get_nr_bmap(super);
  62046. +
  62047. + /* FIXME-ZAM: it is not clear what to do with huge number of bitmaps
  62048. + which is bigger than 2^32 (= 8 * 4096 * 4096 * 2^32 bytes = 5.76e+17,
  62049. + may I never meet someone who still uses the ia32 architecture when
  62050. + storage devices of that size enter the market, and wants to use ia32
  62051. + with that storage device, much less reiser4. ;-) -Hans). Kmalloc is not possible and,
  62052. + probably, another dynamic data structure should replace a static
  62053. + array of bnodes. */
  62054. + /*data->bitmap = reiser4_kmalloc((size_t) (sizeof (struct bitmap_node) * bitmap_blocks_nr), GFP_KERNEL); */
  62055. + data->bitmap = reiser4_vmalloc(sizeof(struct bitmap_node) * bitmap_blocks_nr);
  62056. + if (data->bitmap == NULL) {
  62057. + kfree(data);
  62058. + return RETERR(-ENOMEM);
  62059. + }
  62060. +
  62061. + for (i = 0; i < bitmap_blocks_nr; i++)
  62062. + init_bnode(data->bitmap + i, super, i);
  62063. +
  62064. + allocator->u.generic = data;
  62065. +
  62066. +#if REISER4_DEBUG
  62067. + get_super_private(super)->min_blocks_used += bitmap_blocks_nr;
  62068. +#endif
  62069. +
  62070. + /* Load all bitmap blocks at mount time. */
  62071. + if (!test_bit
  62072. + (REISER4_DONT_LOAD_BITMAP, &get_super_private(super)->fs_flags)) {
  62073. + __u64 start_time, elapsed_time;
  62074. + struct bitmap_node *bnode;
  62075. + int ret;
  62076. +
  62077. + if (REISER4_DEBUG)
  62078. + printk(KERN_INFO "loading reiser4 bitmap...");
  62079. + start_time = jiffies;
  62080. +
  62081. + for (i = 0; i < bitmap_blocks_nr; i++) {
  62082. + bnode = data->bitmap + i;
  62083. + ret = load_and_lock_bnode(bnode);
  62084. + if (ret) {
  62085. + reiser4_destroy_allocator_bitmap(allocator,
  62086. + super);
  62087. + return ret;
  62088. + }
  62089. + release_and_unlock_bnode(bnode);
  62090. + }
  62091. +
  62092. + elapsed_time = jiffies - start_time;
  62093. + if (REISER4_DEBUG)
  62094. + printk("...done (%llu jiffies)\n",
  62095. + (unsigned long long)elapsed_time);
  62096. + }
  62097. +
  62098. + return 0;
  62099. +}
  62100. +
  62101. +/* plugin->u.space_allocator.destroy_allocator
  62102. + destructor. It is called on fs unmount */
  62103. +int reiser4_destroy_allocator_bitmap(reiser4_space_allocator * allocator,
  62104. + struct super_block *super)
  62105. +{
  62106. + bmap_nr_t bitmap_blocks_nr;
  62107. + bmap_nr_t i;
  62108. +
  62109. + struct bitmap_allocator_data *data = allocator->u.generic;
  62110. +
  62111. + assert("zam-414", data != NULL);
  62112. + assert("zam-376", data->bitmap != NULL);
  62113. +
  62114. + bitmap_blocks_nr = get_nr_bmap(super);
  62115. +
  62116. + for (i = 0; i < bitmap_blocks_nr; i++) {
  62117. + struct bitmap_node *bnode = data->bitmap + i;
  62118. +
  62119. + mutex_lock(&bnode->mutex);
  62120. +
  62121. +#if REISER4_DEBUG
  62122. + if (atomic_read(&bnode->loaded)) {
  62123. + jnode *wj = bnode->wjnode;
  62124. + jnode *cj = bnode->cjnode;
  62125. +
  62126. + assert("zam-480", jnode_page(cj) != NULL);
  62127. + assert("zam-633", jnode_page(wj) != NULL);
  62128. +
  62129. + assert("zam-634", /* working bitmap must equal commit bitmap at unmount */
  62130. + memcmp(jdata(wj), jdata(cj), /* was jdata(wj) vs itself: vacuous */
  62131. + bmap_size(super->s_blocksize)) == 0);
  62132. +
  62133. + }
  62134. +#endif
  62135. + done_bnode(bnode);
  62136. + mutex_unlock(&bnode->mutex);
  62137. + }
  62138. +
  62139. + vfree(data->bitmap);
  62140. + kfree(data);
  62141. +
  62142. + allocator->u.generic = NULL;
  62143. +
  62144. + return 0;
  62145. +}
  62146. +
  62147. +/*
  62148. + * Local variables:
  62149. + * c-indentation-style: "K&R"
  62150. + * mode-name: "LC"
  62151. + * c-basic-offset: 8
  62152. + * tab-width: 8
  62153. + * fill-column: 79
  62154. + * scroll-step: 1
  62155. + * End:
  62156. + */
  62157. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/space/bitmap.h linux-4.14.2/fs/reiser4/plugin/space/bitmap.h
  62158. --- linux-4.14.2.orig/fs/reiser4/plugin/space/bitmap.h 1970-01-01 01:00:00.000000000 +0100
  62159. +++ linux-4.14.2/fs/reiser4/plugin/space/bitmap.h 2017-11-26 22:13:09.000000000 +0100
  62160. @@ -0,0 +1,47 @@
  62161. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  62162. +
  62163. +#if !defined (__REISER4_PLUGIN_SPACE_BITMAP_H__)
  62164. +#define __REISER4_PLUGIN_SPACE_BITMAP_H__
  62165. +
  62166. +#include "../../dformat.h"
  62167. +#include "../../block_alloc.h"
  62168. +
  62169. +#include <linux/types.h> /* for __u?? */
  62170. +#include <linux/fs.h> /* for struct super_block */
  62171. +/* EDWARD-FIXME-HANS: write something as informative as the below for every .h file lacking it. */
  62172. +/* declarations of functions implementing methods of space allocator plugin for
  62173. + bitmap based allocator. The functions themselves are in bitmap.c */
  62174. +extern int reiser4_init_allocator_bitmap(reiser4_space_allocator *,
  62175. + struct super_block *, void *);
  62176. +extern int reiser4_destroy_allocator_bitmap(reiser4_space_allocator *,
  62177. + struct super_block *);
  62178. +extern int reiser4_alloc_blocks_bitmap(reiser4_space_allocator *,
  62179. + reiser4_blocknr_hint *, int needed,
  62180. + reiser4_block_nr * start,
  62181. + reiser4_block_nr * len);
  62182. +extern int reiser4_check_blocks_bitmap(const reiser4_block_nr *,
  62183. + const reiser4_block_nr *, int);
  62184. +extern void reiser4_dealloc_blocks_bitmap(reiser4_space_allocator *,
  62185. + reiser4_block_nr,
  62186. + reiser4_block_nr);
  62187. +extern int reiser4_pre_commit_hook_bitmap(void);
  62188. +
  62189. +#define reiser4_post_commit_hook_bitmap() do{}while(0)
  62190. +#define reiser4_post_write_back_hook_bitmap() do{}while(0)
  62191. +#define reiser4_print_info_bitmap(pref, al) do{}while(0)
  62192. +
  62193. +typedef __u64 bmap_nr_t;
  62194. +typedef __u32 bmap_off_t;
  62195. +
  62196. +#endif /* __REISER4_PLUGIN_SPACE_BITMAP_H__ */
  62197. +
  62198. +/* Make Linus happy.
  62199. + Local variables:
  62200. + c-indentation-style: "K&R"
  62201. + mode-name: "LC"
  62202. + c-basic-offset: 8
  62203. + tab-width: 8
  62204. + fill-column: 120
  62205. + scroll-step: 1
  62206. + End:
  62207. +*/
  62208. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/space/Makefile linux-4.14.2/fs/reiser4/plugin/space/Makefile
  62209. --- linux-4.14.2.orig/fs/reiser4/plugin/space/Makefile 1970-01-01 01:00:00.000000000 +0100
  62210. +++ linux-4.14.2/fs/reiser4/plugin/space/Makefile 2017-11-26 22:13:09.000000000 +0100
  62211. @@ -0,0 +1,4 @@
  62212. +obj-$(CONFIG_REISER4_FS) += space_plugins.o
  62213. +
  62214. +space_plugins-objs := \
  62215. + bitmap.o
  62216. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/space/space_allocator.h linux-4.14.2/fs/reiser4/plugin/space/space_allocator.h
  62217. --- linux-4.14.2.orig/fs/reiser4/plugin/space/space_allocator.h 1970-01-01 01:00:00.000000000 +0100
  62218. +++ linux-4.14.2/fs/reiser4/plugin/space/space_allocator.h 2017-11-26 22:13:09.000000000 +0100
  62219. @@ -0,0 +1,80 @@
  62220. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  62221. +
  62222. +#ifndef __SPACE_ALLOCATOR_H__
  62223. +#define __SPACE_ALLOCATOR_H__
  62224. +
  62225. +#include "../../forward.h"
  62226. +#include "bitmap.h"
  62227. +/* NIKITA-FIXME-HANS: surely this could use a comment. Something about how bitmap is the only space allocator for now,
  62228. + * but... */
  62229. +#define DEF_SPACE_ALLOCATOR(allocator) \
  62230. + \
  62231. +static inline int sa_init_allocator (reiser4_space_allocator * al, struct super_block *s, void * opaque) \
  62232. +{ \
  62233. + return reiser4_init_allocator_##allocator (al, s, opaque); \
  62234. +} \
  62235. + \
  62236. +static inline void sa_destroy_allocator (reiser4_space_allocator *al, struct super_block *s) \
  62237. +{ \
  62238. + reiser4_destroy_allocator_##allocator (al, s); \
  62239. +} \
  62240. + \
  62241. +static inline int sa_alloc_blocks (reiser4_space_allocator *al, reiser4_blocknr_hint * hint, \
  62242. + int needed, reiser4_block_nr * start, reiser4_block_nr * len) \
  62243. +{ \
  62244. + return reiser4_alloc_blocks_##allocator (al, hint, needed, start, len); \
  62245. +} \
  62246. +static inline void sa_dealloc_blocks (reiser4_space_allocator * al, reiser4_block_nr start, reiser4_block_nr len) \
  62247. +{ \
  62248. + reiser4_dealloc_blocks_##allocator (al, start, len); \
  62249. +} \
  62250. + \
  62251. +static inline int sa_check_blocks (const reiser4_block_nr * start, const reiser4_block_nr * end, int desired) \
  62252. +{ \
  62253. + return reiser4_check_blocks_##allocator (start, end, desired); \
  62254. +} \
  62255. + \
  62256. +static inline void sa_pre_commit_hook (void) \
  62257. +{ \
  62258. + reiser4_pre_commit_hook_##allocator (); \
  62259. +} \
  62260. + \
  62261. +static inline void sa_post_commit_hook (void) \
  62262. +{ \
  62263. + reiser4_post_commit_hook_##allocator (); \
  62264. +} \
  62265. + \
  62266. +static inline void sa_post_write_back_hook (void) \
  62267. +{ \
  62268. + reiser4_post_write_back_hook_##allocator(); \
  62269. +} \
  62270. + \
  62271. +static inline void sa_print_info(const char * prefix, reiser4_space_allocator * al) \
  62272. +{ \
  62273. + reiser4_print_info_##allocator (prefix, al); \
  62274. +}
  62275. +
  62276. +DEF_SPACE_ALLOCATOR(bitmap)
  62277. +
  62278. +/* this object is part of reiser4 private in-core super block */
  62279. +struct reiser4_space_allocator {
  62280. + union {
  62281. + /* space allocators might use this pointer to reference their
  62282. + * data. */
  62283. + void *generic;
  62284. + } u;
  62285. +};
  62286. +
  62287. +/* __SPACE_ALLOCATOR_H__ */
  62288. +#endif
  62289. +
  62290. +/* Make Linus happy.
  62291. + Local variables:
  62292. + c-indentation-style: "K&R"
  62293. + mode-name: "LC"
  62294. + c-basic-offset: 8
  62295. + tab-width: 8
  62296. + fill-column: 120
  62297. + scroll-step: 1
  62298. + End:
  62299. +*/
  62300. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/tail_policy.c linux-4.14.2/fs/reiser4/plugin/tail_policy.c
  62301. --- linux-4.14.2.orig/fs/reiser4/plugin/tail_policy.c 1970-01-01 01:00:00.000000000 +0100
  62302. +++ linux-4.14.2/fs/reiser4/plugin/tail_policy.c 2017-11-26 22:13:09.000000000 +0100
  62303. @@ -0,0 +1,113 @@
  62304. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  62305. + * reiser4/README */
  62306. +
  62307. +/* Formatting policy plugins */
  62308. +
  62309. +/*
  62310. + * Formatting policy plugin is used by object plugin (of regular file) to
  62311. + * convert file between two representations.
  62312. + *
  62313. + * Currently following policies are implemented:
  62314. + * never store file in formatted nodes
  62315. + * always store file in formatted nodes
  62316. + * store file in formatted nodes if file is smaller than 4 blocks (default)
  62317. + */
  62318. +
  62319. +#include "../tree.h"
  62320. +#include "../inode.h"
  62321. +#include "../super.h"
  62322. +#include "object.h"
  62323. +#include "plugin.h"
  62324. +#include "node/node.h"
  62325. +#include "plugin_header.h"
  62326. +
  62327. +#include <linux/pagemap.h>
  62328. +#include <linux/fs.h> /* For struct inode */
  62329. +
  62330. +/**
  62331. + * have_formatting_never -
  62332. + * @inode:
  62333. + * @size:
  62334. + *
  62335. + *
  62336. + */
  62337. +/* Never store file's tail as direct item */
  62338. +/* Audited by: green(2002.06.12) */
  62339. +static int have_formatting_never(const struct inode *inode UNUSED_ARG
  62340. + /* inode to operate on */ ,
  62341. + loff_t size UNUSED_ARG/* new object size */)
  62342. +{
  62343. + return 0;
  62344. +}
  62345. +
  62346. +/* Always store file's tail as direct item */
  62347. +/* Audited by: green(2002.06.12) */
  62348. +static int
  62349. +have_formatting_always(const struct inode *inode UNUSED_ARG
  62350. + /* inode to operate on */ ,
  62351. + loff_t size UNUSED_ARG/* new object size */)
  62352. +{
  62353. + return 1;
  62354. +}
  62355. +
  62356. +/* This function tests whether the file denoted by @inode should be stored as
  62357. + tails only or as extents only. */
  62358. +static int
  62359. +have_formatting_default(const struct inode *inode UNUSED_ARG
  62360. + /* inode to operate on */ ,
  62361. + loff_t size/* new object size */)
  62362. +{
  62363. + assert("umka-1253", inode != NULL);
  62364. +
  62365. + if (size > inode->i_sb->s_blocksize * 4)
  62366. + return 0;
  62367. +
  62368. + return 1;
  62369. +}
  62370. +
  62371. +/* tail plugins */
  62372. +formatting_plugin formatting_plugins[LAST_TAIL_FORMATTING_ID] = {
  62373. + [NEVER_TAILS_FORMATTING_ID] = {
  62374. + .h = {
  62375. + .type_id = REISER4_FORMATTING_PLUGIN_TYPE,
  62376. + .id = NEVER_TAILS_FORMATTING_ID,
  62377. + .pops = NULL,
  62378. + .label = "never",
  62379. + .desc = "Never store file's tail",
  62380. + .linkage = {NULL, NULL}
  62381. + },
  62382. + .have_tail = have_formatting_never
  62383. + },
  62384. + [ALWAYS_TAILS_FORMATTING_ID] = {
  62385. + .h = {
  62386. + .type_id = REISER4_FORMATTING_PLUGIN_TYPE,
  62387. + .id = ALWAYS_TAILS_FORMATTING_ID,
  62388. + .pops = NULL,
  62389. + .label = "always",
  62390. + .desc = "Always store file's tail",
  62391. + .linkage = {NULL, NULL}
  62392. + },
  62393. + .have_tail = have_formatting_always
  62394. + },
  62395. + [SMALL_FILE_FORMATTING_ID] = {
  62396. + .h = {
  62397. + .type_id = REISER4_FORMATTING_PLUGIN_TYPE,
  62398. + .id = SMALL_FILE_FORMATTING_ID,
  62399. + .pops = NULL,
  62400. + .label = "4blocks",
  62401. + .desc = "store files shorter than 4 blocks in tail items",
  62402. + .linkage = {NULL, NULL}
  62403. + },
  62404. + .have_tail = have_formatting_default
  62405. + }
  62406. +};
  62407. +
  62408. +/*
  62409. + * Local variables:
  62410. + * c-indentation-style: "K&R"
  62411. + * mode-name: "LC"
  62412. + * c-basic-offset: 8
  62413. + * tab-width: 8
  62414. + * fill-column: 79
  62415. + * End:
  62416. + */
  62417. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/plugin/txmod.c linux-4.14.2/fs/reiser4/plugin/txmod.c
  62418. --- linux-4.14.2.orig/fs/reiser4/plugin/txmod.c 1970-01-01 01:00:00.000000000 +0100
  62419. +++ linux-4.14.2/fs/reiser4/plugin/txmod.c 2017-11-26 22:13:09.000000000 +0100
  62420. @@ -0,0 +1,1238 @@
  62421. +#include "../forward.h"
  62422. +#include "../debug.h"
  62423. +#include "../coord.h"
  62424. +#include "../plugin/plugin.h"
  62425. +#include "../jnode.h"
  62426. +#include "../znode.h"
  62427. +#include "../block_alloc.h"
  62428. +#include "../reiser4.h"
  62429. +#include "../flush.h"
  62430. +
  62431. +/*
  62432. + * This file contains implementation of different transaction models.
  62433. + *
  62434. + * Transaction model is a high-level block allocator, which assigns block
  62435. + * numbers to dirty nodes, and, thereby, decides how those nodes will be
  62436. + * committed.
  62437. + *
  62438. + * Every dirty node of reiser4 atom can be committed by either of the
  62439. + * following two ways:
  62440. + * 1) via journal;
  62441. + * 2) using "write-anywhere" technique.
  62442. + *
  62443. + * If the allocator doesn't change on-disk location of a node, then
  62444. + * this node will be committed using journalling technique (overwrite).
  62445. + * Otherwise, it will be committed via write-anywhere technique (relocate):
  62446. + *
  62447. + * relocate <---- allocate ----> overwrite
  62448. + *
  62449. + * So, in our interpretation the 2 traditional "classic" strategies in
  62450. + * committing transactions (journalling and "write-anywhere") are just two
  62451. + * boundary cases: 1) when all nodes are overwritten, and 2) when all nodes
  62452. + * are relocated.
  62453. + *
  62454. + * Besides those 2 boundary cases we can implement in reiser4 the infinite
  62455. + * set of their various combinations, so that user can choose what is really
  62456. + * suitable for his needs.
  62457. + */
  62458. +
  62459. +/* jnode_make_wander_nolock <- find_flush_start_jnode (special case for znode-above-root)
  62460. + <- jnode_make_wander */
  62461. +void jnode_make_wander_nolock(jnode * node);
  62462. +
  62463. +/* jnode_make_wander <- txmod.forward_alloc_formatted */
  62464. +void jnode_make_wander(jnode * node);
  62465. +
  62466. +/* jnode_make_reloc_nolock <- znode_make_reloc
  62467. + <- unformatted_make_reloc */
  62468. +static void jnode_make_reloc_nolock(flush_queue_t * fq, jnode * node);
  62469. +
  62470. +
  62471. +
  62472. + /* Handle formatted nodes in forward context */
  62473. +
  62474. +
  62475. +/**
  62476. + * txmod.forward_alloc_formatted <- allocate_znode <- alloc_pos_and_ancestors <- jnode_flush
  62477. + * <- alloc_one_ancestor <- alloc_pos_and_ancestors <- jnode_flush
  62478. + * <- alloc_one_ancestor (recursive)
  62479. + * <- lock_parent_and_allocate_znode <- squalloc_upper_levels <- check_parents_and_squalloc_upper_levels <- squalloc_upper_levels (recursive)
  62480. + * <- handle_pos_on_formatted
  62481. + * <- handle_pos_on_formatted
  62482. + * <- handle_pos_end_of_twig
  62483. + * <- handle_pos_to_leaf
  62484. + */
  62485. +void znode_make_reloc(znode * z, flush_queue_t * fq);
  62486. +
  62487. +
  62488. + /* Handle unformatted nodes */
  62489. +
  62490. +
  62491. +/* unformatted_make_reloc <- assign_real_blocknrs <- txmod.forward_alloc_unformatted
  62492. + <- txmod.squeeze_alloc_unformatted
  62493. +*/
  62494. +void unformatted_make_reloc(jnode *node, flush_queue_t *fq);
  62495. +
  62496. +static void forward_overwrite_unformatted(flush_pos_t *flush_pos, oid_t oid,
  62497. + unsigned long index, reiser4_block_nr width);
  62498. +
  62499. +/* mark_jnode_overwrite <- forward_overwrite_unformatted <- txmod.forward_alloc_unformatted
  62500. + squeeze_overwrite_unformatted <- txmod.squeeze_alloc_unformatted
  62501. +*/
  62502. +static void mark_jnode_overwrite(struct list_head *jnodes, jnode *node);
  62503. +
  62504. +int split_allocated_extent(coord_t *coord, reiser4_block_nr pos_in_unit);
  62505. +int allocated_extent_slum_size(flush_pos_t *flush_pos, oid_t oid,
  62506. + unsigned long index, unsigned long count);
  62507. +void allocate_blocks_unformatted(reiser4_blocknr_hint *preceder,
  62508. + reiser4_block_nr wanted_count,
  62509. + reiser4_block_nr *first_allocated,
  62510. + reiser4_block_nr *allocated,
  62511. + block_stage_t block_stage);
  62512. +void assign_real_blocknrs(flush_pos_t *flush_pos, oid_t oid,
  62513. + unsigned long index, reiser4_block_nr count,
  62514. + reiser4_block_nr first);
  62515. +int convert_extent(coord_t *coord, reiser4_extent *replace);
  62516. +int put_unit_to_end(znode *node,
  62517. + const reiser4_key *key, reiser4_extent *copy_ext);
  62518. +
  62519. +/*
  62520. + * txmod.forward_alloc_unformatted <- handle_pos_on_twig
  62521. + * txmod.squeeze_alloc_unformatted <- squeeze_right_twig
  62522. + */
  62523. +
  62524. +/* Common functions */
  62525. +
  62526. +/**
  62527. + * Mark node JNODE_OVRWR and put it on atom->overwrite_nodes list.
  62528. + * Atom lock and jnode lock should be taken before calling this
  62529. + * function.
  62530. + */
  62531. +void jnode_make_wander_nolock(jnode * node)
  62532. +{
  62533. + txn_atom *atom;
  62534. +
  62535. + assert("nikita-2432", !JF_ISSET(node, JNODE_RELOC));
  62536. + assert("nikita-3153", JF_ISSET(node, JNODE_DIRTY));
  62537. + assert("zam-897", !JF_ISSET(node, JNODE_FLUSH_QUEUED));
  62538. + assert("nikita-3367", !reiser4_blocknr_is_fake(jnode_get_block(node)));
  62539. +
  62540. + atom = node->atom;
  62541. +
  62542. + assert("zam-895", atom != NULL);
  62543. + assert("zam-894", atom_is_protected(atom));
  62544. +
  62545. + JF_SET(node, JNODE_OVRWR);
  62546. + /* move node to atom's overwrite list */
  62547. + list_move_tail(&node->capture_link, ATOM_OVRWR_LIST(atom));
  62548. + ON_DEBUG(count_jnode(atom, node, DIRTY_LIST, OVRWR_LIST, 1));
  62549. +}
  62550. +
  62551. +/*
  62552. + * Same as jnode_make_wander_nolock, but all necessary locks
  62553. + * are taken inside this function.
  62554. + */
  62555. +void jnode_make_wander(jnode * node)
  62556. +{
  62557. + txn_atom *atom;
  62558. +
  62559. + spin_lock_jnode(node);
  62560. + atom = jnode_get_atom(node);
  62561. + assert("zam-913", atom != NULL);
  62562. + assert("zam-914", !JF_ISSET(node, JNODE_RELOC));
  62563. +
  62564. + jnode_make_wander_nolock(node);
  62565. + spin_unlock_atom(atom);
  62566. + spin_unlock_jnode(node);
  62567. +}
  62568. +
  62569. +/* this just sets RELOC bit */
  62570. +static void jnode_make_reloc_nolock(flush_queue_t * fq, jnode * node)
  62571. +{
  62572. + assert_spin_locked(&(node->guard));
  62573. + assert("zam-916", JF_ISSET(node, JNODE_DIRTY));
  62574. + assert("zam-917", !JF_ISSET(node, JNODE_RELOC));
  62575. + assert("zam-918", !JF_ISSET(node, JNODE_OVRWR));
  62576. + assert("zam-920", !JF_ISSET(node, JNODE_FLUSH_QUEUED));
  62577. + assert("nikita-3367", !reiser4_blocknr_is_fake(jnode_get_block(node)));
  62578. + jnode_set_reloc(node);
  62579. +}
  62580. +
  62581. +/*
  62582. + * Mark znode RELOC and put it on flush queue
  62583. + */
  62584. +void znode_make_reloc(znode * z, flush_queue_t * fq)
  62585. +{
  62586. + jnode *node;
  62587. + txn_atom *atom;
  62588. +
  62589. + node = ZJNODE(z);
  62590. + spin_lock_jnode(node);
  62591. +
  62592. + atom = jnode_get_atom(node);
  62593. + assert("zam-919", atom != NULL);
  62594. +
  62595. + jnode_make_reloc_nolock(fq, node);
  62596. + queue_jnode(fq, node);
  62597. +
  62598. + spin_unlock_atom(atom);
  62599. + spin_unlock_jnode(node);
  62600. +}
  62601. +
  62602. +/* Mark unformatted node RELOC and put it on flush queue */
  62603. +void unformatted_make_reloc(jnode *node, flush_queue_t *fq)
  62604. +{
  62605. + assert("vs-1479", jnode_is_unformatted(node));
  62606. +
  62607. + jnode_make_reloc_nolock(fq, node);
  62608. + queue_jnode(fq, node);
  62609. +}
  62610. +
  62611. +/**
  62612. + * mark_jnode_overwrite - assign node to overwrite set
  62613. + * @jnodes: overwrite set list head
  62614. + * @node: jnode to belong to overwrite set
  62615. + *
  62616. + * Sets OVRWR jnode state bit and puts @node to the end of list head @jnodes
  62617. + * which is an accumulator for nodes before they get to overwrite set list of
  62618. + * atom.
  62619. + */
  62620. +static void mark_jnode_overwrite(struct list_head *jnodes, jnode *node)
  62621. +{
  62622. + spin_lock_jnode(node);
  62623. +
  62624. + assert("zam-917", !JF_ISSET(node, JNODE_RELOC));
  62625. + assert("zam-918", !JF_ISSET(node, JNODE_OVRWR));
  62626. +
  62627. + JF_SET(node, JNODE_OVRWR);
  62628. + list_move_tail(&node->capture_link, jnodes);
  62629. + ON_DEBUG(count_jnode(node->atom, node, DIRTY_LIST, OVRWR_LIST, 0));
  62630. +
  62631. + spin_unlock_jnode(node);
  62632. +}
  62633. +
  62634. +static int forward_relocate_unformatted(flush_pos_t *flush_pos,
  62635. + reiser4_extent *ext,
  62636. + extent_state state,
  62637. + oid_t oid, __u64 index,
  62638. + __u64 width, int *exit)
  62639. +{
  62640. + int result;
  62641. + coord_t *coord;
  62642. + reiser4_extent replace_ext;
  62643. + reiser4_block_nr protected;
  62644. + reiser4_block_nr start;
  62645. + reiser4_block_nr first_allocated;
  62646. + __u64 allocated;
  62647. + block_stage_t block_stage;
  62648. +
  62649. + *exit = 0;
  62650. + coord = &flush_pos->coord;
  62651. + start = extent_get_start(ext);
  62652. +
  62653. + if (flush_pos->pos_in_unit) {
  62654. + /*
  62655. + * split extent unit into two ones
  62656. + */
  62657. + result = split_allocated_extent(coord,
  62658. + flush_pos->pos_in_unit);
  62659. + flush_pos->pos_in_unit = 0;
  62660. + *exit = 1;
  62661. + return result;
  62662. + }
  62663. + /*
  62664. + * limit number of nodes to allocate
  62665. + */
  62666. + if (flush_pos->nr_to_write < width)
  62667. + width = flush_pos->nr_to_write;
  62668. +
  62669. + if (state == ALLOCATED_EXTENT) {
  62670. + /*
  62671. + * all protected nodes are not flushprepped, therefore
  62672. + * they are counted as flush_reserved
  62673. + */
  62674. + block_stage = BLOCK_FLUSH_RESERVED;
  62675. + protected = allocated_extent_slum_size(flush_pos, oid,
  62676. + index, width);
  62677. + if (protected == 0) {
  62678. + flush_pos->state = POS_INVALID;
  62679. + flush_pos->pos_in_unit = 0;
  62680. + *exit = 1;
  62681. + return 0;
  62682. + }
  62683. + } else {
  62684. + block_stage = BLOCK_UNALLOCATED;
  62685. + protected = width;
  62686. + }
  62687. + /*
  62688. + * look at previous unit if possible. If it is allocated, make
  62689. + * preceder more precise
  62690. + */
  62691. + if (coord->unit_pos &&
  62692. + (state_of_extent(ext - 1) == ALLOCATED_EXTENT))
  62693. + reiser4_pos_hint(flush_pos)->blk =
  62694. + extent_get_start(ext - 1) +
  62695. + extent_get_width(ext - 1);
  62696. + /*
  62697. + * allocate new block numbers for protected nodes
  62698. + */
  62699. + allocate_blocks_unformatted(reiser4_pos_hint(flush_pos),
  62700. + protected,
  62701. + &first_allocated, &allocated,
  62702. + block_stage);
  62703. +
  62704. + if (state == ALLOCATED_EXTENT)
  62705. + /*
  62706. + * on relocating - free nodes which are going to be
  62707. + * relocated
  62708. + */
  62709. + reiser4_dealloc_blocks(&start, &allocated, 0, BA_DEFER);
  62710. +
  62711. + /* assign new block numbers to protected nodes */
  62712. + assign_real_blocknrs(flush_pos, oid, index, allocated, first_allocated);
  62713. +
  62714. + /* prepare extent which will replace current one */
  62715. + reiser4_set_extent(&replace_ext, first_allocated, allocated);
  62716. +
  62717. + /* adjust extent item */
  62718. + result = convert_extent(coord, &replace_ext);
  62719. + if (result != 0 && result != -ENOMEM) {
  62720. + warning("vs-1461",
  62721. + "Failed to allocate extent. Should not happen\n");
  62722. + *exit = 1;
  62723. + return result;
  62724. + }
  62725. + /*
  62726. + * break flush: we prepared for flushing as many blocks as we
  62727. + * were asked for
  62728. + */
  62729. + if (flush_pos->nr_to_write == allocated)
  62730. + flush_pos->state = POS_INVALID;
  62731. + return 0;
  62732. +}
  62733. +
  62734. +static squeeze_result squeeze_relocate_unformatted(znode *left,
  62735. + const coord_t *coord,
  62736. + flush_pos_t *flush_pos,
  62737. + reiser4_key *key,
  62738. + reiser4_key *stop_key)
  62739. +{
  62740. + int result;
  62741. + reiser4_extent *ext;
  62742. + __u64 index;
  62743. + __u64 width;
  62744. + reiser4_block_nr start;
  62745. + extent_state state;
  62746. + oid_t oid;
  62747. + reiser4_block_nr first_allocated;
  62748. + __u64 allocated;
  62749. + __u64 protected;
  62750. + reiser4_extent copy_extent;
  62751. + block_stage_t block_stage;
  62752. +
  62753. + assert("edward-1610", flush_pos->pos_in_unit == 0);
  62754. + assert("edward-1611", coord_is_leftmost_unit(coord));
  62755. + assert("edward-1612", item_is_extent(coord));
  62756. +
  62757. + ext = extent_by_coord(coord);
  62758. + index = extent_unit_index(coord);
  62759. + start = extent_get_start(ext);
  62760. + width = extent_get_width(ext);
  62761. + state = state_of_extent(ext);
  62762. + unit_key_by_coord(coord, key);
  62763. + oid = get_key_objectid(key);
  62764. +
  62765. + assert("edward-1613", state != HOLE_EXTENT);
  62766. +
  62767. + if (state == ALLOCATED_EXTENT) {
  62768. + /*
  62769. + * all protected nodes are not flushprepped,
  62770. + * therefore they are counted as flush_reserved
  62771. + */
  62772. + block_stage = BLOCK_FLUSH_RESERVED;
  62773. + protected = allocated_extent_slum_size(flush_pos, oid,
  62774. + index, width);
  62775. + if (protected == 0) {
  62776. + flush_pos->state = POS_INVALID;
  62777. + flush_pos->pos_in_unit = 0;
  62778. + return 0;
  62779. + }
  62780. + } else {
  62781. + block_stage = BLOCK_UNALLOCATED;
  62782. + protected = width;
  62783. + }
  62784. + /*
  62785. + * look at previous unit if possible. If it is allocated, make
  62786. + * preceder more precise
  62787. + */
  62788. + if (coord->unit_pos &&
  62789. + (state_of_extent(ext - 1) == ALLOCATED_EXTENT))
  62790. + reiser4_pos_hint(flush_pos)->blk =
  62791. + extent_get_start(ext - 1) +
  62792. + extent_get_width(ext - 1);
  62793. + /*
  62794. + * allocate new block numbers for protected nodes
  62795. + */
  62796. + allocate_blocks_unformatted(reiser4_pos_hint(flush_pos),
  62797. + protected,
  62798. + &first_allocated, &allocated,
  62799. + block_stage);
  62800. + /*
  62801. + * prepare extent which will be copied to left
  62802. + */
  62803. + reiser4_set_extent(&copy_extent, first_allocated, allocated);
  62804. + result = put_unit_to_end(left, key, &copy_extent);
  62805. +
  62806. + if (result == -E_NODE_FULL) {
  62807. + /*
  62808. + * free blocks which were just allocated
  62809. + */
  62810. + reiser4_dealloc_blocks(&first_allocated, &allocated,
  62811. + (state == ALLOCATED_EXTENT)
  62812. + ? BLOCK_FLUSH_RESERVED
  62813. + : BLOCK_UNALLOCATED,
  62814. + BA_PERMANENT);
  62815. + /*
  62816. + * rewind the preceder
  62817. + */
  62818. + flush_pos->preceder.blk = first_allocated;
  62819. + check_preceder(flush_pos->preceder.blk);
  62820. + return SQUEEZE_TARGET_FULL;
  62821. + }
  62822. + if (state == ALLOCATED_EXTENT) {
  62823. + /*
  62824. + * free nodes which were relocated
  62825. + */
  62826. + reiser4_dealloc_blocks(&start, &allocated, 0, BA_DEFER);
  62827. + }
  62828. + /*
  62829. + * assign new block numbers to protected nodes
  62830. + */
  62831. + assign_real_blocknrs(flush_pos, oid, index, allocated,
  62832. + first_allocated);
  62833. + set_key_offset(key,
  62834. + get_key_offset(key) +
  62835. + (allocated << current_blocksize_bits));
  62836. + return SQUEEZE_CONTINUE;
  62837. +}
  62838. +
  62839. +/**
  62840. + * forward_overwrite_unformatted - put bunch of jnodes to overwrite set
  62841. + * @flush_pos: flush position
  62842. + * @oid: objectid of file jnodes belong to
  62843. + * @index: starting index
  62844. + * @width: extent width
  62845. + *
  62846. + * Puts nodes of one extent (file objectid @oid, extent width @width) to atom's
  62847. + * overwrite set. Starting from the one with index @index. If end of slum is
  62848. + * detected (node is not found or flushprepped) - stop iterating and set flush
  62849. + * position's state to POS_INVALID.
  62850. + */
  62851. +static void forward_overwrite_unformatted(flush_pos_t *flush_pos, oid_t oid,
  62852. + unsigned long index,
  62853. + reiser4_block_nr width)
  62854. +{
  62855. + unsigned long i;
  62856. + reiser4_tree *tree;
  62857. + jnode *node;
  62858. + txn_atom *atom;
  62859. + LIST_HEAD(jnodes);
  62860. +
  62861. + tree = current_tree;
  62862. +
  62863. + atom = atom_locked_by_fq(reiser4_pos_fq(flush_pos));
  62864. + assert("vs-1478", atom);
  62865. +
  62866. + for (i = flush_pos->pos_in_unit; i < width; i++, index++) {
  62867. + node = jlookup(tree, oid, index);
  62868. + if (!node) {
  62869. + flush_pos->state = POS_INVALID;
  62870. + break;
  62871. + }
  62872. + if (jnode_check_flushprepped(node)) {
  62873. + flush_pos->state = POS_INVALID;
  62874. + atomic_dec(&node->x_count);
  62875. + break;
  62876. + }
  62877. + if (node->atom != atom) {
  62878. + flush_pos->state = POS_INVALID;
  62879. + atomic_dec(&node->x_count);
  62880. + break;
  62881. + }
  62882. + mark_jnode_overwrite(&jnodes, node);
  62883. + atomic_dec(&node->x_count);
  62884. + }
  62885. +
  62886. + list_splice_init(&jnodes, ATOM_OVRWR_LIST(atom)->prev);
  62887. + spin_unlock_atom(atom);
  62888. +}
  62889. +
  62890. +static squeeze_result squeeze_overwrite_unformatted(znode *left,
  62891. + const coord_t *coord,
  62892. + flush_pos_t *flush_pos,
  62893. + reiser4_key *key,
  62894. + reiser4_key *stop_key)
  62895. +{
  62896. + int result;
  62897. + reiser4_extent *ext;
  62898. + __u64 index;
  62899. + __u64 width;
  62900. + reiser4_block_nr start;
  62901. + extent_state state;
  62902. + oid_t oid;
  62903. + reiser4_extent copy_extent;
  62904. +
  62905. + assert("vs-1457", flush_pos->pos_in_unit == 0);
  62906. + assert("vs-1467", coord_is_leftmost_unit(coord));
  62907. + assert("vs-1467", item_is_extent(coord));
  62908. +
  62909. + ext = extent_by_coord(coord);
  62910. + index = extent_unit_index(coord);
  62911. + start = extent_get_start(ext);
  62912. + width = extent_get_width(ext);
  62913. + state = state_of_extent(ext);
  62914. + unit_key_by_coord(coord, key);
  62915. + oid = get_key_objectid(key);
  62916. + /*
  62917. + * try to copy unit as it is to left neighbor
  62918. + * and make all first not flushprepped nodes
  62919. + * overwrite nodes
  62920. + */
  62921. + reiser4_set_extent(&copy_extent, start, width);
  62922. +
  62923. + result = put_unit_to_end(left, key, &copy_extent);
  62924. + if (result == -E_NODE_FULL)
  62925. + return SQUEEZE_TARGET_FULL;
  62926. +
  62927. + if (state != HOLE_EXTENT)
  62928. + forward_overwrite_unformatted(flush_pos, oid, index, width);
  62929. +
  62930. + set_key_offset(key,
  62931. + get_key_offset(key) + (width << current_blocksize_bits));
  62932. + return SQUEEZE_CONTINUE;
  62933. +}
  62934. +
  62935. +/************************ HYBRID TRANSACTION MODEL ****************************/
  62936. +
  62937. +/**
  62938. + * This is the default transaction model suggested by Josh MacDonald and
  62939. + * Hans Reiser. This was the single hardcoded transaction mode till Feb 2014
  62940. + * when Edward introduced pure Journalling and pure Write-Anywhere.
  62941. + *
  62942. + * In this mode all relocate-overwrite decisions are result of attempts to
  62943. + * defragment atom's locality.
  62944. + */
  62945. +
  62946. +/* REVERSE PARENT-FIRST RELOCATION POLICIES */
  62947. +
  62948. +/* This implements the is-it-close-enough-to-its-preceder? test for relocation
  62949. + in the reverse parent-first relocate context. Here all we know is the
  62950. + preceder and the block number. Since we are going in reverse, the preceder
  62951. + may still be relocated as well, so we can't ask the block allocator "is there
  62952. + a closer block available to relocate?" here. In the _forward_ parent-first
  62953. + relocate context (not here) we actually call the block allocator to try and
  62954. + find a closer location.
  62955. +*/
  62956. +static int reverse_try_defragment_if_close(const reiser4_block_nr * pblk,
  62957. + const reiser4_block_nr * nblk)
  62958. +{
  62959. + reiser4_block_nr dist;
  62960. +
  62961. + assert("jmacd-7710", *pblk != 0 && *nblk != 0);
  62962. + assert("jmacd-7711", !reiser4_blocknr_is_fake(pblk));
  62963. + assert("jmacd-7712", !reiser4_blocknr_is_fake(nblk));
  62964. +
  62965. + /* Distance is the absolute value. */
  62966. + dist = (*pblk > *nblk) ? (*pblk - *nblk) : (*nblk - *pblk);
  62967. +
  62968. + /* If the block is less than FLUSH_RELOCATE_DISTANCE blocks away from
  62969. + its preceder block, do not relocate. */
  62970. + if (dist <= get_current_super_private()->flush.relocate_distance)
  62971. + return 0;
  62972. +
  62973. + return 1;
  62974. +}
  62975. +
  62976. +/**
  62977. + * This function is a predicate that tests for relocation. Always called in the
  62978. + * reverse-parent-first context, when we are asking whether the current node
  62979. + * should be relocated in order to expand the flush by dirtying the parent level
  62980. + * (and thus proceeding to flush that level). When traversing in the forward
  62981. + * parent-first direction (not here), relocation decisions are handled in two
  62982. + * places: allocate_znode() and extent_needs_allocation().
  62983. + */
  62984. +static int reverse_alloc_formatted_hybrid(jnode * node,
  62985. + const coord_t *parent_coord,
  62986. + flush_pos_t *pos)
  62987. +{
  62988. + reiser4_block_nr pblk = 0;
  62989. + reiser4_block_nr nblk = 0;
  62990. +
  62991. + assert("jmacd-8989", !jnode_is_root(node));
  62992. + /*
  62993. + * This function is called only from the
  62994. + * reverse_relocate_check_dirty_parent() and only if the parent
  62995. + * node is clean. This implies that the parent has the real (i.e., not
  62996. + * fake) block number, and, so does the child, because otherwise the
  62997. + * parent would be dirty.
  62998. + */
  62999. +
  63000. + /* New nodes are treated as if they are being relocated. */
  63001. + if (JF_ISSET(node, JNODE_CREATED) ||
  63002. + (pos->leaf_relocate && jnode_get_level(node) == LEAF_LEVEL))
  63003. + return 1;
  63004. +
  63005. + /* Find the preceder. FIXME(B): When the child is an unformatted,
  63006. + previously existing node, the coord may be leftmost even though the
  63007. + child is not the parent-first preceder of the parent. If the first
  63008. + dirty node appears somewhere in the middle of the first extent unit,
  63009. + this preceder calculation is wrong.
  63010. + Needs more logic in here. */
  63011. + if (coord_is_leftmost_unit(parent_coord)) {
  63012. + pblk = *znode_get_block(parent_coord->node);
  63013. + } else {
  63014. + pblk = pos->preceder.blk;
  63015. + }
  63016. + check_preceder(pblk);
  63017. +
  63018. + /* If (pblk == 0) then the preceder isn't allocated or isn't known:
  63019. + relocate. */
  63020. + if (pblk == 0)
  63021. + return 1;
  63022. +
  63023. + nblk = *jnode_get_block(node);
  63024. +
  63025. + if (reiser4_blocknr_is_fake(&nblk))
  63026. + /* child is unallocated, mark parent dirty */
  63027. + return 1;
  63028. +
  63029. + return reverse_try_defragment_if_close(&pblk, &nblk);
  63030. +}
  63031. +
  63032. +/**
  63033. + * A subroutine of forward_alloc_formatted_hybrid(), this is called first to see
  63034. + * if there is a close position to relocate to. It may return ENOSPC if there is
  63035. + * no close position. If there is no close position it may not relocate. This
  63036. + * takes care of updating the parent node with the relocated block address.
  63037. + *
  63038. + * was allocate_znode_update()
  63039. + */
  63040. +static int forward_try_defragment_locality(znode * node,
  63041. + const coord_t *parent_coord,
  63042. + flush_pos_t *pos)
  63043. +{
  63044. + int ret;
  63045. + reiser4_block_nr blk;
  63046. + lock_handle uber_lock;
  63047. + int flush_reserved_used = 0;
  63048. + int grabbed;
  63049. + reiser4_context *ctx;
  63050. + reiser4_super_info_data *sbinfo;
  63051. +
  63052. + init_lh(&uber_lock);
  63053. +
  63054. + ctx = get_current_context();
  63055. + sbinfo = get_super_private(ctx->super);
  63056. +
  63057. + grabbed = ctx->grabbed_blocks;
  63058. +
  63059. + ret = zload(node);
  63060. + if (ret)
  63061. + return ret;
  63062. +
  63063. + if (ZF_ISSET(node, JNODE_CREATED)) {
  63064. + assert("zam-816", reiser4_blocknr_is_fake(znode_get_block(node)));
  63065. + pos->preceder.block_stage = BLOCK_UNALLOCATED;
  63066. + } else {
  63067. + pos->preceder.block_stage = BLOCK_GRABBED;
  63068. +
  63069. + /* The disk space for relocating the @node is already reserved
  63070. + * in "flush reserved" counter if @node is leaf, otherwise we
  63071. + * grab space using BA_RESERVED (means grab space from whole
  63072. + * disk not from only 95%). */
  63073. + if (znode_get_level(node) == LEAF_LEVEL) {
  63074. + /*
  63075. + * earlier (during do_jnode_make_dirty()) we decided
  63076. + * that @node can possibly go into overwrite set and
  63077. + * reserved block for its wandering location.
  63078. + */
  63079. + txn_atom *atom = get_current_atom_locked();
  63080. + assert("nikita-3449",
  63081. + ZF_ISSET(node, JNODE_FLUSH_RESERVED));
  63082. + flush_reserved2grabbed(atom, (__u64) 1);
  63083. + spin_unlock_atom(atom);
  63084. + /*
  63085. + * we are trying to move node into relocate
  63086. + * set. Allocation of relocated position "uses"
  63087. + * reserved block.
  63088. + */
  63089. + ZF_CLR(node, JNODE_FLUSH_RESERVED);
  63090. + flush_reserved_used = 1;
  63091. + } else {
  63092. + ret = reiser4_grab_space_force((__u64) 1, BA_RESERVED);
  63093. + if (ret != 0)
  63094. + goto exit;
  63095. + }
  63096. + }
  63097. +
  63098. + /* We might not use 5% of reserved disk space here, and flush will not
  63099. + pack tightly. */
  63100. + ret = reiser4_alloc_block(&pos->preceder, &blk,
  63101. + BA_FORMATTED | BA_PERMANENT);
  63102. + if (ret)
  63103. + goto exit;
  63104. +
  63105. + if (!ZF_ISSET(node, JNODE_CREATED) &&
  63106. + (ret = reiser4_dealloc_block(znode_get_block(node), 0,
  63107. + BA_DEFER | BA_FORMATTED)))
  63108. + goto exit;
  63109. +
  63110. + if (likely(!znode_is_root(node))) {
  63111. + item_plugin *iplug;
  63112. +
  63113. + iplug = item_plugin_by_coord(parent_coord);
  63114. + assert("nikita-2954", iplug->f.update != NULL);
  63115. + iplug->f.update(parent_coord, &blk);
  63116. +
  63117. + znode_make_dirty(parent_coord->node);
  63118. +
  63119. + } else {
  63120. + reiser4_tree *tree = znode_get_tree(node);
  63121. + znode *uber;
  63122. +
  63123. + /* We take a longterm lock on the fake node in order to change
  63124. + the root block number. This may cause atom fusion. */
  63125. + ret = get_uber_znode(tree, ZNODE_WRITE_LOCK, ZNODE_LOCK_HIPRI,
  63126. + &uber_lock);
  63127. + /* The fake node cannot be deleted, and we must have priority
  63128. + here, and may not be confused with ENOSPC. */
  63129. + assert("jmacd-74412",
  63130. + ret != -EINVAL && ret != -E_DEADLOCK && ret != -ENOSPC);
  63131. +
  63132. + if (ret)
  63133. + goto exit;
  63134. +
  63135. + uber = uber_lock.node;
  63136. +
  63137. + write_lock_tree(tree);
  63138. + tree->root_block = blk;
  63139. + write_unlock_tree(tree);
  63140. +
  63141. + znode_make_dirty(uber);
  63142. + }
  63143. + ret = znode_rehash(node, &blk);
  63144. +exit:
  63145. + if (ret) {
  63146. + /* Get flush reserved block back if something fails, because
  63147. + * callers assume that on error block wasn't relocated and its
  63148. + * flush reserved block wasn't used. */
  63149. + if (flush_reserved_used) {
  63150. + /*
  63151. + * ok, we failed to move node into relocate
  63152. + * set. Restore status quo.
  63153. + */
  63154. + grabbed2flush_reserved((__u64) 1);
  63155. + ZF_SET(node, JNODE_FLUSH_RESERVED);
  63156. + }
  63157. + }
  63158. + zrelse(node);
  63159. + done_lh(&uber_lock);
  63160. + grabbed2free_mark(grabbed);
  63161. + return ret;
  63162. +}
  63163. +
  63164. +/*
  63165. + * Make the final relocate/wander decision during
  63166. + * forward parent-first squalloc for a formatted node
  63167. + */
  63168. +static int forward_alloc_formatted_hybrid(znode * node,
  63169. + const coord_t *parent_coord,
  63170. + flush_pos_t *pos)
  63171. +{
  63172. + int ret;
  63173. + reiser4_super_info_data *sbinfo = get_current_super_private();
  63174. + /**
  63175. + * FIXME(D): We have the node write-locked and should have checked for !
  63176. + * allocated() somewhere before reaching this point, but there can be a
  63177. + * race, so this assertion is bogus.
  63178. + */
  63179. + assert("edward-1614", znode_is_loaded(node));
  63180. + assert("jmacd-7987", !jnode_check_flushprepped(ZJNODE(node)));
  63181. + assert("jmacd-7988", znode_is_write_locked(node));
  63182. + assert("jmacd-7989", coord_is_invalid(parent_coord)
  63183. + || znode_is_write_locked(parent_coord->node));
  63184. +
  63185. + if (ZF_ISSET(node, JNODE_REPACK) || ZF_ISSET(node, JNODE_CREATED) ||
  63186. + znode_is_root(node) ||
  63187. + /*
  63188. + * We have enough nodes to relocate no matter what.
  63189. + */
  63190. + (pos->leaf_relocate != 0 && znode_get_level(node) == LEAF_LEVEL)) {
  63191. + /*
  63192. + * No need to decide with new nodes, they are treated the same
  63193. + * as relocate. If the root node is dirty, relocate.
  63194. + */
  63195. + if (pos->preceder.blk == 0) {
  63196. + /*
  63197. + * preceder is unknown and we have decided to relocate
  63198. + * node -- using of default value for search start is
  63199. + * better than search from block #0.
  63200. + */
  63201. + get_blocknr_hint_default(&pos->preceder.blk);
  63202. + check_preceder(pos->preceder.blk);
  63203. + }
  63204. + goto best_reloc;
  63205. +
  63206. + } else if (pos->preceder.blk == 0) {
  63207. + /* If we don't know the preceder, leave it where it is. */
  63208. + jnode_make_wander(ZJNODE(node));
  63209. + } else {
  63210. + /* Make a decision based on block distance. */
  63211. + reiser4_block_nr dist;
  63212. + reiser4_block_nr nblk = *znode_get_block(node);
  63213. +
  63214. + assert("jmacd-6172", !reiser4_blocknr_is_fake(&nblk));
  63215. + assert("jmacd-6173", !reiser4_blocknr_is_fake(&pos->preceder.blk));
  63216. + assert("jmacd-6174", pos->preceder.blk != 0);
  63217. +
  63218. + if (pos->preceder.blk == nblk - 1) {
  63219. + /* Ideal. */
  63220. + jnode_make_wander(ZJNODE(node));
  63221. + } else {
  63222. +
  63223. + dist =
  63224. + (nblk <
  63225. + pos->preceder.blk) ? (pos->preceder.blk -
  63226. + nblk) : (nblk -
  63227. + pos->preceder.blk);
  63228. +
  63229. + /* See if we can find a closer block
  63230. + (forward direction only). */
  63231. + pos->preceder.max_dist =
  63232. + min((reiser4_block_nr) sbinfo->flush.
  63233. + relocate_distance, dist);
  63234. + pos->preceder.level = znode_get_level(node);
  63235. +
  63236. + ret = forward_try_defragment_locality(node,
  63237. + parent_coord,
  63238. + pos);
  63239. + pos->preceder.max_dist = 0;
  63240. +
  63241. + if (ret && (ret != -ENOSPC))
  63242. + return ret;
  63243. +
  63244. + if (ret == 0) {
  63245. + /* Got a better allocation. */
  63246. + znode_make_reloc(node, pos->fq);
  63247. + } else if (dist < sbinfo->flush.relocate_distance) {
  63248. + /* The present allocation is good enough. */
  63249. + jnode_make_wander(ZJNODE(node));
  63250. + } else {
  63251. + /*
  63252. + * Otherwise, try to relocate to the best
  63253. + * position.
  63254. + */
  63255. + best_reloc:
  63256. + ret = forward_try_defragment_locality(node,
  63257. + parent_coord,
  63258. + pos);
  63259. + if (ret != 0)
  63260. + return ret;
  63261. + /*
  63262. + * set JNODE_RELOC bit _after_ node gets
  63263. + * allocated
  63264. + */
  63265. + znode_make_reloc(node, pos->fq);
  63266. + }
  63267. + }
  63268. + }
  63269. + /*
  63270. + * This is the new preceder
  63271. + */
  63272. + pos->preceder.blk = *znode_get_block(node);
  63273. + check_preceder(pos->preceder.blk);
  63274. + pos->alloc_cnt += 1;
  63275. +
  63276. + assert("jmacd-4277", !reiser4_blocknr_is_fake(&pos->preceder.blk));
  63277. +
  63278. + return 0;
  63279. +}
  63280. +
  63281. +static int forward_alloc_unformatted_hybrid(flush_pos_t *flush_pos)
  63282. +{
  63283. + coord_t *coord;
  63284. + reiser4_extent *ext;
  63285. + oid_t oid;
  63286. + __u64 index;
  63287. + __u64 width;
  63288. + extent_state state;
  63289. + reiser4_key key;
  63290. +
  63291. + assert("vs-1468", flush_pos->state == POS_ON_EPOINT);
  63292. + assert("vs-1469", coord_is_existing_unit(&flush_pos->coord)
  63293. + && item_is_extent(&flush_pos->coord));
  63294. +
  63295. + coord = &flush_pos->coord;
  63296. +
  63297. + ext = extent_by_coord(coord);
  63298. + state = state_of_extent(ext);
  63299. + if (state == HOLE_EXTENT) {
  63300. + flush_pos->state = POS_INVALID;
  63301. + return 0;
  63302. + }
  63303. + item_key_by_coord(coord, &key);
  63304. + oid = get_key_objectid(&key);
  63305. + index = extent_unit_index(coord) + flush_pos->pos_in_unit;
  63306. + width = extent_get_width(ext);
  63307. +
  63308. + assert("vs-1457", width > flush_pos->pos_in_unit);
  63309. +
  63310. + if (flush_pos->leaf_relocate || state == UNALLOCATED_EXTENT) {
  63311. + int exit;
  63312. + int result;
  63313. + result = forward_relocate_unformatted(flush_pos, ext, state,
  63314. + oid,
  63315. + index, width, &exit);
  63316. + if (exit)
  63317. + return result;
  63318. + } else
  63319. + forward_overwrite_unformatted(flush_pos, oid, index, width);
  63320. +
  63321. + flush_pos->pos_in_unit = 0;
  63322. + return 0;
  63323. +}
  63324. +
  63325. +static squeeze_result squeeze_alloc_unformatted_hybrid(znode *left,
  63326. + const coord_t *coord,
  63327. + flush_pos_t *flush_pos,
  63328. + reiser4_key *stop_key)
  63329. +{
  63330. + squeeze_result ret;
  63331. + reiser4_key key;
  63332. + reiser4_extent *ext;
  63333. + extent_state state;
  63334. +
  63335. + ext = extent_by_coord(coord);
  63336. + state = state_of_extent(ext);
  63337. +
  63338. + if ((flush_pos->leaf_relocate && state == ALLOCATED_EXTENT) ||
  63339. + (state == UNALLOCATED_EXTENT))
  63340. + /*
  63341. + * relocate
  63342. + */
  63343. + ret = squeeze_relocate_unformatted(left, coord,
  63344. + flush_pos, &key, stop_key);
  63345. + else
  63346. + /*
  63347. + * (state == ALLOCATED_EXTENT && !flush_pos->leaf_relocate) ||
  63348. + * state == HOLE_EXTENT - overwrite
  63349. + */
  63350. + ret = squeeze_overwrite_unformatted(left, coord,
  63351. + flush_pos, &key, stop_key);
  63352. + if (ret == SQUEEZE_CONTINUE)
  63353. + *stop_key = key;
  63354. + return ret;
  63355. +}
  63356. +
  63357. +/*********************** JOURNAL TRANSACTION MODEL ****************************/
  63358. +
  63359. +static int forward_alloc_formatted_journal(znode * node,
  63360. + const coord_t *parent_coord,
  63361. + flush_pos_t *pos)
  63362. +{
  63363. + int ret;
  63364. +
  63365. + if (ZF_ISSET(node, JNODE_CREATED)) {
  63366. + if (pos->preceder.blk == 0) {
  63367. + /*
  63368. + * preceder is unknown and we have decided to relocate
  63369. + * node -- using of default value for search start is
  63370. + * better than search from block #0.
  63371. + */
  63372. + get_blocknr_hint_default(&pos->preceder.blk);
  63373. + check_preceder(pos->preceder.blk);
  63374. + }
  63375. + ret = forward_try_defragment_locality(node,
  63376. + parent_coord,
  63377. + pos);
  63378. + if (ret != 0) {
  63379. + warning("edward-1615",
  63380. + "forward defrag failed (%d)", ret);
  63381. + return ret;
  63382. + }
  63383. + /*
  63384. + * set JNODE_RELOC bit _after_ node gets
  63385. + * allocated
  63386. + */
  63387. + znode_make_reloc(node, pos->fq);
  63388. + }
  63389. + else
  63390. + jnode_make_wander(ZJNODE(node));
  63391. + /*
  63392. + * This is the new preceder
  63393. + */
  63394. + pos->preceder.blk = *znode_get_block(node);
  63395. + check_preceder(pos->preceder.blk);
  63396. + pos->alloc_cnt += 1;
  63397. +
  63398. + assert("edward-1616", !reiser4_blocknr_is_fake(&pos->preceder.blk));
  63399. + return 0;
  63400. +}
  63401. +
  63402. +static int forward_alloc_unformatted_journal(flush_pos_t *flush_pos)
  63403. +{
  63404. +
  63405. + coord_t *coord;
  63406. + reiser4_extent *ext;
  63407. + oid_t oid;
  63408. + __u64 index;
  63409. + __u64 width;
  63410. + extent_state state;
  63411. + reiser4_key key;
  63412. +
  63413. + assert("edward-1617", flush_pos->state == POS_ON_EPOINT);
  63414. + assert("edward-1618", coord_is_existing_unit(&flush_pos->coord)
  63415. + && item_is_extent(&flush_pos->coord));
  63416. +
  63417. + coord = &flush_pos->coord;
  63418. +
  63419. + ext = extent_by_coord(coord);
  63420. + state = state_of_extent(ext);
  63421. + if (state == HOLE_EXTENT) {
  63422. + flush_pos->state = POS_INVALID;
  63423. + return 0;
  63424. + }
  63425. + item_key_by_coord(coord, &key);
  63426. + oid = get_key_objectid(&key);
  63427. + index = extent_unit_index(coord) + flush_pos->pos_in_unit;
  63428. + width = extent_get_width(ext);
  63429. +
  63430. + assert("edward-1619", width > flush_pos->pos_in_unit);
  63431. +
  63432. + if (state == UNALLOCATED_EXTENT) {
  63433. + int exit;
  63434. + int result;
  63435. + result = forward_relocate_unformatted(flush_pos, ext, state,
  63436. + oid,
  63437. + index, width, &exit);
  63438. + if (exit)
  63439. + return result;
  63440. + }
  63441. + else
  63442. + /*
  63443. + * state == ALLOCATED_EXTENT
  63444. + * keep old allocation
  63445. + */
  63446. + forward_overwrite_unformatted(flush_pos, oid, index, width);
  63447. +
  63448. + flush_pos->pos_in_unit = 0;
  63449. + return 0;
  63450. +}
  63451. +
  63452. +static squeeze_result squeeze_alloc_unformatted_journal(znode *left,
  63453. + const coord_t *coord,
  63454. + flush_pos_t *flush_pos,
  63455. + reiser4_key *stop_key)
  63456. +{
  63457. + squeeze_result ret;
  63458. + reiser4_key key;
  63459. + reiser4_extent *ext;
  63460. + extent_state state;
  63461. +
  63462. + ext = extent_by_coord(coord);
  63463. + state = state_of_extent(ext);
  63464. +
  63465. + if (state == UNALLOCATED_EXTENT)
  63466. + ret = squeeze_relocate_unformatted(left, coord,
  63467. + flush_pos, &key, stop_key);
  63468. + else
  63469. + /*
  63470. + * state == ALLOCATED_EXTENT || state == HOLE_EXTENT
  63471. + */
  63472. + ret = squeeze_overwrite_unformatted(left, coord,
  63473. + flush_pos, &key, stop_key);
  63474. + if (ret == SQUEEZE_CONTINUE)
  63475. + *stop_key = key;
  63476. + return ret;
  63477. +}
  63478. +
  63479. +/********************** WA (Write-Anywhere) TRANSACTION MODEL ***************/
  63480. +
  63481. +static int forward_alloc_formatted_wa(znode * node,
  63482. + const coord_t *parent_coord,
  63483. + flush_pos_t *pos)
  63484. +{
  63485. + int ret;
  63486. +
  63487. + assert("edward-1620", znode_is_loaded(node));
  63488. + assert("edward-1621", !jnode_check_flushprepped(ZJNODE(node)));
  63489. + assert("edward-1622", znode_is_write_locked(node));
  63490. + assert("edward-1623", coord_is_invalid(parent_coord)
  63491. + || znode_is_write_locked(parent_coord->node));
  63492. +
  63493. + if (pos->preceder.blk == 0) {
  63494. + /*
  63495. + * preceder is unknown and we have decided to relocate
  63496. + * node -- using of default value for search start is
  63497. + * better than search from block #0.
  63498. + */
  63499. + get_blocknr_hint_default(&pos->preceder.blk);
  63500. + check_preceder(pos->preceder.blk);
  63501. + }
  63502. + ret = forward_try_defragment_locality(node, parent_coord, pos);
  63503. + if (ret && (ret != -ENOSPC)) {
  63504. + warning("edward-1624",
  63505. + "forward defrag failed (%d)", ret);
  63506. + return ret;
  63507. + }
  63508. + if (ret == 0)
  63509. + znode_make_reloc(node, pos->fq);
  63510. + else {
  63511. + ret = forward_try_defragment_locality(node, parent_coord, pos);
  63512. + if (ret) {
  63513. + warning("edward-1625",
  63514. + "forward defrag failed (%d)", ret);
  63515. + return ret;
  63516. + }
  63517. + /* set JNODE_RELOC bit _after_ node gets allocated */
  63518. + znode_make_reloc(node, pos->fq);
  63519. + }
  63520. + /*
  63521. + * This is the new preceder
  63522. + */
  63523. + pos->preceder.blk = *znode_get_block(node);
  63524. + check_preceder(pos->preceder.blk);
  63525. + pos->alloc_cnt += 1;
  63526. +
  63527. + assert("edward-1626", !reiser4_blocknr_is_fake(&pos->preceder.blk));
  63528. + return 0;
  63529. +}
  63530. +
  63531. +static int forward_alloc_unformatted_wa(flush_pos_t *flush_pos)
  63532. +{
  63533. + int exit;
  63534. + int result;
  63535. +
  63536. + coord_t *coord;
  63537. + reiser4_extent *ext;
  63538. + oid_t oid;
  63539. + __u64 index;
  63540. + __u64 width;
  63541. + extent_state state;
  63542. + reiser4_key key;
  63543. +
  63544. + assert("edward-1627", flush_pos->state == POS_ON_EPOINT);
  63545. + assert("edward-1628", coord_is_existing_unit(&flush_pos->coord)
  63546. + && item_is_extent(&flush_pos->coord));
  63547. +
  63548. + coord = &flush_pos->coord;
  63549. +
  63550. + ext = extent_by_coord(coord);
  63551. + state = state_of_extent(ext);
  63552. + if (state == HOLE_EXTENT) {
  63553. + flush_pos->state = POS_INVALID;
  63554. + return 0;
  63555. + }
  63556. +
  63557. + item_key_by_coord(coord, &key);
  63558. + oid = get_key_objectid(&key);
  63559. + index = extent_unit_index(coord) + flush_pos->pos_in_unit;
  63560. + width = extent_get_width(ext);
  63561. +
  63562. + assert("edward-1629", width > flush_pos->pos_in_unit);
  63563. + assert("edward-1630",
  63564. + state == ALLOCATED_EXTENT || state == UNALLOCATED_EXTENT);
  63565. + /*
  63566. + * always relocate
  63567. + */
  63568. + result = forward_relocate_unformatted(flush_pos, ext, state, oid,
  63569. + index, width, &exit);
  63570. + if (exit)
  63571. + return result;
  63572. + flush_pos->pos_in_unit = 0;
  63573. + return 0;
  63574. +}
  63575. +
  63576. +static squeeze_result squeeze_alloc_unformatted_wa(znode *left,
  63577. + const coord_t *coord,
  63578. + flush_pos_t *flush_pos,
  63579. + reiser4_key *stop_key)
  63580. +{
  63581. + squeeze_result ret;
  63582. + reiser4_key key;
  63583. + reiser4_extent *ext;
  63584. + extent_state state;
  63585. +
  63586. + ext = extent_by_coord(coord);
  63587. + state = state_of_extent(ext);
  63588. +
  63589. + if (state == HOLE_EXTENT)
  63590. + /*
  63591. + * hole extents are handled in squeeze_overwrite
  63592. + */
  63593. + ret = squeeze_overwrite_unformatted(left, coord,
  63594. + flush_pos, &key, stop_key);
  63595. + else
  63596. + ret = squeeze_relocate_unformatted(left, coord,
  63597. + flush_pos, &key, stop_key);
  63598. + if (ret == SQUEEZE_CONTINUE)
  63599. + *stop_key = key;
  63600. + return ret;
  63601. +}
  63602. +
  63603. +/******************************************************************************/
  63604. +
  63605. +txmod_plugin txmod_plugins[LAST_TXMOD_ID] = {
  63606. + [HYBRID_TXMOD_ID] = {
  63607. + .h = {
  63608. + .type_id = REISER4_TXMOD_PLUGIN_TYPE,
  63609. + .id = HYBRID_TXMOD_ID,
  63610. + .pops = NULL,
  63611. + .label = "hybrid",
  63612. + .desc = "Hybrid Transaction Model",
  63613. + .linkage = {NULL, NULL}
  63614. + },
  63615. + .forward_alloc_formatted = forward_alloc_formatted_hybrid,
  63616. + .reverse_alloc_formatted = reverse_alloc_formatted_hybrid,
  63617. + .forward_alloc_unformatted = forward_alloc_unformatted_hybrid,
  63618. + .squeeze_alloc_unformatted = squeeze_alloc_unformatted_hybrid
  63619. + },
  63620. + [JOURNAL_TXMOD_ID] = {
  63621. + .h = {
  63622. + .type_id = REISER4_TXMOD_PLUGIN_TYPE,
  63623. + .id = JOURNAL_TXMOD_ID,
  63624. + .pops = NULL,
  63625. + .label = "journal",
  63626. + .desc = "Journalling Transaction Model",
  63627. + .linkage = {NULL, NULL}
  63628. + },
  63629. + .forward_alloc_formatted = forward_alloc_formatted_journal,
  63630. + .reverse_alloc_formatted = NULL,
  63631. + .forward_alloc_unformatted = forward_alloc_unformatted_journal,
  63632. + .squeeze_alloc_unformatted = squeeze_alloc_unformatted_journal
  63633. + },
  63634. + [WA_TXMOD_ID] = {
  63635. + .h = {
  63636. + .type_id = REISER4_TXMOD_PLUGIN_TYPE,
  63637. + .id = WA_TXMOD_ID,
  63638. + .pops = NULL,
  63639. + .label = "wa",
  63640. + .desc = "Write-Anywhere Transaction Model",
  63641. + .linkage = {NULL, NULL}
  63642. + },
  63643. + .forward_alloc_formatted = forward_alloc_formatted_wa,
  63644. + .reverse_alloc_formatted = NULL,
  63645. + .forward_alloc_unformatted = forward_alloc_unformatted_wa,
  63646. + .squeeze_alloc_unformatted = squeeze_alloc_unformatted_wa
  63647. + }
  63648. +};
  63649. +
  63650. +/*
  63651. + * Local variables:
  63652. + * c-indentation-style: "K&R"
  63653. + * mode-name: "LC"
  63654. + * c-basic-offset: 8
  63655. + * tab-width: 8
  63656. + * fill-column: 79
  63657. + * End:
  63658. + */
  63659. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/pool.c linux-4.14.2/fs/reiser4/pool.c
  63660. --- linux-4.14.2.orig/fs/reiser4/pool.c 1970-01-01 01:00:00.000000000 +0100
  63661. +++ linux-4.14.2/fs/reiser4/pool.c 2017-11-26 22:13:09.000000000 +0100
  63662. @@ -0,0 +1,231 @@
  63663. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  63664. + * reiser4/README */
  63665. +
  63666. +/* Fast pool allocation.
  63667. +
  63668. + There are situations when some sub-system normally asks memory allocator
  63669. + for only few objects, but under some circumstances could require much
  63670. + more. Typical and actually motivating example is tree balancing. It needs
  63671. + to keep track of nodes that were involved into it, and it is well-known
  63672. + that in reasonable packed balanced tree most (92.938121%) percent of all
  63673. + balancings end up after working with only few nodes (3.141592 on
  63674. + average). But in rare cases balancing can involve much more nodes
  63675. + (3*tree_height+1 in extremal situation).
  63676. +
  63677. + On the one hand, we don't want to resort to dynamic allocation (slab,
  63678. + malloc(), etc.) to allocate data structures required to keep track of
  63679. + nodes during balancing. On the other hand, we cannot statically allocate
  63680. + required amount of space on the stack, because first: it is useless wastage
  63681. + of precious resource, and second: this amount is unknown in advance (tree
  63682. + height can change).
  63683. +
  63684. + Pools, implemented in this file are solution for this problem:
  63685. +
  63686. + - some configurable amount of objects is statically preallocated on the
  63687. + stack
  63688. +
  63689. + - if this preallocated pool is exhausted and more objects are requested,
  63690. + they are allocated dynamically.
  63691. +
  63692. + Pools encapsulate distinction between statically and dynamically allocated
  63693. + objects. Both allocation and recycling look exactly the same.
  63694. +
  63695. + To keep track of dynamically allocated objects, pool adds its own linkage
  63696. + to each object.
  63697. +
  63698. + NOTE-NIKITA This linkage also contains some balancing-specific data. This
  63699. + is not perfect. On the other hand, balancing is currently the only client
  63700. + of pool code.
  63701. +
  63702. + NOTE-NIKITA Another desirable feature is to rewrite all pool manipulation
  63703. + functions in the style of tslist/tshash, i.e., make them unreadable, but
  63704. + type-safe.
  63705. +
  63706. +*/
  63707. +
  63708. +#include "debug.h"
  63709. +#include "pool.h"
  63710. +#include "super.h"
  63711. +
  63712. +#include <linux/types.h>
  63713. +#include <linux/err.h>
  63714. +
  63715. +/* initialize new pool object @h */
  63716. +static void reiser4_init_pool_obj(struct reiser4_pool_header *h)
  63717. +{
  63718. + INIT_LIST_HEAD(&h->usage_linkage);
  63719. + INIT_LIST_HEAD(&h->level_linkage);
  63720. + INIT_LIST_HEAD(&h->extra_linkage);
  63721. +}
  63722. +
  63723. +/* initialize new pool */
  63724. +void reiser4_init_pool(struct reiser4_pool *pool /* pool to initialize */ ,
  63725. + size_t obj_size /* size of objects in @pool */ ,
  63726. + int num_of_objs /* number of preallocated objects */ ,
  63727. + char *data/* area for preallocated objects */)
  63728. +{
  63729. + struct reiser4_pool_header *h;
  63730. + int i;
  63731. +
  63732. + assert("nikita-955", pool != NULL);
  63733. + assert("nikita-1044", obj_size > 0);
  63734. + assert("nikita-956", num_of_objs >= 0);
  63735. + assert("nikita-957", data != NULL);
  63736. +
  63737. + memset(pool, 0, sizeof *pool);
  63738. + pool->obj_size = obj_size;
  63739. + pool->data = data;
  63740. + INIT_LIST_HEAD(&pool->free);
  63741. + INIT_LIST_HEAD(&pool->used);
  63742. + INIT_LIST_HEAD(&pool->extra);
  63743. + memset(data, 0, obj_size * num_of_objs);
  63744. + for (i = 0; i < num_of_objs; ++i) {
  63745. + h = (struct reiser4_pool_header *) (data + i * obj_size);
  63746. + reiser4_init_pool_obj(h);
  63747. + /* add pool header to the end of pool's free list */
  63748. + list_add_tail(&h->usage_linkage, &pool->free);
  63749. + }
  63750. +}
  63751. +
  63752. +/* release pool resources
  63753. +
  63754. + Release all resources acquired by this pool, specifically, dynamically
  63755. + allocated objects.
  63756. +
  63757. +*/
  63758. +void reiser4_done_pool(struct reiser4_pool *pool UNUSED_ARG)
  63759. +{
  63760. +}
  63761. +
  63762. +/* allocate carry object from @pool
  63763. +
  63764. + First, try to get preallocated object. If this fails, resort to dynamic
  63765. + allocation.
  63766. +
  63767. +*/
  63768. +static void *reiser4_pool_alloc(struct reiser4_pool *pool)
  63769. +{
  63770. + struct reiser4_pool_header *result;
  63771. +
  63772. + assert("nikita-959", pool != NULL);
  63773. +
  63774. + if (!list_empty(&pool->free)) {
  63775. + struct list_head *linkage;
  63776. +
  63777. + linkage = pool->free.next;
  63778. + list_del(linkage);
  63779. + INIT_LIST_HEAD(linkage);
  63780. + result = list_entry(linkage, struct reiser4_pool_header,
  63781. + usage_linkage);
  63782. + BUG_ON(!list_empty(&result->level_linkage) ||
  63783. + !list_empty(&result->extra_linkage));
  63784. + } else {
  63785. + /* pool is empty. Extra allocations don't deserve a dedicated
  63786. + slab to be served from, as they are expected to be rare. */
  63787. + result = kmalloc(pool->obj_size, reiser4_ctx_gfp_mask_get());
  63788. + if (result != 0) {
  63789. + reiser4_init_pool_obj(result);
  63790. + list_add(&result->extra_linkage, &pool->extra);
  63791. + } else
  63792. + return ERR_PTR(RETERR(-ENOMEM));
  63793. + BUG_ON(!list_empty(&result->usage_linkage) ||
  63794. + !list_empty(&result->level_linkage));
  63795. + }
  63796. + ++pool->objs;
  63797. + list_add(&result->usage_linkage, &pool->used);
  63798. + memset(result + 1, 0, pool->obj_size - sizeof *result);
  63799. + return result;
  63800. +}
  63801. +
  63802. +/* return object back to the pool */
  63803. +void reiser4_pool_free(struct reiser4_pool *pool,
  63804. + struct reiser4_pool_header *h)
  63805. +{
  63806. + assert("nikita-961", h != NULL);
  63807. + assert("nikita-962", pool != NULL);
  63808. +
  63809. + --pool->objs;
  63810. + assert("nikita-963", pool->objs >= 0);
  63811. +
  63812. + list_del_init(&h->usage_linkage);
  63813. + list_del_init(&h->level_linkage);
  63814. +
  63815. + if (list_empty(&h->extra_linkage))
  63816. + /*
  63817. + * pool header is not an extra one. Push it onto free list
  63818. + * using usage_linkage
  63819. + */
  63820. + list_add(&h->usage_linkage, &pool->free);
  63821. + else {
  63822. + /* remove pool header from pool's extra list and kfree it */
  63823. + list_del(&h->extra_linkage);
  63824. + kfree(h);
  63825. + }
  63826. +}
  63827. +
  63828. +/* add new object to the carry level list
  63829. +
  63830. + Carry level is FIFO most of the time, but not always. Complications arise
  63831. + when make_space() function tries to go to the left neighbor and thus adds
  63832. + carry node before existing nodes, and also, when updating delimiting keys
  63833. + after moving data between two nodes, we want left node to be locked before
  63834. + right node.
  63835. +
  63836. + The latter case is confusing at first glance. The problem is that the
  63837. + COP_UPDATE operation that updates delimiting keys is sometimes called
  63838. + with two nodes (when data are moved between two nodes) and sometimes
  63839. + (when leftmost item is deleted in a node). In any case operation is
  63840. + supplied with at least node whose left delimiting key is to be updated
  63841. + (that is "right" node).
  63842. +
  63843. + @pool - from which to allocate new object;
  63844. + @list - where to add object;
  63845. + @reference - after (or before) which existing object to add
  63846. +*/
  63847. +struct reiser4_pool_header *reiser4_add_obj(struct reiser4_pool *pool,
  63848. + struct list_head *list,
  63849. + pool_ordering order,
  63850. + struct reiser4_pool_header *reference)
  63851. +{
  63852. + struct reiser4_pool_header *result;
  63853. +
  63854. + assert("nikita-972", pool != NULL);
  63855. +
  63856. + result = reiser4_pool_alloc(pool);
  63857. + if (IS_ERR(result))
  63858. + return result;
  63859. +
  63860. + assert("nikita-973", result != NULL);
  63861. +
  63862. + switch (order) {
  63863. + case POOLO_BEFORE:
  63864. + __list_add(&result->level_linkage,
  63865. + reference->level_linkage.prev,
  63866. + &reference->level_linkage);
  63867. + break;
  63868. + case POOLO_AFTER:
  63869. + __list_add(&result->level_linkage,
  63870. + &reference->level_linkage,
  63871. + reference->level_linkage.next);
  63872. + break;
  63873. + case POOLO_LAST:
  63874. + list_add_tail(&result->level_linkage, list);
  63875. + break;
  63876. + case POOLO_FIRST:
  63877. + list_add(&result->level_linkage, list);
  63878. + break;
  63879. + default:
  63880. + wrong_return_value("nikita-927", "order");
  63881. + }
  63882. + return result;
  63883. +}
  63884. +
  63885. +/* Make Linus happy.
  63886. + Local variables:
  63887. + c-indentation-style: "K&R"
  63888. + mode-name: "LC"
  63889. + c-basic-offset: 8
  63890. + tab-width: 8
  63891. + fill-column: 120
  63892. + End:
  63893. +*/
  63894. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/pool.h linux-4.14.2/fs/reiser4/pool.h
  63895. --- linux-4.14.2.orig/fs/reiser4/pool.h 1970-01-01 01:00:00.000000000 +0100
  63896. +++ linux-4.14.2/fs/reiser4/pool.h 2017-11-26 22:13:09.000000000 +0100
  63897. @@ -0,0 +1,57 @@
  63898. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  63899. + * reiser4/README */
  63900. +
  63901. +/* Fast pool allocation */
  63902. +
  63903. +#ifndef __REISER4_POOL_H__
  63904. +#define __REISER4_POOL_H__
  63905. +
  63906. +#include <linux/types.h>
  63907. +
  63908. +struct reiser4_pool {
  63909. + size_t obj_size;
  63910. + int objs;
  63911. + char *data;
  63912. + struct list_head free;
  63913. + struct list_head used;
  63914. + struct list_head extra;
  63915. +};
  63916. +
  63917. +struct reiser4_pool_header {
  63918. + /* object is either on free or "used" lists */
  63919. + struct list_head usage_linkage;
  63920. + struct list_head level_linkage;
  63921. + struct list_head extra_linkage;
  63922. +};
  63923. +
  63924. +typedef enum {
  63925. + POOLO_BEFORE,
  63926. + POOLO_AFTER,
  63927. + POOLO_LAST,
  63928. + POOLO_FIRST
  63929. +} pool_ordering;
  63930. +
  63931. +/* pool manipulation functions */
  63932. +
  63933. +extern void reiser4_init_pool(struct reiser4_pool *pool, size_t obj_size,
  63934. + int num_of_objs, char *data);
  63935. +extern void reiser4_done_pool(struct reiser4_pool *pool);
  63936. +extern void reiser4_pool_free(struct reiser4_pool *pool,
  63937. + struct reiser4_pool_header *h);
  63938. +struct reiser4_pool_header *reiser4_add_obj(struct reiser4_pool *pool,
  63939. + struct list_head *list,
  63940. + pool_ordering order,
  63941. + struct reiser4_pool_header *reference);
  63942. +
  63943. +/* __REISER4_POOL_H__ */
  63944. +#endif
  63945. +
  63946. +/* Make Linus happy.
  63947. + Local variables:
  63948. + c-indentation-style: "K&R"
  63949. + mode-name: "LC"
  63950. + c-basic-offset: 8
  63951. + tab-width: 8
  63952. + fill-column: 120
  63953. + End:
  63954. +*/
  63955. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/readahead.c linux-4.14.2/fs/reiser4/readahead.c
  63956. --- linux-4.14.2.orig/fs/reiser4/readahead.c 1970-01-01 01:00:00.000000000 +0100
  63957. +++ linux-4.14.2/fs/reiser4/readahead.c 2017-11-26 22:13:09.000000000 +0100
  63958. @@ -0,0 +1,140 @@
  63959. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  63960. + * reiser4/README */
  63961. +
  63962. +#include "forward.h"
  63963. +#include "tree.h"
  63964. +#include "tree_walk.h"
  63965. +#include "super.h"
  63966. +#include "inode.h"
  63967. +#include "key.h"
  63968. +#include "znode.h"
  63969. +
  63970. +#include <linux/swap.h> /* for totalram_pages */
  63971. +
  63972. +void reiser4_init_ra_info(ra_info_t *rai)
  63973. +{
  63974. + rai->key_to_stop = *reiser4_min_key();
  63975. +}
  63976. +
  63977. +/* global formatted node readahead parameter. It can be set by mount option
  63978. + * -o readahead:NUM:1 */
  63979. +static inline int ra_adjacent_only(int flags)
  63980. +{
  63981. + return flags & RA_ADJACENT_ONLY;
  63982. +}
  63983. +
  63984. +/* this is used by formatted_readahead to decide whether read for right neighbor
  63985. + * of node is to be issued. It returns 1 if right neighbor's first key is less
  63986. + * or equal to readahead's stop key */
  63987. +static int should_readahead_neighbor(znode * node, ra_info_t *info)
  63988. +{
  63989. + int result;
  63990. +
  63991. + read_lock_dk(znode_get_tree(node));
  63992. + result = keyle(znode_get_rd_key(node), &info->key_to_stop);
  63993. + read_unlock_dk(znode_get_tree(node));
  63994. + return result;
  63995. +}
  63996. +
  63997. +#define LOW_MEM_PERCENTAGE (5)
  63998. +
  63999. +static int low_on_memory(void)
  64000. +{
  64001. + unsigned int freepages;
  64002. +
  64003. + freepages = nr_free_pages();
  64004. + return freepages < (totalram_pages * LOW_MEM_PERCENTAGE / 100);
  64005. +}
  64006. +
  64007. +/* start read for @node and for a few of its right neighbors */
  64008. +void formatted_readahead(znode * node, ra_info_t *info)
  64009. +{
  64010. + struct formatted_ra_params *ra_params;
  64011. + znode *cur;
  64012. + int i;
  64013. + int grn_flags;
  64014. + lock_handle next_lh;
  64015. +
  64016. + /* do nothing if node block number has not been assigned to node (which
  64017. + * means it is still in cache). */
  64018. + if (reiser4_blocknr_is_fake(znode_get_block(node)))
  64019. + return;
  64020. +
  64021. + ra_params = get_current_super_ra_params();
  64022. +
  64023. + if (znode_page(node) == NULL)
  64024. + jstartio(ZJNODE(node));
  64025. +
  64026. + if (znode_get_level(node) != LEAF_LEVEL)
  64027. + return;
  64028. +
  64029. + /* don't waste memory for read-ahead when low on memory */
  64030. + if (low_on_memory())
  64031. + return;
  64032. +
  64033. + /* We can have locked nodes on upper tree levels, in this situation lock
  64034. + priorities do not help to resolve deadlocks, we have to use TRY_LOCK
  64035. + here. */
  64036. + grn_flags = (GN_CAN_USE_UPPER_LEVELS | GN_TRY_LOCK);
  64037. +
  64038. + i = 0;
  64039. + cur = zref(node);
  64040. + init_lh(&next_lh);
  64041. + while (i < ra_params->max) {
  64042. + const reiser4_block_nr * nextblk;
  64043. +
  64044. + if (!should_readahead_neighbor(cur, info))
  64045. + break;
  64046. +
  64047. + if (reiser4_get_right_neighbor
  64048. + (&next_lh, cur, ZNODE_READ_LOCK, grn_flags))
  64049. + break;
  64050. +
  64051. + nextblk = znode_get_block(next_lh.node);
  64052. + if (reiser4_blocknr_is_fake(nextblk) ||
  64053. + (ra_adjacent_only(ra_params->flags)
  64054. + && *nextblk != *znode_get_block(cur) + 1))
  64055. + break;
  64056. +
  64057. + zput(cur);
  64058. + cur = zref(next_lh.node);
  64059. + done_lh(&next_lh);
  64060. + if (znode_page(cur) == NULL)
  64061. + jstartio(ZJNODE(cur));
  64062. + else
  64063. + /* Do not scan read-ahead window if pages already
  64064. + * allocated (and i/o already started). */
  64065. + break;
  64066. +
  64067. + i++;
  64068. + }
  64069. + zput(cur);
  64070. + done_lh(&next_lh);
  64071. +}
  64072. +
  64073. +void reiser4_readdir_readahead_init(struct inode *dir, tap_t *tap)
  64074. +{
  64075. + reiser4_key *stop_key;
  64076. +
  64077. + assert("nikita-3542", dir != NULL);
  64078. + assert("nikita-3543", tap != NULL);
  64079. +
  64080. + stop_key = &tap->ra_info.key_to_stop;
  64081. + /* initialize readdir readahead information: include into readahead
  64082. + * stat data of all files of the directory */
  64083. + set_key_locality(stop_key, get_inode_oid(dir));
  64084. + set_key_type(stop_key, KEY_SD_MINOR);
  64085. + set_key_ordering(stop_key, get_key_ordering(reiser4_max_key()));
  64086. + set_key_objectid(stop_key, get_key_objectid(reiser4_max_key()));
  64087. + set_key_offset(stop_key, get_key_offset(reiser4_max_key()));
  64088. +}
  64089. +
  64090. +/*
  64091. + Local variables:
  64092. + c-indentation-style: "K&R"
  64093. + mode-name: "LC"
  64094. + c-basic-offset: 8
  64095. + tab-width: 8
  64096. + fill-column: 80
  64097. + End:
  64098. +*/
  64099. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/readahead.h linux-4.14.2/fs/reiser4/readahead.h
  64100. --- linux-4.14.2.orig/fs/reiser4/readahead.h 1970-01-01 01:00:00.000000000 +0100
  64101. +++ linux-4.14.2/fs/reiser4/readahead.h 2017-11-26 22:13:09.000000000 +0100
  64102. @@ -0,0 +1,42 @@
  64103. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  64104. + * reiser4/README */
  64105. +
  64106. +#ifndef __READAHEAD_H__
  64107. +#define __READAHEAD_H__
  64108. +
  64109. +#include "key.h"
  64110. +
  64111. +typedef enum {
  64112. + RA_ADJACENT_ONLY = 1, /* only requests nodes which are adjacent.
  64113. + Default is NO (not only adjacent) */
  64114. +} ra_global_flags;
  64115. +
  64116. +/* reiser4 super block has a field of this type.
  64117. + It controls readahead during tree traversals */
  64118. +struct formatted_ra_params {
  64119. + unsigned long max; /* request not more than this amount of nodes.
  64120. + Default is totalram_pages / 4 */
  64121. + int flags;
  64122. +};
  64123. +
  64124. +typedef struct {
  64125. + reiser4_key key_to_stop;
  64126. +} ra_info_t;
  64127. +
  64128. +void formatted_readahead(znode * , ra_info_t *);
  64129. +void reiser4_init_ra_info(ra_info_t *rai);
  64130. +
  64131. +extern void reiser4_readdir_readahead_init(struct inode *dir, tap_t *tap);
  64132. +
  64133. +/* __READAHEAD_H__ */
  64134. +#endif
  64135. +
  64136. +/*
  64137. + Local variables:
  64138. + c-indentation-style: "K&R"
  64139. + mode-name: "LC"
  64140. + c-basic-offset: 8
  64141. + tab-width: 8
  64142. + fill-column: 120
  64143. + End:
  64144. +*/
  64145. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/README linux-4.14.2/fs/reiser4/README
  64146. --- linux-4.14.2.orig/fs/reiser4/README 1970-01-01 01:00:00.000000000 +0100
  64147. +++ linux-4.14.2/fs/reiser4/README 2017-11-26 22:13:09.000000000 +0100
  64148. @@ -0,0 +1,128 @@
  64149. +[LICENSING]
  64150. +
  64151. +Reiser4 is hereby licensed under the GNU General
  64152. +Public License version 2.
  64153. +
  64154. +Source code files that contain the phrase "licensing governed by
  64155. +reiser4/README" are "governed files" throughout this file. Governed
  64156. +files are licensed under the GPL. The portions of them owned by Hans
  64157. +Reiser, or authorized to be licensed by him, have been in the past,
  64158. +and likely will be in the future, licensed to other parties under
  64159. +other licenses. If you add your code to governed files, and don't
  64160. +want it to be owned by Hans Reiser, put your copyright label on that
  64161. +code so the poor blight and his customers can keep things straight.
  64162. +All portions of governed files not labeled otherwise are owned by Hans
  64163. +Reiser, and by adding your code to it, widely distributing it to
  64164. +others or sending us a patch, and leaving the sentence in stating that
  64165. +licensing is governed by the statement in this file, you accept this.
  64166. +It will be a kindness if you identify whether Hans Reiser is allowed
  64167. +to license code labeled as owned by you on your behalf other than
  64168. +under the GPL, because he wants to know if it is okay to do so and put
  64169. +a check in the mail to you (for non-trivial improvements) when he
  64170. +makes his next sale. He makes no guarantees as to the amount if any,
  64171. +though he feels motivated to motivate contributors, and you can surely
  64172. +discuss this with him before or after contributing. You have the
  64173. +right to decline to allow him to license your code contribution other
  64174. +than under the GPL.
  64175. +
  64176. +Further licensing options are available for commercial and/or other
  64177. +interests directly from Hans Reiser: reiser@namesys.com. If you interpret
  64178. +the GPL as not allowing those additional licensing options, you read
  64179. +it wrongly, and Richard Stallman agrees with me, when carefully read
  64180. +you can see that those restrictions on additional terms do not apply
  64181. +to the owner of the copyright, and my interpretation of this shall
  64182. +govern for this license.
  64183. +
  64184. +[END LICENSING]
  64185. +
  64186. +Reiser4 is a file system based on dancing tree algorithms, and is
  64187. +described at http://www.namesys.com
  64188. +
  64189. +mkfs.reiser4 and other utilities are on our webpage or wherever your
  64190. +Linux provider put them. You really want to be running the latest
  64191. +version off the website if you use fsck.
  64192. +
  64193. +Yes, if you update your reiser4 kernel module you do have to
  64194. +recompile your kernel, most of the time. The errors you get will be
  64195. +quite cryptic if you forget to do so.
  64196. +
  64197. +Hideous Commercial Pitch: Spread your development costs across other OS
  64198. +vendors. Select from the best in the world, not the best in your
  64199. +building, by buying from third party OS component suppliers. Leverage
  64200. +the software component development power of the internet. Be the most
  64201. +aggressive in taking advantage of the commercial possibilities of
  64202. +decentralized internet development, and add value through your branded
  64203. +integration that you sell as an operating system. Let your competitors
  64204. +be the ones to compete against the entire internet by themselves. Be
  64205. +hip, get with the new economic trend, before your competitors do. Send
  64206. +email to reiser@namesys.com
  64207. +
  64208. +Hans Reiser was the primary architect of Reiser4, but a whole team
  64209. +chipped their ideas in. He invested everything he had into Namesys
  64210. +for 5.5 dark years of no money before Reiser3 finally started to work well
  64211. +enough to bring in money. He owns the copyright.
  64212. +
  64213. +DARPA was the primary sponsor of Reiser4. DARPA does not endorse
  64214. +Reiser4, it merely sponsors it. DARPA is, in solely Hans's personal
  64215. +opinion, unique in its willingness to invest into things more
  64216. +theoretical than the VC community can readily understand, and more
  64217. +longterm than allows them to be sure that they will be the ones to
  64218. +extract the economic benefits from. DARPA also integrated us into a
  64219. +security community that transformed our security worldview.
  64220. +
  64221. +Vladimir Saveliev is our lead programmer, with us from the beginning,
  64222. +and he worked long hours writing the cleanest code. This is why he is
  64223. +now the lead programmer after years of commitment to our work. He
  64224. +always made the effort to be the best he could be, and to make his
  64225. +code the best that it could be. What resulted was quite remarkable. I
  64226. +don't think that money can ever motivate someone to work the way he
  64227. +did, he is one of the most selfless men I know.
  64228. +
  64229. +Alexander Lyamin was our sysadmin, and helped to educate us in
  64230. +security issues. Moscow State University and IMT were very generous
  64231. +in the internet access they provided us, and in lots of other little
  64232. +ways that a generous institution can be.
  64233. +
  64234. +Alexander Zarochentcev (sometimes known as zam, or sasha), wrote the
  64235. +locking code, the block allocator, and finished the flushing code.
  64236. +His code is always crystal clean and well structured.
  64237. +
  64238. +Nikita Danilov wrote the core of the balancing code, the core of the
  64239. +plugins code, and the directory code. He worked a steady pace of long
  64240. +hours that produced a whole lot of well abstracted code. He is our
  64241. +senior computer scientist.
  64242. +
  64243. +Vladimir Demidov wrote the parser. Writing an in kernel parser is
  64244. +something very few persons have the skills for, and it is thanks to
  64245. +him that we can say that the parser is really not so big compared to
  64246. +various bits of our other code, and making a parser work in the kernel
  64247. +was not so complicated as everyone would imagine mainly because it was
  64248. +him doing it...
  64249. +
  64250. +Joshua McDonald wrote the transaction manager, and the flush code.
  64251. +The flush code unexpectedly turned out to be extremely hairy for reasons
  64252. +you can read about on our web page, and he did a great job on an
  64253. +extremely difficult task.
  64254. +
  64255. +Nina Reiser handled our accounting, government relations, and much
  64256. +more.
  64257. +
  64258. +Ramon Reiser developed our website.
  64259. +
  64260. +Beverly Palmer drew our graphics.
  64261. +
  64262. +Vitaly Fertman developed librepair, userspace plugins repair code, fsck
  64263. +and worked with Umka on developing libreiser4 and userspace plugins.
  64264. +
  64265. +Yury Umanets (aka Umka) developed libreiser4, userspace plugins and
  64266. +userspace tools (reiser4progs).
  64267. +
  64268. +Oleg Drokin (aka Green) is the release manager who fixes everything.
  64269. +It is so nice to have someone like that on the team. He (plus Chris
  64270. +and Jeff) make it possible for the entire rest of the Namesys team to
  64271. +focus on Reiser4, and he fixed a whole lot of Reiser4 bugs also. It
  64272. +is just amazing to watch his talent for spotting bugs in action.
  64273. +
  64274. +Edward Shishkin wrote cryptcompress file plugin (which manages files
  64275. +built of encrypted and(or) compressed bodies) and other plugins related
  64276. +to transparent encryption and compression support.
  64277. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/reiser4.h linux-4.14.2/fs/reiser4/reiser4.h
  64278. --- linux-4.14.2.orig/fs/reiser4/reiser4.h 1970-01-01 01:00:00.000000000 +0100
  64279. +++ linux-4.14.2/fs/reiser4/reiser4.h 2017-11-26 22:13:09.000000000 +0100
  64280. @@ -0,0 +1,260 @@
  64281. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  64282. + * reiser4/README */
  64283. +
  64284. +
  64285. +/* definitions of common constants used by reiser4 */
  64286. +
  64287. +#if !defined( __REISER4_H__ )
  64288. +#define __REISER4_H__
  64289. +
  64290. +#include <asm/param.h> /* for HZ */
  64291. +#include <linux/errno.h>
  64292. +#include <linux/types.h>
  64293. +#include <linux/fs.h>
  64294. +#include <linux/hardirq.h>
  64295. +#include <linux/sched.h>
  64296. +
  64297. +/*
  64298. + * reiser4 compilation options.
  64299. + */
  64300. +
  64301. +#if defined(CONFIG_REISER4_DEBUG)
  64302. +/* turn on assertion checks */
  64303. +#define REISER4_DEBUG (1)
  64304. +#else
  64305. +#define REISER4_DEBUG (0)
  64306. +#endif
  64307. +
  64308. +#define REISER4_SHA256 (0)
  64309. +
  64310. +/*
  64311. + * Turn on large keys mode. In this mode (which is default), reiser4 key has 4
  64312. + * 8-byte components. In the old "small key" mode, it's 3 8-byte
  64313. + * components. Additional component, referred to as "ordering" is used to
  64314. + * order items from which given object is composed of. As such, ordering is
  64315. + * placed between locality and objectid. For directory item ordering contains
  64316. + * initial prefix of the file name this item is for. This sorts all directory
  64317. + * items within given directory lexicographically (but see
  64318. + * fibration.[ch]). For file body and stat-data, ordering contains initial
  64319. + * prefix of the name file was initially created with. In the common case
  64320. + * (files with single name) this allows to order file bodies and stat-datas in
  64321. + * the same order as their respective directory entries, thus speeding up
  64322. + * readdir.
  64323. + *
  64324. + * Note, that kernel can only mount file system with the same key size as one
  64325. + * it is compiled for, so flipping this option may render your data
  64326. + * inaccessible.
  64327. + */
  64328. +#define REISER4_LARGE_KEY (1)
  64329. +/*#define REISER4_LARGE_KEY (0)*/
  64330. +
  64331. +/*#define GUESS_EXISTS 1*/
  64332. +
  64333. +/*
  64334. + * PLEASE update fs/reiser4/kattr.c:show_options() when adding new compilation
  64335. + * option
  64336. + */
  64337. +
  64338. +#define REISER4_SUPER_MAGIC_STRING "ReIsEr4"
  64339. +extern const int REISER4_MAGIC_OFFSET; /* offset to magic string from the
  64340. + * beginning of device */
  64341. +
  64342. +/* here go tunable parameters that are not worth special entry in kernel
  64343. + configuration */
  64344. +
  64345. +/* default number of slots in coord-by-key caches */
  64346. +#define CBK_CACHE_SLOTS (16)
  64347. +/* how many elementary tree operation to carry on the next level */
  64348. +#define CARRIES_POOL_SIZE (5)
  64349. +/* size of pool of preallocated nodes for carry process. */
  64350. +#define NODES_LOCKED_POOL_SIZE (5)
  64351. +
  64352. +#define REISER4_NEW_NODE_FLAGS (COPI_LOAD_LEFT | COPI_LOAD_RIGHT | COPI_GO_LEFT)
  64353. +#define REISER4_NEW_EXTENT_FLAGS (COPI_LOAD_LEFT | COPI_LOAD_RIGHT | COPI_GO_LEFT)
  64354. +#define REISER4_PASTE_FLAGS (COPI_GO_LEFT)
  64355. +#define REISER4_INSERT_FLAGS (COPI_GO_LEFT)
  64356. +
  64357. +/* we are supporting reservation of disk space on uid basis */
  64358. +#define REISER4_SUPPORT_UID_SPACE_RESERVATION (0)
  64359. +/* we are supporting reservation of disk space for groups */
  64360. +#define REISER4_SUPPORT_GID_SPACE_RESERVATION (0)
  64361. +/* we are supporting reservation of disk space for root */
  64362. +#define REISER4_SUPPORT_ROOT_SPACE_RESERVATION (0)
  64363. +/* we use rapid flush mode, see flush.c for comments. */
  64364. +#define REISER4_USE_RAPID_FLUSH (1)
  64365. +
  64366. +/*
  64367. + * set this to 0 if you don't want to use wait-for-flush in ->writepage().
  64368. + */
  64369. +#define REISER4_USE_ENTD (1)
  64370. +
  64371. +/* key allocation is Plan-A */
  64372. +#define REISER4_PLANA_KEY_ALLOCATION (1)
  64373. +/* key allocation follows good old 3.x scheme */
  64374. +#define REISER4_3_5_KEY_ALLOCATION (0)
  64375. +
  64376. +/* size of hash-table for znodes */
  64377. +#define REISER4_ZNODE_HASH_TABLE_SIZE (1 << 13)
  64378. +
  64379. +/* number of buckets in lnode hash-table */
  64380. +#define LNODE_HTABLE_BUCKETS (1024)
  64381. +
  64382. +/* some ridiculously high maximal limit on height of znode tree. This
  64383. + is used in declaration of various per level arrays and
  64384. + to allocate statistics gathering array for per-level stats. */
  64385. +#define REISER4_MAX_ZTREE_HEIGHT (8)
  64386. +
  64387. +#define REISER4_PANIC_MSG_BUFFER_SIZE (1024)
  64388. +
  64389. +/* If array contains less than REISER4_SEQ_SEARCH_BREAK elements then,
  64390. + sequential search is on average faster than binary. This is because
  64391. + of better optimization and because sequential search is more CPU
  64392. + cache friendly. This number (25) was found by experiments on dual AMD
  64393. + Athlon(tm), 1400MHz.
  64394. +
  64395. + NOTE: testing in kernel has shown that binary search is more effective than
  64396. + implied by results of the user level benchmarking. Probably because in the
  64397. + node keys are separated by other data. So value was adjusted after few
  64398. + tests. More thorough tuning is needed.
  64399. +*/
  64400. +#define REISER4_SEQ_SEARCH_BREAK (3)
  64401. +
  64402. +/* don't allow tree to be lower than this */
  64403. +#define REISER4_MIN_TREE_HEIGHT (TWIG_LEVEL)
  64404. +
  64405. +/* NOTE NIKITA this is no longer used: maximal atom size is auto-adjusted to
  64406. + * available memory. */
  64407. +/* Default value of maximal atom size. Can be overwritten by
  64408. + tmgr.atom_max_size mount option. By default infinity. */
  64409. +#define REISER4_ATOM_MAX_SIZE ((unsigned)(~0))
  64410. +
  64411. +/* Default value of maximal atom age (in jiffies). After reaching this age
  64412. + atom will be forced to commit, either synchronously or asynchronously. Can
  64413. + be overwritten by tmgr.atom_max_age mount option. */
  64414. +#define REISER4_ATOM_MAX_AGE (600 * HZ)
  64415. +
  64416. +/* sleeping period for ktxnmrgd */
  64417. +#define REISER4_TXNMGR_TIMEOUT (5 * HZ)
  64418. +
  64419. +/* timeout to wait for ent thread in writepage. Default: 3 milliseconds. */
  64420. +#define REISER4_ENTD_TIMEOUT (3 * HZ / 1000)
  64421. +
  64422. +/* start complaining after that many restarts in coord_by_key().
  64423. +
  64424. + This either means incredibly heavy contention for this part of a tree, or
  64425. + some corruption or bug.
  64426. +*/
  64427. +#define REISER4_CBK_ITERATIONS_LIMIT (100)
  64428. +
  64429. +/* return -EIO after that many iterations in coord_by_key().
  64430. +
  64431. + I have witnessed more than 800 iterations (in 30 thread test) before cbk
  64432. + finished. --nikita
  64433. +*/
  64434. +#define REISER4_MAX_CBK_ITERATIONS 500000
  64435. +
  64436. +/* put a per-inode limit on maximal number of directory entries with identical
  64437. + keys in hashed directory.
  64438. +
  64439. + Disable this until inheritance interfaces stabilize: we need some way to
  64440. + set per directory limit.
  64441. +*/
  64442. +#define REISER4_USE_COLLISION_LIMIT (0)
  64443. +
  64444. +/* If flush finds more than FLUSH_RELOCATE_THRESHOLD adjacent dirty leaf-level
  64445. + blocks it will force them to be relocated. */
  64446. +#define FLUSH_RELOCATE_THRESHOLD 64
  64447. +/* If flush can find a block allocation closer than at most
  64448. + FLUSH_RELOCATE_DISTANCE from the preceder it will relocate to that position.
  64449. + */
  64450. +#define FLUSH_RELOCATE_DISTANCE 64
  64451. +
  64452. +/* If we have written this much or more blocks before encountering busy jnode
  64453. + in flush list - abort flushing hoping that next time we get called
  64454. + this jnode will be clean already, and we will save some seeks. */
  64455. +#define FLUSH_WRITTEN_THRESHOLD 50
  64456. +
  64457. +/* The maximum number of nodes to scan left on a level during flush. */
  64458. +#define FLUSH_SCAN_MAXNODES 10000
  64459. +
  64460. +/* per-atom limit of flushers */
  64461. +#define ATOM_MAX_FLUSHERS (1)
  64462. +
  64463. +/* default tracing buffer size */
  64464. +#define REISER4_TRACE_BUF_SIZE (1 << 15)
  64465. +
  64466. +/* what size units of IO we would like cp, etc., to use, in writing to
  64467. + reiser4. In bytes.
  64468. +
  64469. + Can be overwritten by optimal_io_size mount option.
  64470. +*/
  64471. +#define REISER4_OPTIMAL_IO_SIZE (64 * 1024)
  64472. +
  64473. +/* see comments in inode.c:oid_to_uino() */
  64474. +#define REISER4_UINO_SHIFT (1 << 30)
  64475. +
  64476. +/* Mark function argument as unused to avoid compiler warnings. */
  64477. +#define UNUSED_ARG __attribute__((unused))
  64478. +
  64479. +#if ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)
  64480. +#define NONNULL __attribute__((nonnull))
  64481. +#else
  64482. +#define NONNULL
  64483. +#endif
  64484. +
  64485. +/* master super block offset in bytes.*/
  64486. +#define REISER4_MASTER_OFFSET 65536
  64487. +
  64488. +/* size of VFS block */
  64489. +#define VFS_BLKSIZE 512
  64490. +/* number of bits in size of VFS block (512==2^9) */
  64491. +#define VFS_BLKSIZE_BITS 9
  64492. +
  64493. +#define REISER4_I reiser4_inode_data
  64494. +
  64495. +/* implication */
  64496. +#define ergo(antecedent, consequent) (!(antecedent) || (consequent))
  64497. +/* logical equivalence */
  64498. +#define equi(p1, p2) (ergo((p1), (p2)) && ergo((p2), (p1)))
  64499. +
  64500. +#define sizeof_array(x) ((int) (sizeof(x) / sizeof(x[0])))
  64501. +
  64502. +#define NOT_YET (0)
  64503. +
  64504. +/** Reiser4 specific error codes **/
  64505. +
  64506. +#define REISER4_ERROR_CODE_BASE 10000
  64507. +
  64508. +/* Neighbor is not available (side neighbor or parent) */
  64509. +#define E_NO_NEIGHBOR (REISER4_ERROR_CODE_BASE)
  64510. +
  64511. +/* Node was not found in cache */
  64512. +#define E_NOT_IN_CACHE (REISER4_ERROR_CODE_BASE + 1)
  64513. +
  64514. +/* node has no free space enough for completion of balancing operation */
  64515. +#define E_NODE_FULL (REISER4_ERROR_CODE_BASE + 2)
  64516. +
  64517. +/* repeat operation */
  64518. +#define E_REPEAT (REISER4_ERROR_CODE_BASE + 3)
  64519. +
  64520. +/* deadlock happens */
  64521. +#define E_DEADLOCK (REISER4_ERROR_CODE_BASE + 4)
  64522. +
  64523. +/* operation cannot be performed, because it would block and non-blocking mode
  64524. + * was requested. */
  64525. +#define E_BLOCK (REISER4_ERROR_CODE_BASE + 5)
  64526. +
  64527. +/* wait some event (depends on context), then repeat */
  64528. +#define E_WAIT (REISER4_ERROR_CODE_BASE + 6)
  64529. +
  64530. +#endif /* __REISER4_H__ */
  64531. +
  64532. +/* Make Linus happy.
  64533. + Local variables:
  64534. + c-indentation-style: "K&R"
  64535. + mode-name: "LC"
  64536. + c-basic-offset: 8
  64537. + tab-width: 8
  64538. + fill-column: 120
  64539. + End:
  64540. +*/
  64541. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/safe_link.c linux-4.14.2/fs/reiser4/safe_link.c
  64542. --- linux-4.14.2.orig/fs/reiser4/safe_link.c 1970-01-01 01:00:00.000000000 +0100
  64543. +++ linux-4.14.2/fs/reiser4/safe_link.c 2017-11-26 22:13:09.000000000 +0100
  64544. @@ -0,0 +1,354 @@
  64545. +/* Copyright 2003, 2004 by Hans Reiser, licensing governed by
  64546. + * reiser4/README */
  64547. +
  64548. +/* Safe-links. */
  64549. +
  64550. +/*
  64551. + * Safe-links are used to maintain file system consistency during operations
  64552. + * that spawns multiple transactions. For example:
  64553. + *
  64554. + * 1. Unlink. UNIX supports "open-but-unlinked" files, that is files
  64555. + * without user-visible names in the file system, but still opened by some
  64556. + * active process. What happens here is that unlink proper (i.e., removal
  64557. + * of the last file name) and file deletion (truncate of file body to zero
  64558. + * and deletion of stat-data, that happens when last file descriptor is
  64559. + * closed), may belong to different transactions T1 and T2. If a crash
  64560. + * happens after T1 commit, but before T2 commit, on-disk file system has
  64561. + * a file without name, that is, disk space leak.
  64562. + *
  64563. + * 2. Truncate. Truncate of large file may spawn multiple transactions. If
  64564. + * system crashes while truncate was in-progress, file is left partially
  64565. + * truncated, which violates "atomicity guarantees" of reiser4, viz. that
  64566. + * every system call is atomic.
  64567. + *
  64568. + * Safe-links address both above cases. Basically, safe-link is a way post
  64569. + * some operation to be executed during commit of some other transaction than
  64570. + * current one. (Another way to look at the safe-link is to interpret it as a
  64571. + * logical logging.)
  64572. + *
  64573. + * Specifically, at the beginning of unlink safe-link is inserted in the
  64574. + * tree. This safe-link is normally removed by file deletion code (during
  64575. + * transaction T2 in the above terms). Truncate also inserts safe-link that is
  64576. + * normally removed when truncate operation is finished.
  64577. + *
  64578. + * This means, that in the case of "clean umount" there are no safe-links in
  64579. + * the tree. If safe-links are observed during mount, it means that (a) system
  64580. + * was terminated abnormally, and (b) safe-link correspond to the "pending"
  64581. + * (i.e., not finished) operations that were in-progress during system
  64582. + * termination. Each safe-link records enough information to complete
  64583. + * corresponding operation, and mount simply "replays" them (hence, the
  64584. + * analogy with the logical logging).
  64585. + *
  64586. + * Safe-links are implemented as blackbox items (see
  64587. + * plugin/item/blackbox.[ch]).
  64588. + *
  64589. + * For the reference: ext3 also has similar mechanism, it's called "an orphan
  64590. + * list" there.
  64591. + */
  64592. +
  64593. +#include "safe_link.h"
  64594. +#include "debug.h"
  64595. +#include "inode.h"
  64596. +
  64597. +#include "plugin/item/blackbox.h"
  64598. +
  64599. +#include <linux/fs.h>
  64600. +
  64601. +/*
  64602. + * On-disk format of safe-link.
  64603. + */
  64604. +typedef struct safelink {
  64605. + reiser4_key sdkey; /* key of stat-data for the file safe-link is
  64606. + * for */
  64607. + d64 size; /* size to which file should be truncated */
  64608. +} safelink_t;
  64609. +
  64610. +/*
  64611. + * locality where safe-link items are stored. Next to the objectid of root
  64612. + * directory.
  64613. + */
  64614. +static oid_t safe_link_locality(reiser4_tree * tree)
  64615. +{
  64616. + return get_key_objectid(get_super_private(tree->super)->df_plug->
  64617. + root_dir_key(tree->super)) + 1;
  64618. +}
  64619. +
  64620. +/*
  64621. + Construct a key for the safe-link. Key has the following format:
  64622. +
  64623. +| 60 | 4 | 64 | 4 | 60 | 64 |
  64624. ++---------------+---+------------------+---+---------------+------------------+
  64625. +| locality | 0 | 0 | 0 | objectid | link type |
  64626. ++---------------+---+------------------+---+---------------+------------------+
  64627. +| | | | |
  64628. +| 8 bytes | 8 bytes | 8 bytes | 8 bytes |
  64629. +
  64630. + This is in large keys format. In small keys format second 8 byte chunk is
  64631. + out. Locality is a constant returned by safe_link_locality(). objectid is
  64632. + an oid of a file on which operation protected by this safe-link is
  64633. + performed. link-type is used to distinguish safe-links for different
  64634. + operations.
  64635. +
  64636. + */
  64637. +static reiser4_key *build_link_key(reiser4_tree * tree, oid_t oid,
  64638. + reiser4_safe_link_t link, reiser4_key * key)
  64639. +{
  64640. + reiser4_key_init(key);
  64641. + set_key_locality(key, safe_link_locality(tree));
  64642. + set_key_objectid(key, oid);
  64643. + set_key_offset(key, link);
  64644. + return key;
  64645. +}
  64646. +
  64647. +/*
  64648. + * how much disk space is necessary to insert and remove (in the
  64649. + * error-handling path) safe-link.
  64650. + */
  64651. +static __u64 safe_link_tograb(reiser4_tree * tree)
  64652. +{
  64653. + return
  64654. + /* insert safe link */
  64655. + estimate_one_insert_item(tree) +
  64656. + /* remove safe link */
  64657. + estimate_one_item_removal(tree) +
  64658. + /* drill to the leaf level during insertion */
  64659. + 1 + estimate_one_insert_item(tree) +
  64660. + /*
  64661. + * possible update of existing safe-link. Actually, if
  64662. + * safe-link existed already (we failed to remove it), then no
  64663. + * insertion is necessary, so this term is already "covered",
  64664. + * but for simplicity let's leave it.
  64665. + */
  64666. + 1;
  64667. +}
  64668. +
  64669. +/*
  64670. + * grab enough disk space to insert and remove (in the error-handling path)
  64671. + * safe-link.
  64672. + */
  64673. +int safe_link_grab(reiser4_tree * tree, reiser4_ba_flags_t flags)
  64674. +{
  64675. + int result;
  64676. +
  64677. + grab_space_enable();
  64678. + /* The sbinfo->delete_mutex can be taken here.
  64679. + * safe_link_release() should be called before leaving reiser4
  64680. + * context. */
  64681. + result =
  64682. + reiser4_grab_reserved(tree->super, safe_link_tograb(tree), flags);
  64683. + grab_space_enable();
  64684. + return result;
  64685. +}
  64686. +
  64687. +/*
  64688. + * release unused disk space reserved by safe_link_grab().
  64689. + */
  64690. +void safe_link_release(reiser4_tree * tree)
  64691. +{
  64692. + reiser4_release_reserved(tree->super);
  64693. +}
  64694. +
  64695. +/*
  64696. + * insert into tree safe-link for operation @link on inode @inode.
  64697. + */
  64698. +int safe_link_add(struct inode *inode, reiser4_safe_link_t link)
  64699. +{
  64700. + reiser4_key key;
  64701. + safelink_t sl;
  64702. + int length;
  64703. + int result;
  64704. + reiser4_tree *tree;
  64705. +
  64706. + build_sd_key(inode, &sl.sdkey);
  64707. + length = sizeof sl.sdkey;
  64708. +
  64709. + if (link == SAFE_TRUNCATE) {
  64710. + /*
  64711. + * for truncate we have to store final file length also,
  64712. + * expand item.
  64713. + */
  64714. + length += sizeof(sl.size);
  64715. + put_unaligned(cpu_to_le64(inode->i_size), &sl.size);
  64716. + }
  64717. + tree = reiser4_tree_by_inode(inode);
  64718. + build_link_key(tree, get_inode_oid(inode), link, &key);
  64719. +
  64720. + result = store_black_box(tree, &key, &sl, length);
  64721. + if (result == -EEXIST)
  64722. + result = update_black_box(tree, &key, &sl, length);
  64723. + return result;
  64724. +}
  64725. +
  64726. +/*
  64727. + * remove safe-link corresponding to the operation @link on inode @inode from
  64728. + * the tree.
  64729. + */
  64730. +int safe_link_del(reiser4_tree * tree, oid_t oid, reiser4_safe_link_t link)
  64731. +{
  64732. + reiser4_key key;
  64733. +
  64734. + return kill_black_box(tree, build_link_key(tree, oid, link, &key));
  64735. +}
  64736. +
  64737. +/*
  64738. + * in-memory structure to keep information extracted from safe-link. This is
  64739. + * used to iterate over all safe-links.
  64740. + */
  64741. +struct safe_link_context {
  64742. + reiser4_tree *tree; /* internal tree */
  64743. + reiser4_key key; /* safe-link key */
  64744. + reiser4_key sdkey; /* key of object stat-data */
  64745. + reiser4_safe_link_t link; /* safe-link type */
  64746. + oid_t oid; /* object oid */
  64747. + __u64 size; /* final size for truncate */
  64748. +};
  64749. +
  64750. +/*
  64751. + * start iterating over all safe-links.
  64752. + */
  64753. +static void safe_link_iter_begin(reiser4_tree * tree,
  64754. + struct safe_link_context *ctx)
  64755. +{
  64756. + ctx->tree = tree;
  64757. + reiser4_key_init(&ctx->key);
  64758. + set_key_locality(&ctx->key, safe_link_locality(tree));
  64759. + set_key_objectid(&ctx->key, get_key_objectid(reiser4_max_key()));
  64760. + set_key_offset(&ctx->key, get_key_offset(reiser4_max_key()));
  64761. +}
  64762. +
  64763. +/*
  64764. + * return next safe-link.
  64765. + */
  64766. +static int safe_link_iter_next(struct safe_link_context *ctx)
  64767. +{
  64768. + int result;
  64769. + safelink_t sl;
  64770. +
  64771. + result = load_black_box(ctx->tree, &ctx->key, &sl, sizeof sl, 0);
  64772. + if (result == 0) {
  64773. + ctx->oid = get_key_objectid(&ctx->key);
  64774. + ctx->link = get_key_offset(&ctx->key);
  64775. + ctx->sdkey = sl.sdkey;
  64776. + if (ctx->link == SAFE_TRUNCATE)
  64777. + ctx->size = le64_to_cpu(get_unaligned(&sl.size));
  64778. + }
  64779. + return result;
  64780. +}
  64781. +
  64782. +/*
  64783. + * check whether there are any more safe-links left in the tree.
  64784. + */
  64785. +static int safe_link_iter_finished(struct safe_link_context *ctx)
  64786. +{
  64787. + return get_key_locality(&ctx->key) != safe_link_locality(ctx->tree);
  64788. +}
  64789. +
  64790. +/*
  64791. + * finish safe-link iteration.
  64792. + */
  64793. +static void safe_link_iter_end(struct safe_link_context *ctx)
  64794. +{
  64795. + /* nothing special */
  64796. +}
  64797. +
  64798. +/*
  64799. + * process single safe-link.
  64800. + */
  64801. +static int process_safelink(struct super_block *super, reiser4_safe_link_t link,
  64802. + reiser4_key * sdkey, oid_t oid, __u64 size)
  64803. +{
  64804. + struct inode *inode;
  64805. + int result;
  64806. +
  64807. + /*
  64808. + * obtain object inode by reiser4_iget(), then call object plugin
  64809. + * ->safelink() method to do actual work, then delete safe-link on
  64810. + * success.
  64811. + */
  64812. + inode = reiser4_iget(super, sdkey, 1);
  64813. + if (!IS_ERR(inode)) {
  64814. + file_plugin *fplug;
  64815. +
  64816. + fplug = inode_file_plugin(inode);
  64817. + assert("nikita-3428", fplug != NULL);
  64818. + assert("", oid == get_inode_oid(inode));
  64819. + if (fplug->safelink != NULL) {
  64820. + /* reiser4_txn_restart_current is not necessary because
  64821. + * mounting is single thread. However, without it
  64822. + * deadlock detection code will complain (see
  64823. + * nikita-3361). */
  64824. + reiser4_txn_restart_current();
  64825. + result = fplug->safelink(inode, link, size);
  64826. + } else {
  64827. + warning("nikita-3430",
  64828. + "Cannot handle safelink for %lli",
  64829. + (unsigned long long)oid);
  64830. + reiser4_print_key("key", sdkey);
  64831. + result = 0;
  64832. + }
  64833. + if (result != 0) {
  64834. + warning("nikita-3431",
  64835. + "Error processing safelink for %lli: %i",
  64836. + (unsigned long long)oid, result);
  64837. + }
  64838. + reiser4_iget_complete(inode);
  64839. + iput(inode);
  64840. + if (result == 0) {
  64841. + result = safe_link_grab(reiser4_get_tree(super),
  64842. + BA_CAN_COMMIT);
  64843. + if (result == 0)
  64844. + result =
  64845. + safe_link_del(reiser4_get_tree(super), oid,
  64846. + link);
  64847. + safe_link_release(reiser4_get_tree(super));
  64848. + /*
  64849. + * restart transaction: if there was large number of
  64850. + * safe-links, their processing may fail to fit into
  64851. + * single transaction.
  64852. + */
  64853. + if (result == 0)
  64854. + reiser4_txn_restart_current();
  64855. + }
  64856. + } else
  64857. + result = PTR_ERR(inode);
  64858. + return result;
  64859. +}
  64860. +
  64861. +/*
  64862. + * iterate over all safe-links in the file-system processing them one by one.
  64863. + */
  64864. +int process_safelinks(struct super_block *super)
  64865. +{
  64866. + struct safe_link_context ctx;
  64867. + int result;
  64868. +
  64869. + if (rofs_super(super))
  64870. + /* do nothing on the read-only file system */
  64871. + return 0;
  64872. + safe_link_iter_begin(&get_super_private(super)->tree, &ctx);
  64873. + result = 0;
  64874. + do {
  64875. + result = safe_link_iter_next(&ctx);
  64876. + if (safe_link_iter_finished(&ctx) || result == -ENOENT) {
  64877. + result = 0;
  64878. + break;
  64879. + }
  64880. + if (result == 0)
  64881. + result = process_safelink(super, ctx.link,
  64882. + &ctx.sdkey, ctx.oid,
  64883. + ctx.size);
  64884. + } while (result == 0);
  64885. + safe_link_iter_end(&ctx);
  64886. + return result;
  64887. +}
  64888. +
  64889. +/* Make Linus happy.
  64890. + Local variables:
  64891. + c-indentation-style: "K&R"
  64892. + mode-name: "LC"
  64893. + c-basic-offset: 8
  64894. + tab-width: 8
  64895. + fill-column: 120
  64896. + scroll-step: 1
  64897. + End:
  64898. +*/
  64899. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/safe_link.h linux-4.14.2/fs/reiser4/safe_link.h
  64900. --- linux-4.14.2.orig/fs/reiser4/safe_link.h 1970-01-01 01:00:00.000000000 +0100
  64901. +++ linux-4.14.2/fs/reiser4/safe_link.h 2017-11-26 22:13:09.000000000 +0100
  64902. @@ -0,0 +1,29 @@
  64903. +/* Copyright 2003 by Hans Reiser, licensing governed by
  64904. + * reiser4/README */
  64905. +
  64906. +/* Safe-links. See safe_link.c for details. */
  64907. +
  64908. +#if !defined(__FS_SAFE_LINK_H__)
  64909. +#define __FS_SAFE_LINK_H__
  64910. +
  64911. +#include "tree.h"
  64912. +
  64913. +int safe_link_grab(reiser4_tree * tree, reiser4_ba_flags_t flags);
  64914. +void safe_link_release(reiser4_tree * tree);
  64915. +int safe_link_add(struct inode *inode, reiser4_safe_link_t link);
  64916. +int safe_link_del(reiser4_tree *, oid_t oid, reiser4_safe_link_t link);
  64917. +
  64918. +int process_safelinks(struct super_block *super);
  64919. +
  64920. +/* __FS_SAFE_LINK_H__ */
  64921. +#endif
  64922. +
  64923. +/* Make Linus happy.
  64924. + Local variables:
  64925. + c-indentation-style: "K&R"
  64926. + mode-name: "LC"
  64927. + c-basic-offset: 8
  64928. + tab-width: 8
  64929. + fill-column: 120
  64930. + End:
  64931. +*/
  64932. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/seal.c linux-4.14.2/fs/reiser4/seal.c
  64933. --- linux-4.14.2.orig/fs/reiser4/seal.c 1970-01-01 01:00:00.000000000 +0100
  64934. +++ linux-4.14.2/fs/reiser4/seal.c 2017-11-26 22:13:09.000000000 +0100
  64935. @@ -0,0 +1,219 @@
  64936. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  64937. +/* Seals implementation. */
  64938. +/* Seals are "weak" tree pointers. They are analogous to tree coords in
  64939. + allowing to bypass tree traversal. But normal usage of coords implies that
  64940. + node pointed to by coord is locked, whereas seals don't keep a lock (or
  64941. + even a reference) to znode. Instead, each znode contains a version number,
  64942. + increased on each znode modification. This version number is copied into a
  64943. + seal when seal is created. Later, one can "validate" seal by calling
  64944. + reiser4_seal_validate(). If znode is in cache and its version number is
  64945. + still the same, seal is "pristine" and coord associated with it can be
  64946. + re-used immediately.
  64947. +
  64948. + If, on the other hand, znode is out of cache, or it is obviously different
  64949. + one from the znode seal was initially attached to (for example, it is on
  64950. + the different level, or is being removed from the tree), seal is
  64951. + irreparably invalid ("burned") and tree traversal has to be repeated.
  64952. +
  64953. + Otherwise, there is some hope, that while znode was modified (and seal was
  64954. + "broken" as a result), key attached to the seal is still in the node. This
  64955. + is checked by first comparing this key with delimiting keys of node and, if
  64956. + key is ok, doing intra-node lookup.
  64957. +
  64958. + Znode version is maintained in the following way:
  64959. +
  64960. + there is reiser4_tree.znode_epoch counter. Whenever new znode is created,
  64961. + znode_epoch is incremented and its new value is stored in ->version field
  64962. + of new znode. Whenever znode is dirtied (which means it was probably
  64963. + modified), znode_epoch is also incremented and its new value is stored in
  64964. + znode->version. This is done so, because just incrementing znode->version
  64965. + on each update is not enough: it may so happen, that znode gets deleted, new
  64966. + znode is allocated for the same disk block and gets the same version
  64967. + counter, tricking seal code into false positive.
  64968. +*/
  64969. +
  64970. +#include "forward.h"
  64971. +#include "debug.h"
  64972. +#include "key.h"
  64973. +#include "coord.h"
  64974. +#include "seal.h"
  64975. +#include "plugin/item/item.h"
  64976. +#include "plugin/node/node.h"
  64977. +#include "jnode.h"
  64978. +#include "znode.h"
  64979. +#include "super.h"
  64980. +
  64981. +static znode *seal_node(const seal_t *seal);
  64982. +static int seal_matches(const seal_t *seal, znode * node);
  64983. +
  64984. +/* initialise seal. This can be called several times on the same seal. @coord
  64985. + and @key can be NULL. */
  64986. +void reiser4_seal_init(seal_t *seal /* seal to initialise */ ,
  64987. + const coord_t *coord /* coord @seal will be
  64988. + * attached to */ ,
  64989. + const reiser4_key * key UNUSED_ARG /* key @seal will be
  64990. + * attached to */ )
  64991. +{
  64992. + assert("nikita-1886", seal != NULL);
  64993. + memset(seal, 0, sizeof *seal);
  64994. + if (coord != NULL) {
  64995. + znode *node;
  64996. +
  64997. + node = coord->node;
  64998. + assert("nikita-1987", node != NULL);
  64999. + spin_lock_znode(node);
  65000. + seal->version = node->version;
  65001. + assert("nikita-1988", seal->version != 0);
  65002. + seal->block = *znode_get_block(node);
  65003. +#if REISER4_DEBUG
  65004. + seal->coord1 = *coord;
  65005. + if (key != NULL)
  65006. + seal->key = *key;
  65007. +#endif
  65008. + spin_unlock_znode(node);
  65009. + }
  65010. +}
  65011. +
  65012. +/* finish with seal */
  65013. +void reiser4_seal_done(seal_t *seal/* seal to clear */)
  65014. +{
  65015. + assert("nikita-1887", seal != NULL);
  65016. + seal->version = 0;
  65017. +}
  65018. +
  65019. +/* true if seal was initialised */
  65020. +int reiser4_seal_is_set(const seal_t *seal/* seal to query */)
  65021. +{
  65022. + assert("nikita-1890", seal != NULL);
  65023. + return seal->version != 0;
  65024. +}
  65025. +
  65026. +#if REISER4_DEBUG
  65027. +/* helper function for reiser4_seal_validate(). It checks that item at @coord
  65028. + * has expected key. This is to detect cases where node was modified but wasn't
  65029. + * marked dirty. */
  65030. +static inline int check_seal_match(const coord_t *coord /* coord to check */ ,
  65031. + const reiser4_key *k__/* expected key */)
  65032. +{
  65033. + reiser4_key ukey;
  65034. +
  65035. + /* FIXME-VS: we only can compare keys for items whose units
  65036. + represent exactly one key */
  65037. + if (coord->between != AT_UNIT)
  65038. + return 1;
  65039. + if (!coord_is_existing_unit(coord))
  65040. + return 0;
  65041. + if (item_is_extent(coord))
  65042. + return 1;
  65043. + if (item_is_ctail(coord))
  65044. + return keyge(k__, unit_key_by_coord(coord, &ukey));
  65045. + return keyeq(k__, unit_key_by_coord(coord, &ukey));
  65046. +}
  65047. +#endif
  65048. +
  65049. +/* this is used by reiser4_seal_validate. It accepts return value of
  65050. + * longterm_lock_znode and returns 1 if it can be interpreted as seal
  65051. + * validation failure. For instance, when longterm_lock_znode returns -EINVAL,
  65052. + * reiser4_seal_validate returns -E_REPEAT and caller will call tree search.
  65053. + * We cannot do this in longterm_lock_znode(), because sometimes we want to
  65054. + * distinguish between -EINVAL and -E_REPEAT. */
  65055. +static int should_repeat(int return_code)
  65056. +{
  65057. + return return_code == -EINVAL;
  65058. +}
  65059. +
  65060. +/* (re-)validate seal.
  65061. +
  65062. + Checks whether seal is pristine, and try to revalidate it if possible.
  65063. +
  65064. + If seal was burned, or broken irreparably, return -E_REPEAT.
  65065. +
  65066. + NOTE-NIKITA currently reiser4_seal_validate() returns -E_REPEAT if key we are
  65067. + looking for is in range of keys covered by the sealed node, but item wasn't
  65068. + found by node ->lookup() method. Alternative is to return -ENOENT in this
  65069. + case, but this would complicate callers logic.
  65070. +
  65071. +*/
  65072. +int reiser4_seal_validate(seal_t *seal /* seal to validate */,
  65073. + coord_t *coord /* coord to validate against */,
  65074. + const reiser4_key * key /* key to validate against */,
  65075. + lock_handle * lh /* resulting lock handle */,
  65076. + znode_lock_mode mode /* lock node */,
  65077. + znode_lock_request request/* locking priority */)
  65078. +{
  65079. + znode *node;
  65080. + int result;
  65081. +
  65082. + assert("nikita-1889", seal != NULL);
  65083. + assert("nikita-1881", reiser4_seal_is_set(seal));
  65084. + assert("nikita-1882", key != NULL);
  65085. + assert("nikita-1883", coord != NULL);
  65086. + assert("nikita-1884", lh != NULL);
  65087. + assert("nikita-1885", keyeq(&seal->key, key));
  65088. + assert("nikita-1989", coords_equal(&seal->coord1, coord));
  65089. +
  65090. + /* obtain znode by block number */
  65091. + node = seal_node(seal);
  65092. + if (!node)
  65093. + /* znode wasn't in cache */
  65094. + return RETERR(-E_REPEAT);
  65095. + /* znode was in cache, lock it */
  65096. + result = longterm_lock_znode(lh, node, mode, request);
  65097. + zput(node);
  65098. + if (result == 0) {
  65099. + if (seal_matches(seal, node)) {
  65100. + /* if seal version and znode version
  65101. + coincide */
  65102. + ON_DEBUG(coord_update_v(coord));
  65103. + assert("nikita-1990",
  65104. + node == seal->coord1.node);
  65105. + assert("nikita-1898",
  65106. + WITH_DATA_RET(coord->node, 1,
  65107. + check_seal_match(coord,
  65108. + key)));
  65109. + } else
  65110. + result = RETERR(-E_REPEAT);
  65111. + }
  65112. + if (result != 0) {
  65113. + if (should_repeat(result))
  65114. + result = RETERR(-E_REPEAT);
  65115. + /* unlock node on failure */
  65116. + done_lh(lh);
  65117. + }
  65118. + return result;
  65119. +}
  65120. +
  65121. +/* helper functions */
  65122. +
  65123. +/* obtain reference to znode seal points to, if in cache */
  65124. +static znode *seal_node(const seal_t *seal/* seal to query */)
  65125. +{
  65126. + assert("nikita-1891", seal != NULL);
  65127. + return zlook(current_tree, &seal->block);
  65128. +}
  65129. +
  65130. +/* true if @seal version and @node version coincide */
  65131. +static int seal_matches(const seal_t *seal /* seal to check */ ,
  65132. + znode * node/* node to check */)
  65133. +{
  65134. + int result;
  65135. +
  65136. + assert("nikita-1991", seal != NULL);
  65137. + assert("nikita-1993", node != NULL);
  65138. +
  65139. + spin_lock_znode(node);
  65140. + result = (seal->version == node->version);
  65141. + spin_unlock_znode(node);
  65142. + return result;
  65143. +}
  65144. +
  65145. +/* Make Linus happy.
  65146. + Local variables:
  65147. + c-indentation-style: "K&R"
  65148. + mode-name: "LC"
  65149. + c-basic-offset: 8
  65150. + tab-width: 8
  65151. + fill-column: 120
  65152. + scroll-step: 1
  65153. + End:
  65154. +*/
  65155. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/seal.h linux-4.14.2/fs/reiser4/seal.h
  65156. --- linux-4.14.2.orig/fs/reiser4/seal.h 1970-01-01 01:00:00.000000000 +0100
  65157. +++ linux-4.14.2/fs/reiser4/seal.h 2017-11-26 22:13:09.000000000 +0100
  65158. @@ -0,0 +1,49 @@
  65159. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  65160. +
  65161. +/* Declaration of seals: "weak" tree pointers. See seal.c for comments. */
  65162. +
  65163. +#ifndef __SEAL_H__
  65164. +#define __SEAL_H__
  65165. +
  65166. +#include "forward.h"
  65167. +#include "debug.h"
  65168. +#include "dformat.h"
  65169. +#include "key.h"
  65170. +#include "coord.h"
  65171. +
  65172. +/* for __u?? types */
  65173. +/*#include <linux/types.h>*/
  65174. +
  65175. +/* seal. See comment at the top of seal.c */
  65176. +typedef struct seal_s {
  65177. + /* version of znode recorder at the time of seal creation */
  65178. + __u64 version;
  65179. + /* block number of znode attached to this seal */
  65180. + reiser4_block_nr block;
  65181. +#if REISER4_DEBUG
  65182. + /* coord this seal is attached to. For debugging. */
  65183. + coord_t coord1;
  65184. + /* key this seal is attached to. For debugging. */
  65185. + reiser4_key key;
  65186. +#endif
  65187. +} seal_t;
  65188. +
  65189. +extern void reiser4_seal_init(seal_t *, const coord_t *, const reiser4_key *);
  65190. +extern void reiser4_seal_done(seal_t *);
  65191. +extern int reiser4_seal_is_set(const seal_t *);
  65192. +extern int reiser4_seal_validate(seal_t *, coord_t *,
  65193. + const reiser4_key *, lock_handle * ,
  65194. + znode_lock_mode mode, znode_lock_request request);
  65195. +
  65196. +/* __SEAL_H__ */
  65197. +#endif
  65198. +
  65199. +/* Make Linus happy.
  65200. + Local variables:
  65201. + c-indentation-style: "K&R"
  65202. + mode-name: "LC"
  65203. + c-basic-offset: 8
  65204. + tab-width: 8
  65205. + fill-column: 120
  65206. + End:
  65207. +*/
  65208. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/search.c linux-4.14.2/fs/reiser4/search.c
  65209. --- linux-4.14.2.orig/fs/reiser4/search.c 1970-01-01 01:00:00.000000000 +0100
  65210. +++ linux-4.14.2/fs/reiser4/search.c 2017-11-26 22:13:09.000000000 +0100
  65211. @@ -0,0 +1,1612 @@
  65212. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  65213. + * reiser4/README */
  65214. +
  65215. +#include "forward.h"
  65216. +#include "debug.h"
  65217. +#include "dformat.h"
  65218. +#include "key.h"
  65219. +#include "coord.h"
  65220. +#include "seal.h"
  65221. +#include "plugin/item/item.h"
  65222. +#include "plugin/node/node.h"
  65223. +#include "plugin/plugin.h"
  65224. +#include "jnode.h"
  65225. +#include "znode.h"
  65226. +#include "block_alloc.h"
  65227. +#include "tree_walk.h"
  65228. +#include "tree.h"
  65229. +#include "reiser4.h"
  65230. +#include "super.h"
  65231. +#include "inode.h"
  65232. +
  65233. +#include <linux/slab.h>
  65234. +
  65235. +static const char *bias_name(lookup_bias bias);
  65236. +
  65237. +/* tree searching algorithm, intranode searching algorithms are in
  65238. + plugin/node/ */
  65239. +
  65240. +/* tree lookup cache
  65241. + *
  65242. + * The coord by key cache consists of small list of recently accessed nodes
  65243. + * maintained according to the LRU discipline. Before doing real top-to-down
  65244. + * tree traversal this cache is scanned for nodes that can contain key
  65245. + * requested.
  65246. + *
  65247. + * The efficiency of coord cache depends heavily on locality of reference for
  65248. + * tree accesses. Our user level simulations show reasonably good hit ratios
  65249. + * for coord cache under most loads so far.
  65250. + */
  65251. +
  65252. +/* Initialise coord cache slot */
  65253. +static void cbk_cache_init_slot(cbk_cache_slot *slot)
  65254. +{
  65255. + assert("nikita-345", slot != NULL);
  65256. +
  65257. + INIT_LIST_HEAD(&slot->lru);
  65258. + slot->node = NULL;
  65259. +}
  65260. +
  65261. +/* Initialize coord cache */
  65262. +int cbk_cache_init(cbk_cache * cache/* cache to init */)
  65263. +{
  65264. + int i;
  65265. +
  65266. + assert("nikita-346", cache != NULL);
  65267. +
  65268. + cache->slot =
  65269. + kmalloc(sizeof(cbk_cache_slot) * cache->nr_slots,
  65270. + reiser4_ctx_gfp_mask_get());
  65271. + if (cache->slot == NULL)
  65272. + return RETERR(-ENOMEM);
  65273. +
  65274. + INIT_LIST_HEAD(&cache->lru);
  65275. + for (i = 0; i < cache->nr_slots; ++i) {
  65276. + cbk_cache_init_slot(cache->slot + i);
  65277. + list_add_tail(&((cache->slot + i)->lru), &cache->lru);
  65278. + }
  65279. + rwlock_init(&cache->guard);
  65280. + return 0;
  65281. +}
  65282. +
  65283. +/* free cbk cache data */
  65284. +void cbk_cache_done(cbk_cache * cache/* cache to release */)
  65285. +{
  65286. + assert("nikita-2493", cache != NULL);
  65287. + if (cache->slot != NULL) {
  65288. + kfree(cache->slot);
  65289. + cache->slot = NULL;
  65290. + }
  65291. +}
  65292. +
  65293. +/* macro to iterate over all cbk cache slots */
  65294. +#define for_all_slots(cache, slot) \
  65295. + for ((slot) = list_entry((cache)->lru.next, cbk_cache_slot, lru); \
  65296. + &(cache)->lru != &(slot)->lru; \
  65297. + (slot) = list_entry(slot->lru.next, cbk_cache_slot, lru))
  65298. +
  65299. +#if REISER4_DEBUG
  65300. +/* this function assures that [cbk-cache-invariant] invariant holds */
  65301. +static int cbk_cache_invariant(const cbk_cache * cache)
  65302. +{
  65303. + cbk_cache_slot *slot;
  65304. + int result;
  65305. + int unused;
  65306. +
  65307. + if (cache->nr_slots == 0)
  65308. + return 1;
  65309. +
  65310. + assert("nikita-2469", cache != NULL);
  65311. + unused = 0;
  65312. + result = 1;
  65313. + read_lock(&((cbk_cache *)cache)->guard);
  65314. + for_all_slots(cache, slot) {
  65315. + /* in LRU first go all `used' slots followed by `unused' */
  65316. + if (unused && (slot->node != NULL))
  65317. + result = 0;
  65318. + if (slot->node == NULL)
  65319. + unused = 1;
  65320. + else {
  65321. + cbk_cache_slot *scan;
  65322. +
  65323. + /* all cached nodes are different */
  65324. + scan = slot;
  65325. + while (result) {
  65326. + scan = list_entry(scan->lru.next,
  65327. + cbk_cache_slot, lru);
  65328. + if (&cache->lru == &scan->lru)
  65329. + break;
  65330. + if (slot->node == scan->node)
  65331. + result = 0;
  65332. + }
  65333. + }
  65334. + if (!result)
  65335. + break;
  65336. + }
  65337. + read_unlock(&((cbk_cache *)cache)->guard);
  65338. + return result;
  65339. +}
  65340. +
  65341. +#endif
  65342. +
  65343. +/* Remove references, if any, to @node from coord cache */
  65344. +void cbk_cache_invalidate(const znode * node /* node to remove from cache */ ,
  65345. + reiser4_tree * tree/* tree to remove node from */)
  65346. +{
  65347. + cbk_cache_slot *slot;
  65348. + cbk_cache *cache;
  65349. + int i;
  65350. +
  65351. + assert("nikita-350", node != NULL);
  65352. + assert("nikita-1479", LOCK_CNT_GTZ(rw_locked_tree));
  65353. +
  65354. + cache = &tree->cbk_cache;
  65355. + assert("nikita-2470", cbk_cache_invariant(cache));
  65356. +
  65357. + write_lock(&(cache->guard));
  65358. + for (i = 0, slot = cache->slot; i < cache->nr_slots; ++i, ++slot) {
  65359. + if (slot->node == node) {
  65360. + list_move_tail(&slot->lru, &cache->lru);
  65361. + slot->node = NULL;
  65362. + break;
  65363. + }
  65364. + }
  65365. + write_unlock(&(cache->guard));
  65366. + assert("nikita-2471", cbk_cache_invariant(cache));
  65367. +}
  65368. +
  65369. +/* add to the cbk-cache in the "tree" information about "node". This
  65370. + can actually be update of existing slot in a cache. */
  65371. +static void cbk_cache_add(const znode * node/* node to add to the cache */)
  65372. +{
  65373. + cbk_cache *cache;
  65374. +
  65375. + cbk_cache_slot *slot;
  65376. + int i;
  65377. +
  65378. + assert("nikita-352", node != NULL);
  65379. +
  65380. + cache = &znode_get_tree(node)->cbk_cache;
  65381. + assert("nikita-2472", cbk_cache_invariant(cache));
  65382. +
  65383. + if (cache->nr_slots == 0)
  65384. + return;
  65385. +
  65386. + write_lock(&(cache->guard));
  65387. + /* find slot to update/add */
  65388. + for (i = 0, slot = cache->slot; i < cache->nr_slots; ++i, ++slot) {
  65389. + /* oops, this node is already in a cache */
  65390. + if (slot->node == node)
  65391. + break;
  65392. + }
  65393. + /* if all slots are used, reuse least recently used one */
  65394. + if (i == cache->nr_slots) {
  65395. + slot = list_entry(cache->lru.prev, cbk_cache_slot, lru);
  65396. + slot->node = (znode *) node;
  65397. + }
  65398. + list_move(&slot->lru, &cache->lru);
  65399. + write_unlock(&(cache->guard));
  65400. + assert("nikita-2473", cbk_cache_invariant(cache));
  65401. +}
  65402. +
  65403. +static int setup_delimiting_keys(cbk_handle * h);
  65404. +static lookup_result coord_by_handle(cbk_handle * handle);
  65405. +static lookup_result traverse_tree(cbk_handle * h);
  65406. +static int cbk_cache_search(cbk_handle * h);
  65407. +
  65408. +static level_lookup_result cbk_level_lookup(cbk_handle * h);
  65409. +static level_lookup_result cbk_node_lookup(cbk_handle * h);
  65410. +
  65411. +/* helper functions */
  65412. +
  65413. +static void update_stale_dk(reiser4_tree * tree, znode * node);
  65414. +
  65415. +/* release parent node during traversal */
  65416. +static void put_parent(cbk_handle * h);
  65417. +/* check consistency of fields */
  65418. +static int sanity_check(cbk_handle * h);
  65419. +/* release resources in handle */
  65420. +static void hput(cbk_handle * h);
  65421. +
  65422. +static level_lookup_result search_to_left(cbk_handle * h);
  65423. +
  65424. +/* pack numerous (numberous I should say) arguments of coord_by_key() into
  65425. + * cbk_handle */
  65426. +static cbk_handle *cbk_pack(cbk_handle * handle,
  65427. + reiser4_tree * tree,
  65428. + const reiser4_key * key,
  65429. + coord_t *coord,
  65430. + lock_handle * active_lh,
  65431. + lock_handle * parent_lh,
  65432. + znode_lock_mode lock_mode,
  65433. + lookup_bias bias,
  65434. + tree_level lock_level,
  65435. + tree_level stop_level,
  65436. + __u32 flags, ra_info_t *info)
  65437. +{
  65438. + memset(handle, 0, sizeof *handle);
  65439. +
  65440. + handle->tree = tree;
  65441. + handle->key = key;
  65442. + handle->lock_mode = lock_mode;
  65443. + handle->bias = bias;
  65444. + handle->lock_level = lock_level;
  65445. + handle->stop_level = stop_level;
  65446. + handle->coord = coord;
  65447. + /* set flags. See comment in tree.h:cbk_flags */
  65448. + handle->flags = flags | CBK_TRUST_DK | CBK_USE_CRABLOCK;
  65449. +
  65450. + handle->active_lh = active_lh;
  65451. + handle->parent_lh = parent_lh;
  65452. + handle->ra_info = info;
  65453. + return handle;
  65454. +}
  65455. +
  65456. +/* main tree lookup procedure
  65457. +
  65458. + Check coord cache. If key we are looking for is not found there, call
  65459. + coord_by_handle() to do the real tree traversal.
  65460. +
  65461. + As we have extents on the twig level, @lock_level and @stop_level can
  65462. + be different from LEAF_LEVEL and each other.
  65463. +
  65464. + Thread cannot keep any reiser4 locks (tree, znode, dk spin-locks, or znode
  65465. + long term locks) while calling this.
  65466. +*/
  65467. +lookup_result coord_by_key(reiser4_tree * tree /* tree to perform search
  65468. + * in. Usually this tree is
  65469. + * part of file-system
  65470. + * super-block */ ,
  65471. + const reiser4_key * key /* key to look for */ ,
  65472. + coord_t *coord /* where to store found
  65473. + * position in a tree. Fields
  65474. + * in "coord" are only valid if
  65475. + * coord_by_key() returned
  65476. + * "CBK_COORD_FOUND" */ ,
  65477. + lock_handle * lh, /* resulting lock handle */
  65478. + znode_lock_mode lock_mode /* type of lookup we
  65479. + * want on node. Pass
  65480. + * ZNODE_READ_LOCK here
  65481. + * if you only want to
  65482. + * read item found and
  65483. + * ZNODE_WRITE_LOCK if
  65484. + * you want to modify
  65485. + * it */ ,
  65486. + lookup_bias bias /* what to return if coord
  65487. + * with exactly the @key is
  65488. + * not in the tree */ ,
  65489. + tree_level lock_level/* tree level where to start
  65490. + * taking @lock type of
  65491. + * locks */ ,
  65492. + tree_level stop_level/* tree level to stop. Pass
  65493. + * LEAF_LEVEL or TWIG_LEVEL
  65494. + * here. The item being looked
  65495. + * for has to be between
  65496. + * @lock_level and
  65497. + * @stop_level, inclusive */ ,
  65498. + __u32 flags /* search flags */ ,
  65499. + ra_info_t *
  65500. + info
  65501. + /* information about desired tree traversal
  65502. + * readahead */
  65503. + )
  65504. +{
  65505. + cbk_handle handle;
  65506. + lock_handle parent_lh;
  65507. + lookup_result result;
  65508. +
  65509. + init_lh(lh);
  65510. + init_lh(&parent_lh);
  65511. +
  65512. + assert("nikita-3023", reiser4_schedulable());
  65513. +
  65514. + assert("nikita-353", tree != NULL);
  65515. + assert("nikita-354", key != NULL);
  65516. + assert("nikita-355", coord != NULL);
  65517. + assert("nikita-356", (bias == FIND_EXACT)
  65518. + || (bias == FIND_MAX_NOT_MORE_THAN));
  65519. + assert("nikita-357", stop_level >= LEAF_LEVEL);
  65520. + /* no locks can be held during tree traversal */
  65521. + assert("nikita-2104", lock_stack_isclean(get_current_lock_stack()));
  65522. +
  65523. + cbk_pack(&handle,
  65524. + tree,
  65525. + key,
  65526. + coord,
  65527. + lh,
  65528. + &parent_lh,
  65529. + lock_mode, bias, lock_level, stop_level, flags, info);
  65530. +
  65531. + result = coord_by_handle(&handle);
  65532. + assert("nikita-3247",
  65533. + ergo(!IS_CBKERR(result), coord->node == lh->node));
  65534. + return result;
  65535. +}
  65536. +
  65537. +/* like coord_by_key(), but starts traversal from vroot of @object rather than
  65538. + * from tree root. */
  65539. +lookup_result reiser4_object_lookup(struct inode *object,
  65540. + const reiser4_key * key,
  65541. + coord_t *coord,
  65542. + lock_handle * lh,
  65543. + znode_lock_mode lock_mode,
  65544. + lookup_bias bias,
  65545. + tree_level lock_level,
  65546. + tree_level stop_level, __u32 flags,
  65547. + ra_info_t *info)
  65548. +{
  65549. + cbk_handle handle;
  65550. + lock_handle parent_lh;
  65551. + lookup_result result;
  65552. +
  65553. + init_lh(lh);
  65554. + init_lh(&parent_lh);
  65555. +
  65556. + assert("nikita-3023", reiser4_schedulable());
  65557. +
  65558. + assert("nikita-354", key != NULL);
  65559. + assert("nikita-355", coord != NULL);
  65560. + assert("nikita-356", (bias == FIND_EXACT)
  65561. + || (bias == FIND_MAX_NOT_MORE_THAN));
  65562. + assert("nikita-357", stop_level >= LEAF_LEVEL);
  65563. + /* no locks can be held during tree search by key */
  65564. + assert("nikita-2104", lock_stack_isclean(get_current_lock_stack()));
  65565. +
  65566. + cbk_pack(&handle,
  65567. + object != NULL ? reiser4_tree_by_inode(object) : current_tree,
  65568. + key,
  65569. + coord,
  65570. + lh,
  65571. + &parent_lh,
  65572. + lock_mode, bias, lock_level, stop_level, flags, info);
  65573. + handle.object = object; /* enables vroot handling in traversal */
  65574. +
  65575. + result = coord_by_handle(&handle);
  65576. + assert("nikita-3247",
  65577. + ergo(!IS_CBKERR(result), coord->node == lh->node));
  65578. + return result;
  65579. +}
  65580. +
  65581. +/* lookup by cbk_handle. Common part of coord_by_key() and
  65582. + reiser4_object_lookup(). */
  65583. +static lookup_result coord_by_handle(cbk_handle * handle)
  65584. +{
  65585. + /*
  65586. + * first check cbk_cache (which is look-aside cache for our tree) and
  65587. + * if this fails, start traversal.
  65588. + */
  65589. + /* first check whether "key" is in cache of recent lookups. */
  65590. + if (cbk_cache_search(handle) == 0)
  65591. + return handle->result;
  65592. + else
  65593. + return traverse_tree(handle);
  65594. +}
  65595. +
  65596. +/* Execute actor for each item (or unit, depending on @through_units_p),
  65597. + starting from @coord, right-ward, until either:
  65598. +
  65599. + - end of the tree is reached
  65600. + - unformatted node is met
  65601. + - error occurred
  65602. + - @actor returns 0 or less
  65603. +
  65604. + Error code, or last actor return value is returned.
  65605. +
  65606. + This is used by plugin/dir/hashed_dir.c:reiser4_find_entry() to move through
  65607. + sequences of entries with identical keys and the like.
  65608. +*/
  65609. +int reiser4_iterate_tree(reiser4_tree * tree /* tree to scan */ ,
  65610. + coord_t *coord /* coord to start from */ ,
  65611. + lock_handle * lh /* lock handle to start with and to
  65612. + * update along the way */ ,
  65613. + tree_iterate_actor_t actor /* function to call on each
  65614. + * item/unit */ ,
  65615. + void *arg /* argument to pass to @actor */ ,
  65616. + znode_lock_mode mode /* lock mode on scanned nodes */ ,
  65617. + int through_units_p /* call @actor on each item or on
  65618. + * each unit */ )
  65619. +{
  65620. + int result;
  65621. +
  65622. + assert("nikita-1143", tree != NULL);
  65623. + assert("nikita-1145", coord != NULL);
  65624. + assert("nikita-1146", lh != NULL);
  65625. + assert("nikita-1147", actor != NULL);
  65626. +
  65627. + result = zload(coord->node);
  65628. + coord_clear_iplug(coord);
  65629. + if (result != 0)
  65630. + return result;
  65631. + if (!coord_is_existing_unit(coord)) {
  65632. + zrelse(coord->node);
  65633. + return -ENOENT;
  65634. + }
  65635. + while ((result = actor(tree, coord, lh, arg)) > 0) {
  65636. + /* move further */
  65637. + if ((through_units_p && coord_next_unit(coord)) ||
  65638. + (!through_units_p && coord_next_item(coord))) {
  65639. + do {
  65640. + lock_handle couple;
  65641. +
  65642. + /* move to the next node */
  65643. + init_lh(&couple);
  65644. + result =
  65645. + reiser4_get_right_neighbor(&couple,
  65646. + coord->node,
  65647. + (int)mode,
  65648. + GN_CAN_USE_UPPER_LEVELS);
  65649. + zrelse(coord->node);
  65650. + if (result == 0) {
  65651. +
  65652. + result = zload(couple.node);
  65653. + if (result != 0) {
  65654. + done_lh(&couple);
  65655. + return result;
  65656. + }
  65657. +
  65658. + coord_init_first_unit(coord,
  65659. + couple.node);
  65660. + done_lh(lh);
  65661. + move_lh(lh, &couple);
  65662. + } else
  65663. + return result;
  65664. + } while (node_is_empty(coord->node));
  65665. + }
  65666. +
  65667. + assert("nikita-1149", coord_is_existing_unit(coord));
  65668. + }
  65669. + zrelse(coord->node);
  65670. + return result;
  65671. +}
  65672. +
  65673. +/* take a long-term lock (mode @mode, priority @pri) on the uber znode of
  65674. + @tree; the acquired lock is returned through @lh */
  65675. +int get_uber_znode(reiser4_tree * tree, znode_lock_mode mode,
  65676. + znode_lock_request pri, lock_handle * lh)
  65677. +{
  65678. + int result;
  65679. +
  65680. + result = longterm_lock_znode(lh, tree->uber, mode, pri);
  65681. + return result;
  65682. +}
  65682. +
  65683. +/* true if @key is strictly within @node
  65684. +
  65685. + we are looking for a possibly non-unique key and the item is at the edge of
  65686. + @node. Maybe it is in the neighbor.
  65687. +*/
  65688. +static int znode_contains_key_strict(znode * node /* node to check key
  65689. + * against */ ,
  65690. + const reiser4_key *
  65691. + key /* key to check */ ,
  65692. + int isunique)
  65693. +{
  65694. + int answer;
  65695. +
  65696. + assert("nikita-1760", node != NULL);
  65697. + assert("nikita-1722", key != NULL);
  65698. +
  65699. + if (keyge(key, &node->rd_key))
  65700. + return 0;
  65701. +
  65702. + answer = keycmp(&node->ld_key, key);
  65703. +
  65704. + if (isunique)
  65705. + return answer != GREATER_THAN;
  65706. + else
  65707. + return answer == LESS_THAN;
  65708. +}
  65709. +
  65710. +/*
  65711. + * Virtual Root (vroot) code.
  65712. + *
  65713. + * For given file system object (e.g., regular file or directory) let's
  65714. + * define its "virtual root" as lowest in the tree (that is, furthest
  65715. + * from the tree root) node such that all body items of said object are
  65716. + * located in a tree rooted at this node.
  65717. + *
  65718. + * Once vroot of object is found all tree lookups for items within body of
  65719. + * this object ("object lookups") can be started from its vroot rather
  65720. + * than from real root. This has following advantages:
  65721. + *
  65722. + * 1. amount of nodes traversed during lookup (and, hence, amount of
  65723. + * key comparisons made) decreases, and
  65724. + *
  65725. + * 2. contention on tree root is decreased. This latter was actually
  65726. + * motivating reason behind vroot, because spin lock of root node,
  65727. + * which is taken when acquiring long-term lock on root node is the
  65728. + * hottest lock in the reiser4.
  65729. + *
  65730. + * How to find vroot.
  65731. + *
  65732. + * When vroot of object F is not yet determined, all object lookups start
  65733. + * from the root of the tree. At each tree level during traversal we have
  65734. + * a node N such that a key we are looking for (which is the key inside
  65735. + * object's body) is located within N. In function handle_vroot() called
  65736. + * from cbk_level_lookup() we check whether N is possible vroot for
  65737. + * F. Check is trivial---if neither leftmost nor rightmost item of N
  65738. + * belongs to F (and we already have helpful ->owns_item() method of
  65739. + * object plugin for this), then N is possible vroot of F. This, of
  65740. + * course, relies on the assumption that each object occupies contiguous
  65741. + * range of keys in the tree.
  65742. + *
  65743. + * Thus, traversing tree downward and checking each node as we go, we can
  65744. + * find lowest such node, which, by definition, is vroot.
  65745. + *
  65746. + * How to track vroot.
  65747. + *
  65748. + * Nohow. If actual vroot changes, next object lookup will just restart
  65749. + * from the actual tree root, refreshing object's vroot along the way.
  65750. + *
  65751. + */
  65752. +
  65753. +/*
  65754. + * Check whether @node is a possible vroot of @object; if so, record it
  65755. + * in the inode via inode_set_vroot().
  65756. + */
  65757. +static void handle_vroot(struct inode *object, znode * node)
  65758. +{
  65759. + file_plugin *fplug;
  65760. + coord_t coord;
  65761. +
  65762. + fplug = inode_file_plugin(object);
  65763. + assert("nikita-3353", fplug != NULL);
  65764. + assert("nikita-3354", fplug->owns_item != NULL);
  65765. +
  65766. + if (unlikely(node_is_empty(node)))
  65767. + return;
  65768. +
  65769. + coord_init_first_unit(&coord, node);
  65770. + /*
  65771. + * if leftmost item of @node belongs to @object, we cannot be sure
  65772. + * that @node is vroot of @object, because, some items of @object are
  65773. + * probably in the sub-tree rooted at the left neighbor of @node.
  65774. + */
  65775. + if (fplug->owns_item(object, &coord))
  65776. + return;
  65777. + coord_init_last_unit(&coord, node);
  65778. + /* mutatis mutandis for the rightmost item */
  65779. + if (fplug->owns_item(object, &coord))
  65780. + return;
  65781. + /* otherwise, @node is possible vroot of @object */
  65782. + inode_set_vroot(object, node);
  65783. +}
  65783. +
  65784. +/*
  65785. + * helper function used by traverse_tree() to start tree traversal not from
  65786. + * the tree root, but from @h->object's vroot, if possible.
  65787. + */
  65788. +static int prepare_object_lookup(cbk_handle * h)
  65789. +{
  65790. + znode *vroot;
  65791. + int result;
  65792. +
  65793. + vroot = inode_get_vroot(h->object);
  65794. + if (vroot == NULL) {
  65795. + /*
  65796. + * object doesn't have known vroot, start from real tree root.
  65797. + */
  65798. + return LOOKUP_CONT;
  65799. + }
  65800. +
  65801. + h->level = znode_get_level(vroot);
  65802. + /* take a long-term lock on vroot */
  65803. + h->result = longterm_lock_znode(h->active_lh, vroot,
  65804. + cbk_lock_mode(h->level, h),
  65805. + ZNODE_LOCK_LOPRI);
  65806. + result = LOOKUP_REST;
  65807. + if (h->result == 0) {
  65808. + int isunique;
  65809. + int inside;
  65810. +
  65811. + isunique = h->flags & CBK_UNIQUE;
  65812. + /* check that key is inside vroot */
  65813. + read_lock_dk(h->tree);
  65814. + inside = (znode_contains_key_strict(vroot, h->key, isunique) &&
  65815. + !ZF_ISSET(vroot, JNODE_HEARD_BANSHEE));
  65816. + read_unlock_dk(h->tree);
  65817. + if (inside) {
  65818. + h->result = zload(vroot);
  65819. + if (h->result == 0) {
  65820. + /* search for key in vroot. */
  65821. + result = cbk_node_lookup(h);
  65822. + zrelse(vroot); /*h->active_lh->node); */
  65823. + if (h->active_lh->node != vroot) {
  65824. + result = LOOKUP_REST;
  65825. + } else if (result == LOOKUP_CONT) {
  65826. + move_lh(h->parent_lh, h->active_lh);
  65827. + h->flags &= ~CBK_DKSET;
  65828. + }
  65829. + }
  65830. + }
  65831. + }
  65832. +
  65833. + zput(vroot); /* matches the reference returned by inode_get_vroot() */
  65834. +
  65835. + if (IS_CBKERR(h->result) || result == LOOKUP_REST)
  65836. + hput(h);
  65837. + return result;
  65838. +}
  65839. +
  65840. +/* main function that handles common parts of tree traversal: starting
  65841. + (fake znode handling), restarts, error handling, completion */
  65842. +static lookup_result traverse_tree(cbk_handle * h/* search handle */)
  65843. +{
  65844. + int done;
  65845. + int iterations;
  65846. + int vroot_used;
  65847. +
  65848. + assert("nikita-365", h != NULL);
  65849. + assert("nikita-366", h->tree != NULL);
  65850. + assert("nikita-367", h->key != NULL);
  65851. + assert("nikita-368", h->coord != NULL);
  65852. + assert("nikita-369", (h->bias == FIND_EXACT)
  65853. + || (h->bias == FIND_MAX_NOT_MORE_THAN));
  65854. + assert("nikita-370", h->stop_level >= LEAF_LEVEL);
  65855. + assert("nikita-2949", !(h->flags & CBK_DKSET));
  65856. + assert("zam-355", lock_stack_isclean(get_current_lock_stack()));
  65857. +
  65858. + done = 0;
  65859. + iterations = 0;
  65860. + vroot_used = 0;
  65861. +
  65862. + /* loop for restarts */
  65863. +restart:
  65864. +
  65865. + assert("nikita-3024", reiser4_schedulable());
  65866. +
  65867. + h->result = CBK_COORD_FOUND;
  65868. + /* connect_znode() needs it */
  65869. + h->ld_key = *reiser4_min_key();
  65870. + h->rd_key = *reiser4_max_key();
  65871. + h->flags |= CBK_DKSET;
  65872. + h->error = NULL;
  65873. +
  65874. + if (!vroot_used && h->object != NULL) {
  65875. + vroot_used = 1; /* attempt the vroot shortcut only once per call */
  65876. + done = prepare_object_lookup(h);
  65877. + if (done == LOOKUP_REST)
  65878. + goto restart;
  65879. + else if (done == LOOKUP_DONE)
  65880. + return h->result;
  65881. + }
  65882. + if (h->parent_lh->node == NULL) {
  65883. + done =
  65884. + get_uber_znode(h->tree, ZNODE_READ_LOCK, ZNODE_LOCK_LOPRI,
  65885. + h->parent_lh);
  65886. +
  65887. + assert("nikita-1637", done != -E_DEADLOCK);
  65888. +
  65889. + h->block = h->tree->root_block;
  65890. + h->level = h->tree->height;
  65891. + h->coord->node = h->parent_lh->node;
  65892. +
  65893. + if (done != 0)
  65894. + return done;
  65895. + }
  65896. +
  65897. + /* loop descending a tree */
  65898. + while (!done) {
  65899. +
  65900. + if (unlikely((iterations > REISER4_CBK_ITERATIONS_LIMIT) &&
  65901. + IS_POW(iterations))) {
  65902. + warning("nikita-1481", "Too many iterations: %i",
  65903. + iterations);
  65904. + reiser4_print_key("key", h->key);
  65905. + ++iterations;
  65906. + } else if (unlikely(iterations > REISER4_MAX_CBK_ITERATIONS)) {
  65907. + h->error =
  65908. + "reiser-2018: Too many iterations. Tree corrupted, or (less likely) starvation occurring.";
  65909. + h->result = RETERR(-EIO);
  65910. + break;
  65911. + }
  65912. + switch (cbk_level_lookup(h)) {
  65913. + case LOOKUP_CONT:
  65914. + move_lh(h->parent_lh, h->active_lh);
  65915. + continue;
  65916. + default:
  65917. + wrong_return_value("nikita-372", "cbk_level");
  65918. + case LOOKUP_DONE:
  65919. + done = 1;
  65920. + break;
  65921. + case LOOKUP_REST:
  65922. + hput(h);
  65923. + /* deadlock avoidance is normal case. */
  65924. + if (h->result != -E_DEADLOCK)
  65925. + ++iterations;
  65926. + reiser4_preempt_point();
  65927. + goto restart;
  65928. + }
  65929. + }
  65930. + /* that's all. The rest is error handling */
  65931. + if (unlikely(h->error != NULL)) {
  65932. + warning("nikita-373", "%s: level: %i, "
  65933. + "lock_level: %i, stop_level: %i "
  65934. + "lock_mode: %s, bias: %s",
  65935. + h->error, h->level, h->lock_level, h->stop_level,
  65936. + lock_mode_name(h->lock_mode), bias_name(h->bias));
  65937. + reiser4_print_address("block", &h->block);
  65938. + reiser4_print_key("key", h->key);
  65939. + print_coord_content("coord", h->coord);
  65940. + }
  65941. + /* `unlikely' error case */
  65942. + if (unlikely(IS_CBKERR(h->result))) {
  65943. + /* failure. do cleanup */
  65944. + hput(h);
  65945. + } else {
  65946. + assert("nikita-1605", WITH_DATA_RET
  65947. + (h->coord->node, 1,
  65948. + ergo((h->result == CBK_COORD_FOUND) &&
  65949. + (h->bias == FIND_EXACT) &&
  65950. + (!node_is_empty(h->coord->node)),
  65951. + coord_is_existing_item(h->coord))));
  65952. + }
  65953. + return h->result;
  65954. +}
  65955. +
  65956. +/* find delimiting keys of child
  65957. +
  65958. + Determine left and right delimiting keys for child pointed to by
  65959. + @parent_coord.
  65960. + Caller must hold the tree's dk lock (checked by assert below).
  65961. +*/
  65962. +static void find_child_delimiting_keys(znode * parent /* parent znode, passed
  65963. + * locked */ ,
  65964. + const coord_t *parent_coord
  65965. + /* coord where pointer
  65966. + * to child is stored
  65967. + */ ,
  65968. + reiser4_key * ld /* where to store left
  65969. + * delimiting key */ ,
  65970. + reiser4_key * rd /* where to store right
  65971. + * delimiting key */ )
  65972. +{
  65973. + coord_t neighbor;
  65974. +
  65975. + assert("nikita-1484", parent != NULL);
  65976. + assert_rw_locked(&(znode_get_tree(parent)->dk_lock));
  65977. +
  65978. + coord_dup(&neighbor, parent_coord);
  65979. +
  65980. + if (neighbor.between == AT_UNIT)
  65981. + /* imitate item ->lookup() behavior. */
  65982. + neighbor.between = AFTER_UNIT;
  65983. +
  65984. + if (coord_set_to_left(&neighbor) == 0)
  65985. + unit_key_by_coord(&neighbor, ld);
  65986. + else {
  65987. + assert("nikita-14851", 0);
  65988. + *ld = *znode_get_ld_key(parent);
  65989. + }
  65990. +
  65991. + coord_dup(&neighbor, parent_coord);
  65992. + if (neighbor.between == AT_UNIT)
  65993. + neighbor.between = AFTER_UNIT;
  65994. + if (coord_set_to_right(&neighbor) == 0)
  65995. + unit_key_by_coord(&neighbor, rd);
  65996. + else
  65997. + *rd = *znode_get_rd_key(parent);
  65998. +}
  65999. +
  66000. +/*
  66001. + * setup delimiting keys for a child
  66002. + *
  66003. + * @parent parent node
  66004. + *
  66005. + * @coord location in @parent where pointer to @child is
  66006. + *
  66007. + * @child child node. Returns 1 if the fast check found the keys unset
  66008. + * (and they were installed under dk lock if still needed), 0 otherwise.
  66009. + */
  66010. +int
  66011. +set_child_delimiting_keys(znode * parent, const coord_t *coord, znode * child)
  66012. +{
  66013. + reiser4_tree *tree;
  66014. +
  66015. + assert("nikita-2952",
  66016. + znode_get_level(parent) == znode_get_level(coord->node));
  66017. +
  66018. + /* fast check without taking dk lock. This is safe, because
  66019. + * JNODE_DKSET is never cleared once set. */
  66020. + if (!ZF_ISSET(child, JNODE_DKSET)) {
  66021. + tree = znode_get_tree(parent);
  66022. + write_lock_dk(tree);
  66023. + if (likely(!ZF_ISSET(child, JNODE_DKSET))) {
  66024. + find_child_delimiting_keys(parent, coord,
  66025. + &child->ld_key,
  66026. + &child->rd_key);
  66027. + ON_DEBUG(child->ld_key_version =
  66028. + atomic_inc_return(&delim_key_version);
  66029. + child->rd_key_version =
  66030. + atomic_inc_return(&delim_key_version););
  66031. + ZF_SET(child, JNODE_DKSET);
  66032. + }
  66033. + write_unlock_dk(tree);
  66034. + return 1;
  66035. + }
  66036. + return 0;
  66037. +}
  66037. +
  66038. +/* Perform tree lookup at one level. This is called from cbk_traverse()
  66039. + function that drives lookup through tree and calls cbk_node_lookup() to
  66040. + perform lookup within one node.
  66041. +
  66042. + See comments in a code.
  66043. +*/
  66044. +static level_lookup_result cbk_level_lookup(cbk_handle * h/* search handle */)
  66045. +{
  66046. + int ret;
  66047. + int setdk;
  66048. + int ldkeyset = 0;
  66049. + reiser4_key ldkey;
  66050. + reiser4_key key;
  66051. + znode *active;
  66052. +
  66053. + assert("nikita-3025", reiser4_schedulable());
  66054. +
  66055. + /* acquire reference to @active node */
  66056. + active =
  66057. + zget(h->tree, &h->block, h->parent_lh->node, h->level,
  66058. + reiser4_ctx_gfp_mask_get());
  66059. +
  66060. + if (IS_ERR(active)) {
  66061. + h->result = PTR_ERR(active);
  66062. + return LOOKUP_DONE;
  66063. + }
  66064. +
  66065. + /* lock @active */
  66066. + h->result = longterm_lock_znode(h->active_lh,
  66067. + active,
  66068. + cbk_lock_mode(h->level, h),
  66069. + ZNODE_LOCK_LOPRI);
  66070. + /* longterm_lock_znode() acquires additional reference to znode (which
  66071. + will be later released by longterm_unlock_znode()). Release
  66072. + reference acquired by zget().
  66073. + */
  66074. + zput(active);
  66075. + if (unlikely(h->result != 0))
  66076. + goto fail_or_restart;
  66077. +
  66078. + setdk = 0;
  66079. + /* if @active is accessed for the first time, setup delimiting keys on
  66080. + it. Delimiting keys are taken from the parent node. See
  66081. + setup_delimiting_keys() for details.
  66082. + */
  66083. + if (h->flags & CBK_DKSET) {
  66084. + setdk = setup_delimiting_keys(h);
  66085. + h->flags &= ~CBK_DKSET;
  66086. + } else {
  66087. + znode *parent;
  66088. +
  66089. + parent = h->parent_lh->node;
  66090. + h->result = zload(parent);
  66091. + if (unlikely(h->result != 0))
  66092. + goto fail_or_restart;
  66093. +
  66094. + if (!ZF_ISSET(active, JNODE_DKSET))
  66095. + setdk = set_child_delimiting_keys(parent,
  66096. + h->coord, active);
  66097. + else {
  66098. + read_lock_dk(h->tree);
  66099. + find_child_delimiting_keys(parent, h->coord, &ldkey,
  66100. + &key);
  66101. + read_unlock_dk(h->tree);
  66102. + ldkeyset = 1;
  66103. + }
  66104. + zrelse(parent);
  66105. + }
  66106. +
  66107. + /* this is an ugly kludge. Reminder: this is necessary, because
  66108. + ->lookup() method returns coord with ->between field probably set
  66109. + to something different from AT_UNIT.
  66110. + */
  66111. + h->coord->between = AT_UNIT;
  66112. +
  66113. + if (znode_just_created(active) && (h->coord->node != NULL)) {
  66114. + write_lock_tree(h->tree);
  66115. + /* if we are going to load znode right now, setup
  66116. + ->in_parent: coord where pointer to this node is stored in
  66117. + parent.
  66118. + */
  66119. + coord_to_parent_coord(h->coord, &active->in_parent);
  66120. + write_unlock_tree(h->tree);
  66121. + }
  66122. +
  66123. + /* check connectedness without holding tree lock---false negatives
  66124. + * will be re-checked by connect_znode(), and false positives are
  66125. + * impossible---@active cannot suddenly turn into unconnected
  66126. + * state. */
  66127. + if (!znode_is_connected(active)) {
  66128. + h->result = connect_znode(h->coord, active);
  66129. + if (unlikely(h->result != 0)) {
  66130. + put_parent(h);
  66131. + goto fail_or_restart;
  66132. + }
  66133. + }
  66134. +
  66135. + jload_prefetch(ZJNODE(active));
  66136. +
  66137. + if (setdk)
  66138. + update_stale_dk(h->tree, active);
  66139. +
  66140. + /* put_parent() cannot be called earlier, because connect_znode()
  66141. + assumes parent node is referenced; */
  66142. + put_parent(h);
  66143. +
  66144. + if ((!znode_contains_key_lock(active, h->key) &&
  66145. + (h->flags & CBK_TRUST_DK))
  66146. + || ZF_ISSET(active, JNODE_HEARD_BANSHEE)) {
  66147. + /* 1. key was moved out of this node while this thread was
  66148. + waiting for the lock. Restart. More elaborate solution is
  66149. + to determine where key moved (to the left, or to the right)
  66150. + and try to follow it through sibling pointers.
  66151. +
  66152. + 2. or, node itself is going to be removed from the
  66153. + tree. Release lock and restart.
  66154. + */
  66155. + h->result = -E_REPEAT;
  66156. + }
  66157. + if (h->result == -E_REPEAT)
  66158. + return LOOKUP_REST;
  66159. +
  66160. + h->result = zload_ra(active, h->ra_info);
  66161. + if (h->result)
  66162. + return LOOKUP_DONE;
  66163. +
  66164. + /* sanity checks */
  66165. + if (sanity_check(h)) {
  66166. + zrelse(active);
  66167. + return LOOKUP_DONE;
  66168. + }
  66169. +
  66170. + /* check that key of leftmost item in the @active is the same as in
  66171. + * its parent */
  66172. + if (ldkeyset && !node_is_empty(active) &&
  66173. + !keyeq(leftmost_key_in_node(active, &key), &ldkey)) {
  66174. + warning("vs-3533", "Keys are inconsistent. Fsck?");
  66175. + reiser4_print_key("inparent", &ldkey);
  66176. + reiser4_print_key("inchild", &key);
  66177. + h->result = RETERR(-EIO);
  66178. + zrelse(active);
  66179. + return LOOKUP_DONE;
  66180. + }
  66181. +
  66182. + if (h->object != NULL)
  66183. + handle_vroot(h->object, active);
  66184. +
  66185. + ret = cbk_node_lookup(h);
  66186. +
  66187. + /* h->active_lh->node might change, but active is yet to be zrelsed */
  66188. + zrelse(active);
  66189. +
  66190. + return ret;
  66191. +
  66192. +fail_or_restart:
  66193. + if (h->result == -E_DEADLOCK)
  66194. + return LOOKUP_REST;
  66195. + return LOOKUP_DONE;
  66196. +}
  66197. +
  66198. +#if REISER4_DEBUG
  66199. +/* debugging check: verify left and right delimiting keys of a znode
  66200. + against those of its in-memory neighbors */
  66201. +void check_dkeys(znode * node)
  66202. +{
  66203. + znode *left;
  66204. + znode *right;
  66205. +
  66206. + read_lock_tree(current_tree);
  66207. + read_lock_dk(current_tree);
  66208. +
  66209. + assert("vs-1710", znode_is_any_locked(node));
  66210. + assert("vs-1197",
  66211. + !keygt(znode_get_ld_key(node), znode_get_rd_key(node)));
  66212. +
  66213. + left = node->left;
  66214. + right = node->right;
  66215. +
  66216. + if (ZF_ISSET(node, JNODE_LEFT_CONNECTED) && ZF_ISSET(node, JNODE_DKSET)
  66217. + && left != NULL && ZF_ISSET(left, JNODE_DKSET))
  66218. + /* check left neighbor. Note that left neighbor is not locked,
  66219. + so it might get wrong delimiting keys therefore */
  66220. + assert("vs-1198",
  66221. + (keyeq(znode_get_rd_key(left), znode_get_ld_key(node))
  66222. + || ZF_ISSET(left, JNODE_HEARD_BANSHEE)));
  66223. +
  66224. + if (ZF_ISSET(node, JNODE_RIGHT_CONNECTED) && ZF_ISSET(node, JNODE_DKSET)
  66225. + && right != NULL && ZF_ISSET(right, JNODE_DKSET))
  66226. + /* check right neighbor. Note that right neighbor is not
  66227. + locked, so it might get wrong delimiting keys therefore */
  66228. + assert("vs-1199",
  66229. + (keyeq(znode_get_rd_key(node), znode_get_ld_key(right))
  66230. + || ZF_ISSET(right, JNODE_HEARD_BANSHEE)));
  66231. +
  66232. + read_unlock_dk(current_tree);
  66233. + read_unlock_tree(current_tree);
  66234. +}
  66235. +#endif
  66235. +
  66236. +/* true if @key is the left delimiting key of @node (checked under dk lock) */
  66237. +static int key_is_ld(znode * node, const reiser4_key * key)
  66238. +{
  66239. + int ld;
  66240. +
  66241. + assert("nikita-1716", node != NULL);
  66242. + assert("nikita-1758", key != NULL);
  66243. +
  66244. + read_lock_dk(znode_get_tree(node));
  66245. + assert("nikita-1759", znode_contains_key(node, key));
  66246. + ld = keyeq(znode_get_ld_key(node), key);
  66247. + read_unlock_dk(znode_get_tree(node));
  66248. + return ld;
  66249. +}
  66250. +
  66251. +/* Process one node during tree traversal.
  66252. +
  66253. + This is called by cbk_level_lookup(), and on the vroot by
  66254. + prepare_object_lookup(). */
  66255. +static level_lookup_result cbk_node_lookup(cbk_handle * h/* search handle */)
  66256. +{
  66257. + /* node plugin of @active */
  66258. + node_plugin *nplug;
  66259. + /* item plugin of item that was found */
  66260. + item_plugin *iplug;
  66261. + /* search bias */
  66262. + lookup_bias node_bias;
  66263. + /* node we are operating upon */
  66264. + znode *active;
  66265. + /* tree we are searching in */
  66266. + reiser4_tree *tree;
  66267. + /* result */
  66268. + int result;
  66269. +
  66270. + assert("nikita-379", h != NULL);
  66271. +
  66272. + active = h->active_lh->node;
  66273. + tree = h->tree;
  66274. +
  66275. + nplug = active->nplug;
  66276. + assert("nikita-380", nplug != NULL);
  66277. +
  66278. + ON_DEBUG(check_dkeys(active));
  66279. +
  66280. + /* return item from "active" node with maximal key not greater than
  66281. + "key" */
  66282. + node_bias = h->bias;
  66283. + result = nplug->lookup(active, h->key, node_bias, h->coord);
  66284. + if (unlikely(result != NS_FOUND && result != NS_NOT_FOUND)) {
  66285. + /* error occurred */
  66286. + h->result = result;
  66287. + return LOOKUP_DONE;
  66288. + }
  66289. + if (h->level == h->stop_level) {
  66290. + /* welcome to the stop level */
  66291. + assert("nikita-381", h->coord->node == active);
  66292. + if (result == NS_FOUND) {
  66293. + /* success of tree lookup */
  66294. + if (!(h->flags & CBK_UNIQUE)
  66295. + && key_is_ld(active, h->key))
  66296. + return search_to_left(h);
  66297. + else
  66298. + h->result = CBK_COORD_FOUND;
  66299. + } else {
  66300. + h->result = CBK_COORD_NOTFOUND;
  66301. + }
  66302. + if (!(h->flags & CBK_IN_CACHE))
  66303. + cbk_cache_add(active);
  66304. + return LOOKUP_DONE;
  66305. + }
  66306. +
  66307. + if (h->level > TWIG_LEVEL && result == NS_NOT_FOUND) {
  66308. + h->error = "not found on internal node";
  66309. + h->result = result;
  66310. + return LOOKUP_DONE;
  66311. + }
  66312. +
  66313. + assert("vs-361", h->level > h->stop_level);
  66314. +
  66315. + if (handle_eottl(h, &result)) {
  66316. + assert("vs-1674", (result == LOOKUP_DONE ||
  66317. + result == LOOKUP_REST));
  66318. + return result;
  66319. + }
  66320. +
  66321. + /* go down to next level */
  66322. + check_me("vs-12", zload(h->coord->node) == 0);
  66323. + assert("nikita-2116", item_is_internal(h->coord));
  66324. + iplug = item_plugin_by_coord(h->coord);
  66325. + iplug->s.internal.down_link(h->coord, h->key, &h->block);
  66326. + zrelse(h->coord->node);
  66327. + --h->level;
  66328. + return LOOKUP_CONT; /* continue */
  66329. +}
  66329. +
  66330. +/* scan cbk_cache slots looking for a match for @h */
  66331. +static int cbk_cache_scan_slots(cbk_handle * h/* cbk handle */)
  66332. +{
  66333. + level_lookup_result llr;
  66334. + znode *node;
  66335. + reiser4_tree *tree;
  66336. + cbk_cache_slot *slot;
  66337. + cbk_cache *cache;
  66338. + tree_level level;
  66339. + int isunique;
  66340. + const reiser4_key *key;
  66341. + int result;
  66342. +
  66343. + assert("nikita-1317", h != NULL);
  66344. + assert("nikita-1315", h->tree != NULL);
  66345. + assert("nikita-1316", h->key != NULL);
  66346. +
  66347. + tree = h->tree;
  66348. + cache = &tree->cbk_cache;
  66349. + if (cache->nr_slots == 0)
  66350. + /* size of cbk cache was set to 0 by mount time option. */
  66351. + return RETERR(-ENOENT);
  66352. +
  66353. + assert("nikita-2474", cbk_cache_invariant(cache));
  66354. + node = NULL; /* to keep gcc happy */
  66355. + level = h->level;
  66356. + key = h->key;
  66357. + isunique = h->flags & CBK_UNIQUE;
  66358. + result = RETERR(-ENOENT);
  66359. +
  66360. + /*
  66361. + * this is a time-critical function and dragons had, hence, been settled
  66362. + * here.
  66363. + *
  66364. + * Loop below scans cbk cache slots trying to find matching node with
  66365. + * suitable range of delimiting keys and located at the h->level.
  66366. + *
  66367. + * Scan is done under cbk cache spin lock that protects slot->node
  66368. + * pointers. If suitable node is found we want to pin it in
  66369. + * memory. But slot->node can point to the node with x_count 0
  66370. + * (unreferenced). Such node can be recycled at any moment, or can
  66371. + * already be in the process of being recycled (within jput()).
  66372. + *
  66373. + * As we found node in the cbk cache, it means that jput() hasn't yet
  66374. + * called cbk_cache_invalidate().
  66375. + *
  66376. + * We acquire reference to the node without holding tree lock, and
  66377. + * later, check node's RIP bit. This avoids races with jput().
  66378. + */
  66379. +
  66380. + rcu_read_lock();
  66381. + read_lock(&((cbk_cache *)cache)->guard);
  66382. +
  66383. + slot = list_entry(cache->lru.next, cbk_cache_slot, lru);
  66384. + slot = list_entry(slot->lru.prev, cbk_cache_slot, lru);
  66385. + BUG_ON(&slot->lru != &cache->lru);/*????*/
  66386. + while (1) {
  66387. +
  66388. + slot = list_entry(slot->lru.next, cbk_cache_slot, lru);
  66389. +
  66390. + if (&cache->lru != &slot->lru)
  66391. + node = slot->node;
  66392. + else
  66393. + node = NULL;
  66394. +
  66395. + if (unlikely(node == NULL))
  66396. + break;
  66397. +
  66398. + /*
  66399. + * this is (hopefully) the only place in the code where we are
  66400. + * working with delimiting keys without holding dk lock. This
  66401. + * is fine here, because this is only "guess" anyway---keys
  66402. + * are rechecked under dk lock below.
  66403. + */
  66404. + if (znode_get_level(node) == level &&
  66405. + /* reiser4_min_key < key < reiser4_max_key */
  66406. + znode_contains_key_strict(node, key, isunique)) {
  66407. + zref(node);
  66408. + result = 0;
  66409. + spin_lock_prefetch(&tree->tree_lock);
  66410. + break;
  66411. + }
  66412. + }
  66413. + read_unlock(&((cbk_cache *)cache)->guard);
  66414. +
  66415. + assert("nikita-2475", cbk_cache_invariant(cache));
  66416. +
  66417. + if (unlikely(result == 0 && ZF_ISSET(node, JNODE_RIP)))
  66418. + result = -ENOENT;
  66419. +
  66420. + rcu_read_unlock();
  66421. +
  66422. + if (result != 0) {
  66423. + h->result = CBK_COORD_NOTFOUND;
  66424. + return RETERR(-ENOENT);
  66425. + }
  66426. +
  66427. + result =
  66428. + longterm_lock_znode(h->active_lh, node, cbk_lock_mode(level, h),
  66429. + ZNODE_LOCK_LOPRI);
  66430. + zput(node);
  66431. + if (result != 0)
  66432. + return result;
  66433. + result = zload(node);
  66434. + if (result != 0)
  66435. + return result;
  66436. +
  66437. + /* recheck keys */
  66438. + read_lock_dk(tree);
  66439. + result = (znode_contains_key_strict(node, key, isunique) &&
  66440. + !ZF_ISSET(node, JNODE_HEARD_BANSHEE));
  66441. + read_unlock_dk(tree);
  66442. + if (result) {
  66443. + /* do lookup inside node */
  66444. + llr = cbk_node_lookup(h);
  66445. + /* if cbk_node_lookup() wandered to another node (due to eottl
  66446. + or non-unique keys), adjust @node */
  66447. + /*node = h->active_lh->node; */
  66448. +
  66449. + if (llr != LOOKUP_DONE) {
  66450. + /* restart or continue on the next level */
  66451. + result = RETERR(-ENOENT);
  66452. + } else if (IS_CBKERR(h->result))
  66453. + /* io or oom */
  66454. + result = RETERR(-ENOENT);
  66455. + else {
  66456. + /* good. Either item found or definitely not found. */
  66457. + result = 0;
  66458. +
  66459. + write_lock(&(cache->guard));
  66460. + if (slot->node == h->active_lh->node) {
  66461. + /* if this node is still in cbk cache---move
  66462. + its slot to the head of the LRU list. */
  66463. + list_move(&slot->lru, &cache->lru);
  66464. + }
  66465. + write_unlock(&(cache->guard));
  66466. + }
  66467. + } else {
  66468. + /* race. While this thread was waiting for the lock, node was
  66469. + rebalanced and item we are looking for, shifted out of it
  66470. + (if it ever was here).
  66471. +
  66472. + Continuing scanning is almost hopeless: node key range was
  66473. + moved to, is almost certainly at the beginning of the LRU
  66474. + list at this time, because it's hot, but restarting
  66475. + scanning from the very beginning is complex. Just return,
  66476. + so that cbk() will be performed. This is not that
  66477. + important, because such races should be rare. Are they?
  66478. + */
  66479. + result = RETERR(-ENOENT); /* -ERAUGHT */
  66480. + }
  66481. + zrelse(node);
  66482. + assert("nikita-2476", cbk_cache_invariant(cache));
  66483. + return result;
  66484. +}
  66485. +
  66486. +/* look for item with given key in the coord cache
  66487. +
  66488. + This function, called by coord_by_key(), scans "coord cache" (&cbk_cache)
  66489. + which is a small LRU list of znodes accessed lately. For each znode
  66490. + in this list, it checks whether key we are looking for fits into key
  66491. + range covered by this node. If so, and in addition, node lies at allowed
  66492. + level (this is to handle extents on a twig level), node is locked, and
  66493. + lookup inside it is performed.
  66494. +
  66495. + we need a measurement of the cost of this cache search compared to the cost
  66496. + of coord_by_key.
  66497. +
  66498. +*/
  66499. +static int cbk_cache_search(cbk_handle * h/* cbk handle */)
  66500. +{
  66501. + int result = 0;
  66502. + tree_level level;
  66503. +
  66504. + /* add CBK_IN_CACHE to the handle flags. This means that
  66505. + * cbk_node_lookup() assumes that cbk_cache is scanned and would add
  66506. + * found node to the cache. */
  66507. + h->flags |= CBK_IN_CACHE;
  66508. + for (level = h->stop_level; level <= h->lock_level; ++level) {
  66509. + h->level = level;
  66510. + result = cbk_cache_scan_slots(h);
  66511. + if (result != 0) {
  66512. + done_lh(h->active_lh);
  66513. + done_lh(h->parent_lh);
  66514. + } else {
  66515. + assert("nikita-1319", !IS_CBKERR(h->result));
  66516. + break;
  66517. + }
  66518. + }
  66519. + h->flags &= ~CBK_IN_CACHE;
  66520. + return result;
  66521. +}
  66522. +
  66523. +/* type of lock we want to obtain during tree traversal. On stop level
  66524. + we want type of lock user asked for, on upper levels: read lock. */
  66525. +znode_lock_mode cbk_lock_mode(tree_level level, cbk_handle * h)
  66526. +{
  66527. + assert("nikita-382", h != NULL);
  66528. +
  66529. + return (level <= h->lock_level) ? h->lock_mode : ZNODE_READ_LOCK;
  66530. +}
  66531. +
  66532. +/* update outdated delimiting keys */
  66533. +static void stale_dk(reiser4_tree * tree, znode * node)
  66534. +{
  66535. + znode *right;
  66536. +
  66537. + read_lock_tree(tree);
  66538. + write_lock_dk(tree);
  66539. + right = node->right;
  66540. +
  66541. + if (ZF_ISSET(node, JNODE_RIGHT_CONNECTED) &&
  66542. + right && ZF_ISSET(right, JNODE_DKSET) &&
  66543. + !keyeq(znode_get_rd_key(node), znode_get_ld_key(right)))
  66544. + znode_set_rd_key(node, znode_get_ld_key(right));
  66545. +
  66546. + write_unlock_dk(tree);
  66547. + read_unlock_tree(tree);
  66548. +}
  66549. +
  66550. +/* check for possibly outdated delimiting keys, and update them if
  66551. + * necessary. */
  66552. +static void update_stale_dk(reiser4_tree * tree, znode * node)
  66553. +{
  66554. + znode *right;
  66555. + reiser4_key rd;
  66556. +
  66557. + read_lock_tree(tree);
  66558. + read_lock_dk(tree);
  66559. + rd = *znode_get_rd_key(node);
  66560. + right = node->right;
  66561. + if (unlikely(ZF_ISSET(node, JNODE_RIGHT_CONNECTED) &&
  66562. + right && ZF_ISSET(right, JNODE_DKSET) &&
  66563. + !keyeq(&rd, znode_get_ld_key(right)))) {
  66564. + assert("nikita-38211", ZF_ISSET(node, JNODE_DKSET));
  66565. + read_unlock_dk(tree);
  66566. + read_unlock_tree(tree);
  66567. + stale_dk(tree, node);
  66568. + return;
  66569. + }
  66570. + read_unlock_dk(tree);
  66571. + read_unlock_tree(tree);
  66572. +}
  66573. +
  66574. +/*
  66575. + * handle searches for a non-unique key.
  66576. + *
  66577. + * Suppose that we are looking for an item with possibly non-unique key 100.
  66578. + *
  66579. + * Root node contains two pointers: one to a node with left delimiting key 0,
  66580. + * and another to a node with left delimiting key 100. Item we interested in
  66581. + * may well happen in the sub-tree rooted at the first pointer.
  66582. + *
  66583. + * To handle this search_to_left() is called when search reaches stop
  66584. + * level. This function checks it is _possible_ that item we are looking for
  66585. + * is in the left neighbor (this can be done by comparing delimiting keys) and
  66586. + * if so, tries to lock left neighbor (this is low priority lock, so it can
  66587. + * deadlock, tree traversal is just restarted if it did) and then checks
  66588. + * whether left neighbor actually contains items with our key.
  66589. + *
  66590. + * Note that this is done on the stop level only. It is possible to try such
  66591. + * left-check on each level, but as duplicate keys are supposed to be rare
  66592. + * (very unlikely that more than one node is completely filled with items with
  66593. + duplicate keys), it is cheaper to scan to the left on the stop level once.
  66594. + *
  66595. + */
  66596. +static level_lookup_result search_to_left(cbk_handle * h/* search handle */)
  66597. +{
  66598. + level_lookup_result result;
  66599. + coord_t *coord;
  66600. + znode *node;
  66601. + znode *neighbor;
  66602. +
  66603. + lock_handle lh;
  66604. +
  66605. + assert("nikita-1761", h != NULL);
  66606. + assert("nikita-1762", h->level == h->stop_level);
  66607. +
  66608. + init_lh(&lh);
  66609. + coord = h->coord;
  66610. + node = h->active_lh->node;
  66611. + assert("nikita-1763", coord_is_leftmost_unit(coord));
  66612. +
  66613. + h->result =
  66614. + reiser4_get_left_neighbor(&lh, node, (int)h->lock_mode,
  66615. + GN_CAN_USE_UPPER_LEVELS);
  66616. + neighbor = NULL;
  66617. + switch (h->result) {
  66618. + case -E_DEADLOCK:
  66619. + result = LOOKUP_REST;
  66620. + break;
  66621. + case 0:{
  66622. + node_plugin *nplug;
  66623. + coord_t crd;
  66624. + lookup_bias bias;
  66625. +
  66626. + neighbor = lh.node;
  66627. + h->result = zload(neighbor);
  66628. + if (h->result != 0) {
  66629. + result = LOOKUP_DONE;
  66630. + break;
  66631. + }
  66632. +
  66633. + nplug = neighbor->nplug;
  66634. +
  66635. + coord_init_zero(&crd);
  66636. + bias = h->bias;
  66637. + h->bias = FIND_EXACT;
  66638. + h->result =
  66639. + nplug->lookup(neighbor, h->key, h->bias, &crd);
  66640. + h->bias = bias;
  66641. +
  66642. + if (h->result == NS_NOT_FOUND) {
  66643. + case -E_NO_NEIGHBOR:
  66644. + h->result = CBK_COORD_FOUND;
  66645. + if (!(h->flags & CBK_IN_CACHE))
  66646. + cbk_cache_add(node);
  66647. + default: /* some other error */
  66648. + result = LOOKUP_DONE;
  66649. + } else if (h->result == NS_FOUND) {
  66650. + read_lock_dk(znode_get_tree(neighbor));
  66651. + h->rd_key = *znode_get_ld_key(node);
  66652. + leftmost_key_in_node(neighbor, &h->ld_key);
  66653. + read_unlock_dk(znode_get_tree(neighbor));
  66654. + h->flags |= CBK_DKSET;
  66655. +
  66656. + h->block = *znode_get_block(neighbor);
  66657. + /* clear coord->node so that cbk_level_lookup()
  66658. + wouldn't overwrite parent hint in neighbor.
  66659. +
  66660. + Parent hint was set up by
  66661. + reiser4_get_left_neighbor()
  66662. + */
  66663. + /* FIXME: why do we have to spinlock here? */
  66664. + write_lock_tree(znode_get_tree(neighbor));
  66665. + h->coord->node = NULL;
  66666. + write_unlock_tree(znode_get_tree(neighbor));
  66667. + result = LOOKUP_CONT;
  66668. + } else {
  66669. + result = LOOKUP_DONE;
  66670. + }
  66671. + if (neighbor != NULL)
  66672. + zrelse(neighbor);
  66673. + }
  66674. + }
  66675. + done_lh(&lh);
  66676. + return result;
  66677. +}
  66678. +
  66679. +/* debugging aid: return symbolic name of search bias */
  66680. +static const char *bias_name(lookup_bias bias/* bias to get name of */)
  66681. +{
  66682. + if (bias == FIND_EXACT)
  66683. + return "exact";
  66684. + else if (bias == FIND_MAX_NOT_MORE_THAN)
  66685. + return "left-slant";
  66686. +/* else if( bias == RIGHT_SLANT_BIAS ) */
  66687. +/* return "right-bias"; */
  66688. + else {
  66689. + static char buf[30];
  66690. +
  66691. + sprintf(buf, "unknown: %i", bias);
  66692. + return buf;
  66693. + }
  66694. +}
  66695. +
  66696. +#if REISER4_DEBUG
  66697. +/* debugging aid: print human readable information about @p */
  66698. +void print_coord_content(const char *prefix /* prefix to print */ ,
  66699. + coord_t *p/* coord to print */)
  66700. +{
  66701. + reiser4_key key;
  66702. +
  66703. + if (p == NULL) {
  66704. + printk("%s: null\n", prefix);
  66705. + return;
  66706. + }
  66707. + if ((p->node != NULL) && znode_is_loaded(p->node)
  66708. + && coord_is_existing_item(p))
  66709. + printk("%s: data: %p, length: %i\n", prefix,
  66710. + item_body_by_coord(p), item_length_by_coord(p));
  66711. + if (znode_is_loaded(p->node)) {
  66712. + item_key_by_coord(p, &key);
  66713. + reiser4_print_key(prefix, &key);
  66714. + }
  66715. +}
  66716. +
  66717. +/* debugging aid: print human readable information about @block */
  66718. +void reiser4_print_address(const char *prefix /* prefix to print */ ,
  66719. + const reiser4_block_nr * block/* block number to print */)
  66720. +{
  66721. + printk("%s: %s\n", prefix, sprint_address(block));
  66722. +}
  66723. +#endif
  66724. +
  66725. +/* return string containing human readable representation of @block */
  66726. +char *sprint_address(const reiser4_block_nr *
  66727. + block/* block number to print */)
  66728. +{
  66729. + static char address[30];
  66730. +
  66731. + if (block == NULL)
  66732. + sprintf(address, "null");
  66733. + else if (reiser4_blocknr_is_fake(block))
  66734. + sprintf(address, "%llx", (unsigned long long)(*block));
  66735. + else
  66736. + sprintf(address, "%llu", (unsigned long long)(*block));
  66737. + return address;
  66738. +}
  66739. +
  66740. +/* release parent node during traversal */
  66741. +static void put_parent(cbk_handle * h/* search handle */)
  66742. +{
  66743. + assert("nikita-383", h != NULL);
  66744. + if (h->parent_lh->node != NULL)
  66745. + longterm_unlock_znode(h->parent_lh);
  66746. +}
  66747. +
  66748. +/* helper function used by coord_by_key(): release reference to parent znode
  66749. + stored in handle before processing its child. */
  66750. +static void hput(cbk_handle * h/* search handle */)
  66751. +{
  66752. + assert("nikita-385", h != NULL);
  66753. + done_lh(h->parent_lh);
  66754. + done_lh(h->active_lh);
  66755. +}
  66756. +
  66757. +/* Helper function used by cbk(): update delimiting keys of child node (stored
  66758. + in h->active_lh->node) using key taken from parent on the parent level. */
  66759. +static int setup_delimiting_keys(cbk_handle * h/* search handle */)
  66760. +{
  66761. + znode *active;
  66762. + reiser4_tree *tree;
  66763. +
  66764. + assert("nikita-1088", h != NULL);
  66765. +
  66766. + active = h->active_lh->node;
  66767. +
  66768. + /* fast check without taking dk lock. This is safe, because
  66769. + * JNODE_DKSET is never cleared once set. */
  66770. + if (!ZF_ISSET(active, JNODE_DKSET)) {
  66771. + tree = znode_get_tree(active);
  66772. + write_lock_dk(tree);
  66773. + if (!ZF_ISSET(active, JNODE_DKSET)) {
  66774. + znode_set_ld_key(active, &h->ld_key);
  66775. + znode_set_rd_key(active, &h->rd_key);
  66776. + ZF_SET(active, JNODE_DKSET);
  66777. + }
  66778. + write_unlock_dk(tree);
  66779. + return 1;
  66780. + }
  66781. + return 0;
  66782. +}
  66783. +
  66784. +/* true if @block makes sense for the @tree. Used to detect corrupted node
  66785. + * pointers */
  66786. +static int
  66787. +block_nr_is_correct(reiser4_block_nr * block /* block number to check */ ,
  66788. + reiser4_tree * tree/* tree to check against */)
  66789. +{
  66790. + assert("nikita-757", block != NULL);
  66791. + assert("nikita-758", tree != NULL);
  66792. +
  66793. + /* check to see if it exceeds the size of the device. */
  66794. + return reiser4_blocknr_is_sane_for(tree->super, block);
  66795. +}
  66796. +
  66797. +/* check consistency of fields */
  66798. +static int sanity_check(cbk_handle * h/* search handle */)
  66799. +{
  66800. + assert("nikita-384", h != NULL);
  66801. +
  66802. + if (h->level < h->stop_level) {
  66803. + h->error = "Buried under leaves";
  66804. + h->result = RETERR(-EIO);
  66805. + return LOOKUP_DONE;
  66806. + } else if (!block_nr_is_correct(&h->block, h->tree)) {
  66807. + h->error = "bad block number";
  66808. + h->result = RETERR(-EIO);
  66809. + return LOOKUP_DONE;
  66810. + } else
  66811. + return 0;
  66812. +}
  66813. +
  66814. +/* Make Linus happy.
  66815. + Local variables:
  66816. + c-indentation-style: "K&R"
  66817. + mode-name: "LC"
  66818. + c-basic-offset: 8
  66819. + tab-width: 8
  66820. + fill-column: 120
  66821. + scroll-step: 1
  66822. + End:
  66823. +*/
  66824. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/status_flags.c linux-4.14.2/fs/reiser4/status_flags.c
  66825. --- linux-4.14.2.orig/fs/reiser4/status_flags.c 1970-01-01 01:00:00.000000000 +0100
  66826. +++ linux-4.14.2/fs/reiser4/status_flags.c 2017-11-26 22:14:18.000000000 +0100
  66827. @@ -0,0 +1,180 @@
  66828. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  66829. + * reiser4/README */
  66830. +
  66831. +/* Functions that deal with reiser4 status block, query status and update it,
  66832. + * if needed */
  66833. +
  66834. +#include <linux/bio.h>
  66835. +#include <linux/highmem.h>
  66836. +#include <linux/fs.h>
  66837. +#include <linux/blkdev.h>
  66838. +#include "debug.h"
  66839. +#include "dformat.h"
  66840. +#include "status_flags.h"
  66841. +#include "super.h"
  66842. +
  66843. +/* This is our end I/O handler that marks page uptodate if IO was successful.
  66844. + It also unconditionally unlocks the page, so we can see that io was done.
  66845. + We do not free bio, because we hope to reuse that. */
  66846. +static void reiser4_status_endio(struct bio *bio)
  66847. +{
  66848. + if (!bio->bi_status)
  66849. + SetPageUptodate(bio->bi_io_vec->bv_page);
  66850. + else {
  66851. + ClearPageUptodate(bio->bi_io_vec->bv_page);
  66852. + SetPageError(bio->bi_io_vec->bv_page);
  66853. + }
  66854. + unlock_page(bio->bi_io_vec->bv_page);
  66855. +}
  66856. +
  66857. +/* Initialise status code. This is expected to be called from the disk format
  66858. + code. The block parameter is where the status block lives.
  66859. +int reiser4_status_init(reiser4_block_nr block)
  66860. +{
  66861. + struct super_block *sb = reiser4_get_current_sb();
  66862. + struct reiser4_status *statuspage;
  66863. + struct bio *bio;
  66864. + struct page *page;
  66865. +
  66866. + get_super_private(sb)->status_page = NULL;
  66867. + get_super_private(sb)->status_bio = NULL;
  66868. +
  66869. + page = alloc_pages(reiser4_ctx_gfp_mask_get(), 0);
  66870. + if (!page)
  66871. + return -ENOMEM;
  66872. +
  66873. + bio = bio_alloc(reiser4_ctx_gfp_mask_get(), 1);
  66874. + if (bio != NULL) {
  66875. + bio->bi_iter.bi_sector = block * (sb->s_blocksize >> 9);
  66876. + bio_set_dev(bio, sb->s_bdev);
  66877. + bio->bi_io_vec[0].bv_page = page;
  66878. + bio->bi_io_vec[0].bv_len = sb->s_blocksize;
  66879. + bio->bi_io_vec[0].bv_offset = 0;
  66880. + bio->bi_vcnt = 1;
  66881. + bio->bi_iter.bi_size = sb->s_blocksize;
  66882. + bio->bi_end_io = reiser4_status_endio;
  66883. + } else {
  66884. + __free_pages(page, 0);
  66885. + return -ENOMEM;
  66886. + }
  66887. + lock_page(page);
  66888. + bio_set_op_attrs(bio, READ, 0);
  66889. + submit_bio(bio);
  66890. + wait_on_page_locked(page);
  66891. + if (!PageUptodate(page)) {
  66892. + warning("green-2007",
  66893. + "I/O error while tried to read status page\n");
  66894. + return -EIO;
  66895. + }
  66896. +
  66897. + statuspage = (struct reiser4_status *)kmap_atomic(page);
  66898. + if (memcmp
  66899. + (statuspage->magic, REISER4_STATUS_MAGIC,
  66900. + sizeof(REISER4_STATUS_MAGIC))) {
  66901. + /* Magic does not match. */
  66902. + kunmap_atomic((char *)statuspage);
  66903. + warning("green-2008", "Wrong magic in status block\n");
  66904. + __free_pages(page, 0);
  66905. + bio_put(bio);
  66906. + return -EINVAL;
  66907. + }
  66908. + kunmap_atomic((char *)statuspage);
  66909. +
  66910. + get_super_private(sb)->status_page = page;
  66911. + get_super_private(sb)->status_bio = bio;
  66912. + return 0;
  66913. +}
  66914. +
  66915. +/* Query the status of fs. Returns if the FS can be safely mounted.
  66916. + Also if "status" and "extended" parameters are given, it will fill
  66917. + actual parts of status from disk there. */
  66918. +int reiser4_status_query(u64 *status, u64 *extended)
  66919. +{
  66920. + struct super_block *sb = reiser4_get_current_sb();
  66921. + struct reiser4_status *statuspage;
  66922. + int retval;
  66923. +
  66924. + if (!get_super_private(sb)->status_page)
  66925. + /* No status page? */
  66926. + return REISER4_STATUS_MOUNT_UNKNOWN;
  66927. + statuspage = (struct reiser4_status *)
  66928. + kmap_atomic(get_super_private(sb)->status_page);
  66929. + switch ((long)le64_to_cpu(get_unaligned(&statuspage->status))) {
  66930. + /* FIXME: this cast is a hack for 32 bit arches to work. */
  66931. + case REISER4_STATUS_OK:
  66932. + retval = REISER4_STATUS_MOUNT_OK;
  66933. + break;
  66934. + case REISER4_STATUS_CORRUPTED:
  66935. + retval = REISER4_STATUS_MOUNT_WARN;
  66936. + break;
  66937. + case REISER4_STATUS_DAMAGED:
  66938. + case REISER4_STATUS_DESTROYED:
  66939. + case REISER4_STATUS_IOERROR:
  66940. + retval = REISER4_STATUS_MOUNT_RO;
  66941. + break;
  66942. + default:
  66943. + retval = REISER4_STATUS_MOUNT_UNKNOWN;
  66944. + break;
  66945. + }
  66946. +
  66947. + if (status)
  66948. + *status = le64_to_cpu(get_unaligned(&statuspage->status));
  66949. + if (extended)
  66950. + *extended = le64_to_cpu(get_unaligned(&statuspage->extended_status));
  66951. +
  66952. + kunmap_atomic((char *)statuspage);
  66953. + return retval;
  66954. +}
  66955. +
  66956. +/* This function should be called when something bad happens (e.g. from
  66957. + reiser4_panic). It fills the status structure and tries to push it to disk.*/
  66958. +int reiser4_status_write(__u64 status, __u64 extended_status, char *message)
  66959. +{
  66960. + struct super_block *sb = reiser4_get_current_sb();
  66961. + struct reiser4_status *statuspage;
  66962. + struct bio *bio = get_super_private(sb)->status_bio;
  66963. +
  66964. + if (!get_super_private(sb)->status_page)
  66965. + /* No status page? */
  66966. + return -1;
  66967. + statuspage = (struct reiser4_status *)
  66968. + kmap_atomic(get_super_private(sb)->status_page);
  66969. +
  66970. + put_unaligned(cpu_to_le64(status), &statuspage->status);
  66971. + put_unaligned(cpu_to_le64(extended_status), &statuspage->extended_status);
  66972. + strncpy(statuspage->texterror, message, REISER4_TEXTERROR_LEN);
  66973. +
  66974. + kunmap_atomic((char *)statuspage);
  66975. + bio_reset(bio);
  66976. + bio_set_dev(bio, sb->s_bdev);
  66977. + bio->bi_io_vec[0].bv_page = get_super_private(sb)->status_page;
  66978. + bio->bi_io_vec[0].bv_len = sb->s_blocksize;
  66979. + bio->bi_io_vec[0].bv_offset = 0;
  66980. + bio->bi_vcnt = 1;
  66981. + bio->bi_iter.bi_size = sb->s_blocksize;
  66982. + bio->bi_end_io = reiser4_status_endio;
  66983. + lock_page(get_super_private(sb)->status_page); /* Safe as nobody should
  66984. + * touch our page. */
  66985. + /*
  66986. + * We can block now, but we have no other choice anyway
  66987. + */
  66988. + bio_set_op_attrs(bio, WRITE, 0);
  66989. + submit_bio(bio);
  66990. + /*
  66991. + * We do not wait for IO completion
  66992. + */
  66993. + return 0;
  66994. +}
  66995. +
  66996. +/* Frees the page with status and bio structure. Should be called by disk format
  66997. + * at umount time */
  66998. +int reiser4_status_finish(void)
  66999. +{
  67000. + struct super_block *sb = reiser4_get_current_sb();
  67001. +
  67002. + __free_pages(get_super_private(sb)->status_page, 0);
  67003. + get_super_private(sb)->status_page = NULL;
  67004. + bio_put(get_super_private(sb)->status_bio);
  67005. + get_super_private(sb)->status_bio = NULL;
  67006. + return 0;
  67007. +}
  67008. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/status_flags.h linux-4.14.2/fs/reiser4/status_flags.h
  67009. --- linux-4.14.2.orig/fs/reiser4/status_flags.h 1970-01-01 01:00:00.000000000 +0100
  67010. +++ linux-4.14.2/fs/reiser4/status_flags.h 2017-11-26 22:13:09.000000000 +0100
  67011. @@ -0,0 +1,47 @@
  67012. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  67013. + * reiser4/README */
  67014. +
  67015. +/* Here we declare structures and flags that store reiser4 status on disk.
  67016. + The status that helps us to find out if the filesystem is valid or if it
  67017. + contains some critical, or not so critical errors */
  67018. +
  67019. +#if !defined(__REISER4_STATUS_FLAGS_H__)
  67020. +#define __REISER4_STATUS_FLAGS_H__
  67021. +
  67022. +#include "dformat.h"
  67023. +/* These are major status flags */
  67024. +#define REISER4_STATUS_OK 0
  67025. +#define REISER4_STATUS_CORRUPTED 0x1
  67026. +#define REISER4_STATUS_DAMAGED 0x2
  67027. +#define REISER4_STATUS_DESTROYED 0x4
  67028. +#define REISER4_STATUS_IOERROR 0x8
  67029. +
  67030. +/* Return values for reiser4_status_query() */
  67031. +#define REISER4_STATUS_MOUNT_OK 0
  67032. +#define REISER4_STATUS_MOUNT_WARN 1
  67033. +#define REISER4_STATUS_MOUNT_RO 2
  67034. +#define REISER4_STATUS_MOUNT_UNKNOWN -1
  67035. +
  67036. +#define REISER4_TEXTERROR_LEN 256
  67037. +
  67038. +#define REISER4_STATUS_MAGIC "ReiSeR4StATusBl"
  67039. +/* We probably need to keep its size under sector size which is 512 bytes */
  67040. +struct reiser4_status {
  67041. + char magic[16];
  67042. + d64 status; /* Current FS state */
  67043. + d64 extended_status; /* Any additional info that might have sense in
  67044. + * addition to "status". E.g. last sector where
  67045. + * io error happened if status is
  67046. + * "io error encountered" */
  67047. + d64 stacktrace[10]; /* Last ten functional calls made (addresses) */
  67048. + char texterror[REISER4_TEXTERROR_LEN]; /* Any error message if
  67049. + * appropriate, otherwise filled
  67050. + * with zeroes */
  67051. +};
  67052. +
  67053. +int reiser4_status_init(reiser4_block_nr block);
  67054. +int reiser4_status_query(u64 *status, u64 *extended);
  67055. +int reiser4_status_write(u64 status, u64 extended_status, char *message);
  67056. +int reiser4_status_finish(void);
  67057. +
  67058. +#endif
  67059. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/super.c linux-4.14.2/fs/reiser4/super.c
  67060. --- linux-4.14.2.orig/fs/reiser4/super.c 1970-01-01 01:00:00.000000000 +0100
  67061. +++ linux-4.14.2/fs/reiser4/super.c 2017-11-26 22:13:09.000000000 +0100
  67062. @@ -0,0 +1,306 @@
  67063. +/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
  67064. + * reiser4/README */
  67065. +
  67066. +/* Super-block manipulations. */
  67067. +
  67068. +#include "debug.h"
  67069. +#include "dformat.h"
  67070. +#include "key.h"
  67071. +#include "plugin/security/perm.h"
  67072. +#include "plugin/space/space_allocator.h"
  67073. +#include "plugin/plugin.h"
  67074. +#include "tree.h"
  67075. +#include "vfs_ops.h"
  67076. +#include "super.h"
  67077. +#include "reiser4.h"
  67078. +
  67079. +#include <linux/types.h> /* for __u?? */
  67080. +#include <linux/fs.h> /* for struct super_block */
  67081. +
  67082. +static __u64 reserved_for_gid(const struct super_block *super, gid_t gid);
  67083. +static __u64 reserved_for_uid(const struct super_block *super, uid_t uid);
  67084. +static __u64 reserved_for_root(const struct super_block *super);
  67085. +
  67086. +/* Return reiser4-specific part of super block */
  67087. +reiser4_super_info_data *get_super_private_nocheck(const struct super_block *super)
  67088. +{
  67089. + return (reiser4_super_info_data *) super->s_fs_info;
  67090. +}
  67091. +
  67092. +/* Return reiser4 fstype: value that is returned in ->f_type field by statfs()
  67093. + */
  67094. +long reiser4_statfs_type(const struct super_block *super UNUSED_ARG)
  67095. +{
  67096. + assert("nikita-448", super != NULL);
  67097. + assert("nikita-449", is_reiser4_super(super));
  67098. + return (long)REISER4_SUPER_MAGIC;
  67099. +}
  67100. +
  67101. +/* functions to read/modify fields of reiser4_super_info_data */
  67102. +
  67103. +/* get number of blocks in file system */
  67104. +__u64 reiser4_block_count(const struct super_block *super /* super block
  67105. + queried */ )
  67106. +{
  67107. + assert("vs-494", super != NULL);
  67108. + assert("vs-495", is_reiser4_super(super));
  67109. + return get_super_private(super)->block_count;
  67110. +}
  67111. +
  67112. +#if REISER4_DEBUG
  67113. +/*
  67114. + * number of blocks in the current file system
  67115. + */
  67116. +__u64 reiser4_current_block_count(void)
  67117. +{
  67118. + return get_current_super_private()->block_count;
  67119. +}
  67120. +#endif /* REISER4_DEBUG */
  67121. +
  67122. +/* set number of block in filesystem */
  67123. +void reiser4_set_block_count(const struct super_block *super, __u64 nr)
  67124. +{
  67125. + assert("vs-501", super != NULL);
  67126. + assert("vs-502", is_reiser4_super(super));
  67127. + get_super_private(super)->block_count = nr;
  67128. + /*
  67129. + * For the proper calculation of the reserved space counter (5% of device
  67130. + * block counter) we need a 64 bit division which is missing in Linux
  67131. + * on i386 platform. Because we do not need a precise calculation here
  67132. + * we can replace a div64 operation by this combination of
  67133. + * multiplication and shift: 51. / (2^10) == .0498 .
  67134. + * FIXME: this is a bug. It comes up only for very small filesystems
  67135. + * which probably are never used. Nevertheless, it is a bug. Number of
  67136. + * reserved blocks must be not less than maximal number of blocks which
  67137. + * get grabbed with BA_RESERVED.
  67138. + */
  67139. + get_super_private(super)->blocks_reserved = ((nr * 51) >> 10);
  67140. +}
  67141. +
  67142. +/* amount of blocks used (allocated for data) in file system */
  67143. +__u64 reiser4_data_blocks(const struct super_block *super /* super block
  67144. + queried */ )
  67145. +{
  67146. + assert("nikita-452", super != NULL);
  67147. + assert("nikita-453", is_reiser4_super(super));
  67148. + return get_super_private(super)->blocks_used;
  67149. +}
  67150. +
  67151. +/* set number of block used in filesystem */
  67152. +void reiser4_set_data_blocks(const struct super_block *super, __u64 nr)
  67153. +{
  67154. + assert("vs-503", super != NULL);
  67155. + assert("vs-504", is_reiser4_super(super));
  67156. + get_super_private(super)->blocks_used = nr;
  67157. +}
  67158. +
  67159. +/* amount of free blocks in file system */
  67160. +__u64 reiser4_free_blocks(const struct super_block *super /* super block
  67161. + queried */ )
  67162. +{
  67163. + assert("nikita-454", super != NULL);
  67164. + assert("nikita-455", is_reiser4_super(super));
  67165. + return get_super_private(super)->blocks_free;
  67166. +}
  67167. +
  67168. +/* set number of blocks free in filesystem */
  67169. +void reiser4_set_free_blocks(const struct super_block *super, __u64 nr)
  67170. +{
  67171. + assert("vs-505", super != NULL);
  67172. + assert("vs-506", is_reiser4_super(super));
  67173. + get_super_private(super)->blocks_free = nr;
  67174. +}
  67175. +
  67176. +/* get mkfs unique identifier */
  67177. +__u32 reiser4_mkfs_id(const struct super_block *super /* super block
  67178. + queried */ )
  67179. +{
  67180. + assert("vpf-221", super != NULL);
  67181. + assert("vpf-222", is_reiser4_super(super));
  67182. + return get_super_private(super)->mkfs_id;
  67183. +}
  67184. +
/* "commit" version of the free blocks counter: free block count for the
   committed state of the file system (the old header comment was a
   copy-paste of reiser4_free_blocks()'s) */
__u64 reiser4_free_committed_blocks(const struct super_block *super)
{
	assert("vs-497", super != NULL);
	assert("vs-498", is_reiser4_super(super));
	return get_super_private(super)->blocks_free_committed;
}
  67192. +
/* amount of blocks in the file system reserved for @uid and @gid */
long reiser4_reserved_blocks(const struct super_block *super /* super block
							      * queried */ ,
			     uid_t uid /* user id */ ,
			     gid_t gid/* group id */)
{
	long reserved;

	assert("nikita-456", super != NULL);
	assert("nikita-457", is_reiser4_super(super));

	reserved = 0;
	/* the reserved_for_{gid,uid,root} helpers below are all stubs that
	   return 0, so today this always yields 0; the structure is kept for
	   future per-gid/per-uid/root space reservation support */
	if (REISER4_SUPPORT_GID_SPACE_RESERVATION)
		reserved += reserved_for_gid(super, gid);
	if (REISER4_SUPPORT_UID_SPACE_RESERVATION)
		reserved += reserved_for_uid(super, uid);
	if (REISER4_SUPPORT_ROOT_SPACE_RESERVATION && (uid == 0))
		reserved += reserved_for_root(super);
	return reserved;
}
  67213. +
/* return value of the grabbed blocks counter (blocks reserved for further
   allocation, across all threads) */
__u64 reiser4_grabbed_blocks(const struct super_block * super)
{
	assert("zam-512", super != NULL);
	assert("zam-513", is_reiser4_super(super));

	return get_super_private(super)->blocks_grabbed;
}
  67222. +
/* return number of blocks reserved for flush operations */
__u64 reiser4_flush_reserved(const struct super_block *super)
{
	assert("vpf-285", super != NULL);
	assert("vpf-286", is_reiser4_super(super));

	return get_super_private(super)->blocks_flush_reserved;
}
  67230. +
/* return value of the counter of fake allocated formatted blocks */
__u64 reiser4_fake_allocated(const struct super_block *super)
{
	assert("zam-516", super != NULL);
	assert("zam-517", is_reiser4_super(super));

	return get_super_private(super)->blocks_fake_allocated;
}
  67239. +
/* return value of the counter of fake allocated unformatted blocks */
__u64 reiser4_fake_allocated_unformatted(const struct super_block *super)
{
	/* NOTE(review): labels "zam-516"/"zam-517" duplicate the ones used in
	   reiser4_fake_allocated() above — apparently a copy-paste; reiser4
	   assert ids are meant to be unique per call site. Consider assigning
	   fresh labels (verify uniqueness against the whole tree first). */
	assert("zam-516", super != NULL);
	assert("zam-517", is_reiser4_super(super));

	return get_super_private(super)->blocks_fake_allocated_unformatted;
}
  67248. +
/* return value of the counter of blocks reserved for cluster operations */
__u64 reiser4_clustered_blocks(const struct super_block *super)
{
	assert("edward-601", super != NULL);
	assert("edward-602", is_reiser4_super(super));

	return get_super_private(super)->blocks_clustered;
}
  67257. +
  67258. +/* space allocator used by this file system */
  67259. +reiser4_space_allocator * reiser4_get_space_allocator(const struct super_block
  67260. + *super)
  67261. +{
  67262. + assert("nikita-1965", super != NULL);
  67263. + assert("nikita-1966", is_reiser4_super(super));
  67264. + return &get_super_private(super)->space_allocator;
  67265. +}
  67266. +
/* return fake inode used to bind formatted nodes in the page cache */
struct inode *reiser4_get_super_fake(const struct super_block *super)
{
	assert("nikita-1757", super != NULL);
	return get_super_private(super)->fake;
}
  67273. +
/* return fake inode used to bind copied on capture nodes in the page cache */
struct inode *reiser4_get_cc_fake(const struct super_block *super)
{
	/* NOTE(review): label "nikita-1757" duplicates the one in
	   reiser4_get_super_fake() above — looks like a copy-paste; assert
	   ids are meant to be unique. */
	assert("nikita-1757", super != NULL);
	return get_super_private(super)->cc;
}
  67280. +
/* return fake inode used to bind bitmaps and journal heads */
struct inode *reiser4_get_bitmap_fake(const struct super_block *super)
{
	assert("nikita-17571", super != NULL);
	return get_super_private(super)->bitmap;
}
  67287. +
  67288. +/* tree used by this file system */
  67289. +reiser4_tree *reiser4_get_tree(const struct super_block *super)
  67290. +{
  67291. + assert("nikita-460", super != NULL);
  67292. + assert("nikita-461", is_reiser4_super(super));
  67293. + return &get_super_private(super)->tree;
  67294. +}
  67295. +
/* Check that @super is (looks like) a reiser4 super block. This is mainly
   for use in assertions. NULL-safe: recognizes a reiser4 super block by its
   ->s_op pointing at this filesystem's own super_operations vector. */
int is_reiser4_super(const struct super_block *super)
{
	return
	    super != NULL &&
	    get_super_private(super) != NULL &&
	    super->s_op == &(get_super_private(super)->ops.super);
}
  67305. +
/* true if file-system wide flag @f (see reiser4_fs_flag enum) is set on
   @super */
int reiser4_is_set(const struct super_block *super, reiser4_fs_flag f)
{
	return test_bit((int)f, &get_super_private(super)->fs_flags);
}
  67310. +
/* amount of blocks reserved for given group in file system
   (stub: per-gid reservation is not implemented, always 0) */
static __u64 reserved_for_gid(const struct super_block *super UNUSED_ARG,
			      gid_t gid UNUSED_ARG/* group id */)
{
	return 0;
}
  67317. +
/* amount of blocks reserved for given user in file system
   (stub: per-uid reservation is not implemented, always 0) */
static __u64 reserved_for_uid(const struct super_block *super UNUSED_ARG,
			      uid_t uid UNUSED_ARG/* user id */)
{
	return 0;
}
  67324. +
/* amount of blocks reserved for super user in file system
   (stub: root reservation is not implemented, always 0) */
static __u64 reserved_for_root(const struct super_block *super UNUSED_ARG)
{
	return 0;
}
  67330. +
/*
 * true if block number @blk makes sense for the file system at @super:
 * either it is a fake (not yet allocated) block number, or it lies below
 * the total block count of the device.
 */
int
reiser4_blocknr_is_sane_for(const struct super_block *super,
			    const reiser4_block_nr * blk)
{
	reiser4_super_info_data *sbinfo;

	assert("nikita-2957", super != NULL);
	assert("nikita-2958", blk != NULL);

	/* fake block numbers are always considered sane */
	if (reiser4_blocknr_is_fake(blk))
		return 1;

	sbinfo = get_super_private(super);
	return *blk < sbinfo->block_count;
}
  67349. +
#if REISER4_DEBUG
/*
 * true, if block number @blk makes sense for the current file system
 * (debug-only convenience wrapper around reiser4_blocknr_is_sane_for())
 */
int reiser4_blocknr_is_sane(const reiser4_block_nr * blk)
{
	return reiser4_blocknr_is_sane_for(reiser4_get_current_sb(), blk);
}
#endif /* REISER4_DEBUG */
  67359. +
  67360. +/* Make Linus happy.
  67361. + Local variables:
  67362. + c-indentation-style: "K&R"
  67363. + mode-name: "LC"
  67364. + c-basic-offset: 8
  67365. + tab-width: 8
  67366. + fill-column: 120
  67367. + End:
  67368. +*/
  67369. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/super.h linux-4.14.2/fs/reiser4/super.h
  67370. --- linux-4.14.2.orig/fs/reiser4/super.h 1970-01-01 01:00:00.000000000 +0100
  67371. +++ linux-4.14.2/fs/reiser4/super.h 2017-11-26 22:13:09.000000000 +0100
  67372. @@ -0,0 +1,472 @@
  67373. +/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
  67374. + * reiser4/README */
  67375. +
  67376. +/* Super-block functions. See super.c for details. */
  67377. +
  67378. +#if !defined(__REISER4_SUPER_H__)
  67379. +#define __REISER4_SUPER_H__
  67380. +
  67381. +#include <linux/exportfs.h>
  67382. +
  67383. +#include "tree.h"
  67384. +#include "entd.h"
  67385. +#include "wander.h"
  67386. +#include "fsdata.h"
  67387. +#include "plugin/object.h"
  67388. +#include "plugin/space/space_allocator.h"
  67389. +
/*
 * Flush algorithm parameters (filled from mount options / defaults).
 * The field names are suggestive only; the actual semantics live in
 * flush.c — confirm there before relying on any of them.
 */
struct flush_params {
	/* presumably: dirty-node count above which relocation is preferred */
	unsigned relocate_threshold;
	/* presumably: max distance for a block to still count as "close" */
	unsigned relocate_distance;
	unsigned written_threshold;
	/* presumably: cap on nodes examined by the flush scan */
	unsigned scan_maxnodes;
};
  67399. +
typedef enum {
	/*
	 * True if this file system doesn't support hard-links (multiple names)
	 * for directories: this is default UNIX behavior.
	 *
	 * If hard-links on directories are not allowed, file system is an
	 * Acyclic Directed Graph (modulo dot, and dotdot, of course).
	 *
	 * This is used by reiser4_link().
	 */
	REISER4_ADG = 0,
	/*
	 * set if all nodes in internal tree have the same node layout plugin.
	 * If so, znode_guess_plugin() will return tree->node_plugin instead
	 * of guessing plugin by plugin id stored in the node.
	 */
	REISER4_ONE_NODE_PLUGIN = 1,
	/* if set, bsd gid assignment is supported. */
	REISER4_BSD_GID = 2,
	/* [mac]_time are 32 bit in inode */
	REISER4_32_BIT_TIMES = 3,
	/* NOTE(review): the original comment ("load all bitmap blocks at
	   mount time") contradicts the flag name; presumably the flag being
	   set suppresses that preload — confirm against block_alloc.c */
	REISER4_DONT_LOAD_BITMAP = 5,
	/* enforce atomicity during write(2) */
	REISER4_ATOMIC_WRITE = 6,
	/* enable issuing of discard requests */
	REISER4_DISCARD = 8,
	/* disable hole punching at flush time */
	REISER4_DONT_PUNCH_HOLES = 9
	/* NOTE(review): values 4 and 7 are skipped, presumably retired
	   flags; keep the remaining bit positions stable. */
} reiser4_fs_flag;
  67430. +
/*
 * VFS related operation vectors.
 */
struct object_ops {
	/* installed into ->s_op of the super block (is_reiser4_super()
	   identifies a reiser4 super block by comparing against this) */
	struct super_operations super;
	struct dentry_operations dentry;
	struct export_operations export;
};
  67439. +
  67440. +/* reiser4-specific part of super block
  67441. +
  67442. + Locking
  67443. +
  67444. + Fields immutable after mount:
  67445. +
  67446. + ->oid*
  67447. + ->space*
  67448. + ->default_[ug]id
  67449. + ->mkfs_id
  67450. + ->trace_flags
  67451. + ->debug_flags
  67452. + ->fs_flags
  67453. + ->df_plug
  67454. + ->optimal_io_size
  67455. + ->plug
  67456. + ->flush
  67457. + ->u (bad name)
  67458. + ->txnmgr
  67459. + ->ra_params
  67460. + ->fsuid
  67461. + ->journal_header
  67462. + ->journal_footer
  67463. +
  67464. + Fields protected by ->lnode_guard
  67465. +
  67466. + ->lnode_htable
  67467. +
  67468. + Fields protected by per-super block spin lock
  67469. +
  67470. + ->block_count
  67471. + ->blocks_used
  67472. + ->blocks_free
  67473. + ->blocks_free_committed
  67474. + ->blocks_grabbed
  67475. + ->blocks_fake_allocated_unformatted
  67476. + ->blocks_fake_allocated
  67477. + ->blocks_flush_reserved
  67478. + ->eflushed
  67479. + ->blocknr_hint_default
  67480. +
  67481. + After journal replaying during mount,
  67482. +
  67483. + ->last_committed_tx
  67484. +
  67485. + is protected by ->tmgr.commit_mutex
  67486. +
  67487. + Invariants involving this data-type:
  67488. +
  67489. + [sb-block-counts]
  67490. + [sb-grabbed]
  67491. + [sb-fake-allocated]
  67492. +*/
struct reiser4_super_info_data {
	/*
	 * guard spinlock which protects reiser4 super block fields (currently
	 * blocks_free, blocks_free_committed)
	 */
	spinlock_t guard;

	/* next oid that will be returned by oid_allocate() */
	oid_t next_to_use;
	/* total number of used oids */
	oid_t oids_in_use;

	/* space manager plugin */
	reiser4_space_allocator space_allocator;

	/* transaction model */
	reiser4_txmod_id txmod;

	/* reiser4 internal tree */
	reiser4_tree tree;

	/*
	 * default user id used for light-weight files without their own
	 * stat-data.
	 */
	__u32 default_uid;

	/*
	 * default group id used for light-weight files without their own
	 * stat-data.
	 */
	__u32 default_gid;

	/* mkfs identifier generated at mkfs time. */
	__u32 mkfs_id;
	/* amount of blocks in a file system */
	__u64 block_count;

	/* inviolable reserve (set to ~5% of block_count by
	   reiser4_set_block_count()) */
	__u64 blocks_reserved;

	/* amount of blocks used by file system data and meta-data. */
	__u64 blocks_used;

	/*
	 * amount of free blocks. This is "working" free blocks counter. It is
	 * like "working" bitmap, please see block_alloc.c for description.
	 */
	__u64 blocks_free;

	/*
	 * free block count for fs committed state. This is "commit" version of
	 * free block counter.
	 */
	__u64 blocks_free_committed;

	/*
	 * number of blocks reserved for further allocation, for all
	 * threads.
	 */
	__u64 blocks_grabbed;

	/* number of fake allocated unformatted blocks in tree. */
	__u64 blocks_fake_allocated_unformatted;

	/* number of fake allocated formatted blocks in tree. */
	__u64 blocks_fake_allocated;

	/* number of blocks reserved for flush operations. */
	__u64 blocks_flush_reserved;

	/* number of blocks reserved for cluster operations. */
	__u64 blocks_clustered;

	/* unique file-system identifier */
	__u32 fsuid;

	/* On-disk format version. If does not equal to the disk_format
	   plugin version, some format updates (e.g. enlarging plugin
	   set, etc) may take place on mount. */
	int version;

	/* file-system wide flags. See reiser4_fs_flag enum */
	unsigned long fs_flags;

	/* transaction manager */
	txn_mgr tmgr;

	/* ent thread */
	entd_context entd;

	/* fake inode used to bind formatted nodes */
	struct inode *fake;
	/* inode used to bind bitmaps (and journal heads) */
	struct inode *bitmap;
	/* inode used to bind copied on capture nodes */
	struct inode *cc;

	/* disk layout plugin */
	disk_format_plugin *df_plug;

	/* disk layout specific part of reiser4 super info data */
	union {
		format40_super_info format40;
	} u;

	/* value we return in st_blksize on stat(2) */
	unsigned long optimal_io_size;

	/* parameters for the flush algorithm */
	struct flush_params flush;

	/* pointers to jnodes for journal header and footer */
	jnode *journal_header;
	jnode *journal_footer;

	/* location of the journal on disk — see wander.h; presumably
	   filled by the disk format plugin at mount time */
	journal_location jloc;

	/* head block number of last committed transaction */
	__u64 last_committed_tx;

	/*
	 * we remember last written location for using as a hint for new block
	 * allocation
	 */
	__u64 blocknr_hint_default;

	/* committed number of files (oid allocator state variable ) */
	__u64 nr_files_committed;

	/* read-ahead parameters for formatted nodes — name suggests;
	   see tree-walk/read-ahead code for actual use */
	struct formatted_ra_params ra_params;

	/*
	 * A mutex for serializing cut tree operation if out-of-free-space:
	 * the only one cut_tree thread is allowed to grab space from reserved
	 * area (it is 5% of disk space)
	 */
	struct mutex delete_mutex;
	/* task owning ->delete_mutex */
	struct task_struct *delete_mutex_owner;

	/* Diskmap's blocknumber */
	__u64 diskmap_block;

	/* What to do in case of error */
	int onerror;

	/* operations for objects on this file system */
	struct object_ops ops;

	/*
	 * structure to maintain d_cursors. See plugin/file_ops_readdir.c for
	 * more details
	 */
	struct d_cursor_info d_info;
	/* checksum transform — presumably used for data/metadata
	   checksumming; see checksum.c */
	struct crypto_shash *csum_tfm;

#ifdef CONFIG_REISER4_BADBLOCKS
	/* Alternative master superblock offset (in bytes) */
	unsigned long altsuper;
#endif
	struct repacker *repacker;
	/* page/bio used for the on-disk status block — TODO confirm
	   against status_flags.c */
	struct page *status_page;
	struct bio *status_bio;

#if REISER4_DEBUG
	/*
	 * minimum used blocks value (includes super blocks, bitmap blocks and
	 * other fs reserved areas), depends on fs format and fs size.
	 */
	__u64 min_blocks_used;

	/*
	 * when debugging is on, all jnodes (including znodes, bitmaps, etc.)
	 * are kept on a list anchored at sbinfo->all_jnodes. This list is
	 * protected by sbinfo->all_guard spin lock. This lock should be taken
	 * with _irq modifier, because it is also modified from interrupt
	 * contexts (by RCU).
	 */
	spinlock_t all_guard;
	/* list of all jnodes */
	struct list_head all_jnodes;
#endif
	/* per-mount debugfs directory (removed in reiser4_put_super()) */
	struct dentry *debugfs_root;
};
  67678. +
  67679. +extern reiser4_super_info_data *get_super_private_nocheck(const struct
  67680. + super_block * super);
  67681. +
/* Return reiser4-specific part of super block: ->s_fs_info holds the
   reiser4_super_info_data allocated at mount time. Asserts on NULL @super
   but does not verify this is really a reiser4 super block (use
   is_reiser4_super() for that). */
static inline reiser4_super_info_data *get_super_private(const struct
							 super_block * super)
{
	assert("nikita-447", super != NULL);

	return (reiser4_super_info_data *) super->s_fs_info;
}
  67690. +
  67691. +/* get ent context for the @super */
  67692. +static inline entd_context *get_entd_context(struct super_block *super)
  67693. +{
  67694. + return &get_super_private(super)->entd;
  67695. +}
  67696. +
/* "Current" super-block: main super block used during current system
   call. Reference to this super block is stored in reiser4_context. */
static inline struct super_block *reiser4_get_current_sb(void)
{
	return get_current_context()->super;
}
  67703. +
/* Reiser4-specific part of "current" super-block: main super block used
   during current system call. Reference to this super block is stored in
   reiser4_context. */
static inline reiser4_super_info_data *get_current_super_private(void)
{
	return get_super_private(reiser4_get_current_sb());
}
  67711. +
  67712. +static inline struct formatted_ra_params *get_current_super_ra_params(void)
  67713. +{
  67714. + return &(get_current_super_private()->ra_params);
  67715. +}
  67716. +
/*
 * true, if file system on @super is read-only
 *
 * NOTE(review): uses the legacy MS_RDONLY mount flag; kernels >= 4.13
 * provide sb_rdonly()/SB_RDONLY as the preferred spelling — confirm
 * before converting.
 */
static inline int rofs_super(struct super_block *super)
{
	return super->s_flags & MS_RDONLY;
}
  67724. +
/*
 * true, if @tree represents a read-only file system
 */
static inline int rofs_tree(reiser4_tree * tree)
{
	return rofs_super(tree->super);
}
  67732. +
/*
 * true, if the file system where @inode lives on is read-only
 */
static inline int rofs_inode(struct inode *inode)
{
	return rofs_super(inode->i_sb);
}
  67740. +
/*
 * true, if the file system where @node lives on is read-only
 */
static inline int rofs_jnode(jnode * node)
{
	return rofs_tree(jnode_get_tree(node));
}
  67748. +
  67749. +extern __u64 reiser4_current_block_count(void);
  67750. +
  67751. +extern void build_object_ops(struct super_block *super, struct object_ops *ops);
  67752. +
  67753. +#define REISER4_SUPER_MAGIC 0x52345362 /* (*(__u32 *)"R4Sb"); */
  67754. +
/* take the per-super-block guard spinlock protecting the block counters
   (see reiser4_super_info_data::guard) */
static inline void spin_lock_reiser4_super(reiser4_super_info_data *sbinfo)
{
	spin_lock(&(sbinfo->guard));
}
  67759. +
/* release the per-super-block guard spinlock; asserts it is actually held */
static inline void spin_unlock_reiser4_super(reiser4_super_info_data *sbinfo)
{
	assert_spin_locked(&(sbinfo->guard));
	spin_unlock(&(sbinfo->guard));
}
  67765. +
  67766. +extern __u64 reiser4_flush_reserved(const struct super_block *);
  67767. +extern int reiser4_is_set(const struct super_block *super, reiser4_fs_flag f);
  67768. +extern long reiser4_statfs_type(const struct super_block *super);
  67769. +extern __u64 reiser4_block_count(const struct super_block *super);
  67770. +extern void reiser4_set_block_count(const struct super_block *super, __u64 nr);
  67771. +extern __u64 reiser4_data_blocks(const struct super_block *super);
  67772. +extern void reiser4_set_data_blocks(const struct super_block *super, __u64 nr);
  67773. +extern __u64 reiser4_free_blocks(const struct super_block *super);
  67774. +extern void reiser4_set_free_blocks(const struct super_block *super, __u64 nr);
  67775. +extern __u32 reiser4_mkfs_id(const struct super_block *super);
  67776. +
  67777. +extern __u64 reiser4_free_committed_blocks(const struct super_block *super);
  67778. +
  67779. +extern __u64 reiser4_grabbed_blocks(const struct super_block *);
  67780. +extern __u64 reiser4_fake_allocated(const struct super_block *);
  67781. +extern __u64 reiser4_fake_allocated_unformatted(const struct super_block *);
  67782. +extern __u64 reiser4_clustered_blocks(const struct super_block *);
  67783. +
  67784. +extern long reiser4_reserved_blocks(const struct super_block *super, uid_t uid,
  67785. + gid_t gid);
  67786. +
  67787. +extern reiser4_space_allocator *
  67788. +reiser4_get_space_allocator(const struct super_block *super);
  67789. +extern reiser4_oid_allocator *
  67790. +reiser4_get_oid_allocator(const struct super_block *super);
  67791. +extern struct inode *reiser4_get_super_fake(const struct super_block *super);
  67792. +extern struct inode *reiser4_get_cc_fake(const struct super_block *super);
  67793. +extern struct inode *reiser4_get_bitmap_fake(const struct super_block *super);
  67794. +extern reiser4_tree *reiser4_get_tree(const struct super_block *super);
  67795. +extern int is_reiser4_super(const struct super_block *super);
  67796. +
  67797. +extern int reiser4_blocknr_is_sane(const reiser4_block_nr * blk);
  67798. +extern int reiser4_blocknr_is_sane_for(const struct super_block *super,
  67799. + const reiser4_block_nr * blk);
  67800. +extern int reiser4_fill_super(struct super_block *s, void *data, int silent);
  67801. +extern int reiser4_done_super(struct super_block *s);
  67802. +
  67803. +/* step of fill super */
  67804. +extern int reiser4_init_fs_info(struct super_block *);
  67805. +extern void reiser4_done_fs_info(struct super_block *);
  67806. +extern int reiser4_init_super_data(struct super_block *, char *opt_string);
  67807. +extern int reiser4_init_read_super(struct super_block *, int silent);
  67808. +extern int reiser4_init_root_inode(struct super_block *);
  67809. +extern reiser4_plugin *get_default_plugin(pset_member memb);
  67810. +
  67811. +/* Maximal possible object id. */
  67812. +#define ABSOLUTE_MAX_OID ((oid_t)~0)
  67813. +
  67814. +#define OIDS_RESERVED (1 << 16)
  67815. +int oid_init_allocator(struct super_block *, oid_t nr_files, oid_t next);
  67816. +oid_t oid_allocate(struct super_block *);
  67817. +int oid_release(struct super_block *, oid_t);
  67818. +oid_t oid_next(const struct super_block *);
  67819. +void oid_count_allocated(void);
  67820. +void oid_count_released(void);
  67821. +long oids_used(const struct super_block *);
  67822. +
  67823. +#if REISER4_DEBUG
  67824. +void print_fs_info(const char *prefix, const struct super_block *);
  67825. +#endif
  67826. +
  67827. +extern void destroy_reiser4_cache(struct kmem_cache **);
  67828. +
  67829. +extern struct super_operations reiser4_super_operations;
  67830. +extern struct export_operations reiser4_export_operations;
  67831. +extern struct dentry_operations reiser4_dentry_operations;
  67832. +
  67833. +/* __REISER4_SUPER_H__ */
  67834. +#endif
  67835. +
  67836. +/*
  67837. + * Local variables:
  67838. + * c-indentation-style: "K&R"
  67839. + * mode-name: "LC"
  67840. + * c-basic-offset: 8
  67841. + * tab-width: 8
  67842. + * fill-column: 120
  67843. + * End:
  67844. + */
  67845. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/super_ops.c linux-4.14.2/fs/reiser4/super_ops.c
  67846. --- linux-4.14.2.orig/fs/reiser4/super_ops.c 1970-01-01 01:00:00.000000000 +0100
  67847. +++ linux-4.14.2/fs/reiser4/super_ops.c 2017-11-26 22:13:09.000000000 +0100
  67848. @@ -0,0 +1,783 @@
  67849. +/* Copyright 2005 by Hans Reiser, licensing governed by
  67850. + * reiser4/README */
  67851. +
  67852. +#include "inode.h"
  67853. +#include "page_cache.h"
  67854. +#include "ktxnmgrd.h"
  67855. +#include "flush.h"
  67856. +#include "safe_link.h"
  67857. +#include "checksum.h"
  67858. +
  67859. +#include <linux/vfs.h>
  67860. +#include <linux/writeback.h>
  67861. +#include <linux/mount.h>
  67862. +#include <linux/seq_file.h>
  67863. +#include <linux/debugfs.h>
  67864. +#include <linux/backing-dev.h>
  67865. +#include <linux/module.h>
  67866. +
  67867. +/* slab cache for inodes */
  67868. +static struct kmem_cache *inode_cache;
  67869. +
  67870. +static struct dentry *reiser4_debugfs_root = NULL;
  67871. +
/**
 * init_once - constructor for reiser4 inodes
 * @obj: inode object to be initialized
 *
 * Slab constructor called when a new object is allocated by the reiser4
 * inode cache. It is set on inode cache creation (see init_inodes()).
 */
static void init_once(void *obj)
{
	struct reiser4_inode_object *info;

	info = obj;

	/* initialize vfs inode */
	inode_init_once(&info->vfs_inode);

	/*
	 * initialize reiser4 specific part of inode.
	 * NOTE-NIKITA add here initializations for locks, list heads,
	 * etc. that will be added to our private inode part.
	 */
	INIT_LIST_HEAD(get_readdir_list(&info->vfs_inode));
	init_rwsem(&info->p.conv_sem);
	/* init semaphore which is used during inode loading */
	loading_init_once(&info->p);
	INIT_RADIX_TREE(jnode_tree_by_reiser4_inode(&info->p),
			GFP_ATOMIC);
#if REISER4_DEBUG
	info->p.nr_jnodes = 0;
#endif
}
  67904. +
/**
 * init_inodes - create inode cache
 *
 * Initializes the slab cache of reiser4 inodes. It is part of reiser4
 * module initialization. (The old comment said "znode cache" — this
 * creates the inode cache.)
 *
 * Returns: 0 on success, -ENOMEM if the cache cannot be created.
 */
static int init_inodes(void)
{
	inode_cache = kmem_cache_create("reiser4_inode",
					sizeof(struct reiser4_inode_object),
					0,
					SLAB_HWCACHE_ALIGN |
					SLAB_RECLAIM_ACCOUNT, init_once);
	if (inode_cache == NULL)
		return RETERR(-ENOMEM);
	return 0;
}
  67921. +
/**
 * done_inodes - delete inode cache
 *
 * This is called on reiser4 module unloading or system shutdown.
 */
static void done_inodes(void)
{
	destroy_reiser4_cache(&inode_cache);
}
  67931. +
/**
 * reiser4_alloc_inode - alloc_inode of super operations
 * @super: super block new inode is allocated for
 *
 * Allocates a new inode from the slab cache and initializes the reiser4
 * specific part of it (plugin sets, seal, coordinate, loading semaphore).
 *
 * Returns: pointer to the embedded VFS inode, or NULL on allocation failure.
 */
static struct inode *reiser4_alloc_inode(struct super_block *super)
{
	struct reiser4_inode_object *obj;

	assert("nikita-1696", super != NULL);
	obj = kmem_cache_alloc(inode_cache, reiser4_ctx_gfp_mask_get());
	if (obj != NULL) {
		reiser4_inode *info;

		info = &obj->p;

		info->pset = plugin_set_get_empty();
		info->hset = plugin_set_get_empty();
		info->extmask = 0;
		info->locality_id = 0ull;
		info->plugin_mask = 0;
		info->heir_mask = 0;
#if !REISER4_INO_IS_OID
		info->oid_hi = 0;
#endif
		reiser4_seal_init(&info->sd_seal, NULL, NULL);
		coord_init_invalid(&info->sd_coord, NULL);
		info->flags = 0;
		spin_lock_init(&info->guard);
		/* this deals with info's loading semaphore */
		loading_alloc(info);
		info->vroot = UBER_TREE_ADDR;
		return &obj->vfs_inode;
	} else
		return NULL;
}
  67969. +
/**
 * reiser4_destroy_inode - destroy_inode of super operations
 * @inode: inode being destroyed
 *
 * Puts reiser4 specific portion of inode (plugin sets, cursors, loading
 * semaphore), then frees the memory occupied by the inode object.
 */
static void reiser4_destroy_inode(struct inode *inode)
{
	reiser4_inode *info;

	info = reiser4_inode_data(inode);

	assert("vs-1220", inode_has_no_jnodes(info));

	/* give the file plugin a chance to release per-inode state, but only
	   for inodes that were actually loaded and are not bad */
	if (!is_bad_inode(inode) && is_inode_loaded(inode)) {
		file_plugin *fplug = inode_file_plugin(inode);
		if (fplug->destroy_inode != NULL)
			fplug->destroy_inode(inode);
	}
	reiser4_dispose_cursors(inode);
	if (info->pset)
		plugin_set_put(info->pset);
	if (info->hset)
		plugin_set_put(info->hset);

	/*
	 * cannot add similar assertion about ->i_list as prune_icache return
	 * inode into slab with dangling ->list.{next,prev}. This is safe,
	 * because they are re-initialized in the new_inode().
	 */
	assert("nikita-2895", hlist_empty(&inode->i_dentry));
	assert("nikita-2896", hlist_unhashed(&inode->i_hash));
	assert("nikita-2898", list_empty_careful(get_readdir_list(inode)));

	/* this deals with info's loading semaphore */
	loading_destroy(info);

	kmem_cache_free(inode_cache,
			container_of(info, struct reiser4_inode_object, p));
}
  68010. +
/**
 * reiser4_dirty_inode - dirty_inode of super operations
 * @inode: inode being dirtied
 * @flags: dirtying flags passed by VFS (unused here)
 *
 * Updates the inode's stat data on disk. No-op when called outside of a
 * reiser4 context. Warns (does not fail) if the update itself fails.
 */
static void reiser4_dirty_inode(struct inode *inode, int flags)
{
	int result;
	reiser4_context *ctx;

	if (!is_in_reiser4_context())
		return;
	assert("edward-1606", !IS_RDONLY(inode));
	assert("edward-1607",
	       (inode_file_plugin(inode)->estimate.update(inode) <=
		get_current_context()->grabbed_blocks));

	/* temporarily drop the page lock held in this context around the
	   stat-data update — presumably to avoid deadlock in
	   reiser4_update_sd(); TODO confirm */
	ctx = get_current_context();
	if (ctx->locked_page)
		unlock_page(ctx->locked_page);

	result = reiser4_update_sd(inode);

	if (ctx->locked_page)
		lock_page(ctx->locked_page);
	if (result)
		warning("edward-1605", "failed to dirty inode for %llu: %d",
			get_inode_oid(inode), result);
}
  68041. +
  68042. +/**
  68043. + * ->evict_inode() of super operations
  68044. + * @inode: inode to delete
  68045. + *
  68046. + * Calls file plugin's delete_object method to delete object items from
  68047. + * filesystem tree and calls clear_inode().
  68048. + */
  68049. +static void reiser4_evict_inode(struct inode *inode)
  68050. +{
  68051. + reiser4_context *ctx;
  68052. + file_plugin *fplug;
  68053. +
  68054. + ctx = reiser4_init_context(inode->i_sb);
  68055. + if (IS_ERR(ctx)) {
  68056. + warning("vs-15", "failed to init context");
  68057. + return;
  68058. + }
  68059. +
  68060. + if (inode->i_nlink == 0 && is_inode_loaded(inode)) {
  68061. + fplug = inode_file_plugin(inode);
  68062. + if (fplug != NULL && fplug->delete_object != NULL)
  68063. + fplug->delete_object(inode);
  68064. + }
  68065. +
  68066. + truncate_inode_pages_final(&inode->i_data);
  68067. + inode->i_blocks = 0;
  68068. + clear_inode(inode);
  68069. + reiser4_exit_context(ctx);
  68070. +}
  68071. +
  68072. +/**
  68073. + * reiser4_put_super - put_super of super operations
  68074. + * @super: super block to free
  68075. + *
  68076. + * Stops daemons, release resources, umounts in short.
  68077. + */
  68078. +static void reiser4_put_super(struct super_block *super)
  68079. +{
  68080. + reiser4_super_info_data *sbinfo;
  68081. + reiser4_context *ctx;
  68082. +
  68083. + sbinfo = get_super_private(super);
  68084. + assert("vs-1699", sbinfo);
  68085. +
  68086. + debugfs_remove(sbinfo->tmgr.debugfs_atom_count);
  68087. + debugfs_remove(sbinfo->tmgr.debugfs_id_count);
  68088. + debugfs_remove(sbinfo->debugfs_root);
  68089. +
  68090. + ctx = reiser4_init_context(super);
  68091. + if (IS_ERR(ctx)) {
  68092. + warning("vs-17", "failed to init context");
  68093. + return;
  68094. + }
  68095. +
  68096. + /* have disk format plugin to free its resources */
  68097. + if (get_super_private(super)->df_plug->release)
  68098. + get_super_private(super)->df_plug->release(super);
  68099. +
  68100. + reiser4_done_formatted_fake(super);
  68101. + reiser4_done_csum_tfm(sbinfo->csum_tfm);
  68102. +
  68103. + /* stop daemons: ktxnmgr and entd */
  68104. + reiser4_done_entd(super);
  68105. + reiser4_done_ktxnmgrd(super);
  68106. + reiser4_done_txnmgr(&sbinfo->tmgr);
  68107. +
  68108. + assert("edward-1890", list_empty(&get_super_private(super)->all_jnodes));
  68109. + assert("edward-1891", get_current_context()->trans->atom == NULL);
  68110. + reiser4_check_block_counters(super);
  68111. +
  68112. + reiser4_exit_context(ctx);
  68113. + reiser4_done_fs_info(super);
  68114. +}
  68115. +
  68116. +/**
  68117. + * reiser4_statfs - statfs of super operations
  68118. + * @super: super block of queried file system
  68119. + * @statfs: buffer to fill with statistics
  68120. + *
  68121. + * Returns information about filesystem.
  68122. + */
  68123. +static int reiser4_statfs(struct dentry *dentry, struct kstatfs *statfs)
  68124. +{
  68125. + sector_t total;
  68126. + sector_t reserved;
  68127. + sector_t free;
  68128. + sector_t forroot;
  68129. + sector_t deleted;
  68130. + reiser4_context *ctx;
  68131. + struct super_block *super = dentry->d_sb;
  68132. +
  68133. + assert("nikita-408", super != NULL);
  68134. + assert("nikita-409", statfs != NULL);
  68135. +
  68136. + ctx = reiser4_init_context(super);
  68137. + if (IS_ERR(ctx))
  68138. + return PTR_ERR(ctx);
  68139. +
  68140. + statfs->f_type = reiser4_statfs_type(super);
  68141. + statfs->f_bsize = super->s_blocksize;
  68142. +
  68143. + /*
  68144. + * 5% of total block space is reserved. This is needed for flush and
  68145. + * for truncates (so that we are able to perform truncate/unlink even
  68146. + * on the otherwise completely full file system). If this reservation
  68147. + * is hidden from statfs(2), users will mistakenly guess that they
  68148. + * have enough free space to complete some operation, which is
  68149. + * frustrating.
  68150. + *
  68151. + * Another possible solution is to subtract ->blocks_reserved from
  68152. + * ->f_bfree, but changing available space seems less intrusive than
  68153. + * letting user to see 5% of disk space to be used directly after
  68154. + * mkfs.
  68155. + */
  68156. + total = reiser4_block_count(super);
  68157. + reserved = get_super_private(super)->blocks_reserved;
  68158. + deleted = txnmgr_count_deleted_blocks();
  68159. + free = reiser4_free_blocks(super) + deleted;
  68160. + forroot = reiser4_reserved_blocks(super, 0, 0);
  68161. +
  68162. + /*
  68163. + * These counters may be in inconsistent state because we take the
  68164. + * values without keeping any global spinlock. Here we do a sanity
  68165. + * check that free block counter does not exceed the number of all
  68166. + * blocks.
  68167. + */
  68168. + if (free > total)
  68169. + free = total;
  68170. + statfs->f_blocks = total - reserved;
  68171. + /* make sure statfs->f_bfree is never larger than statfs->f_blocks */
  68172. + if (free > reserved)
  68173. + free -= reserved;
  68174. + else
  68175. + free = 0;
  68176. + statfs->f_bfree = free;
  68177. +
  68178. + if (free > forroot)
  68179. + free -= forroot;
  68180. + else
  68181. + free = 0;
  68182. + statfs->f_bavail = free;
  68183. +
  68184. + statfs->f_files = 0;
  68185. + statfs->f_ffree = 0;
  68186. +
  68187. + /* maximal acceptable name length depends on directory plugin. */
  68188. + assert("nikita-3351", super->s_root->d_inode != NULL);
  68189. + statfs->f_namelen = reiser4_max_filename_len(super->s_root->d_inode);
  68190. + reiser4_exit_context(ctx);
  68191. + return 0;
  68192. +}
  68193. +
  68194. +/**
  68195. + * reiser4_writeback_inodes - writeback_inodes of super operations
  68196. + * @super:
  68197. + * @wb:
  68198. + * @wbc:
  68199. + *
  68200. + * This method is called by background and non-background writeback.
  68201. + * Reiser4's implementation uses generic_writeback_sb_inodes to call
  68202. + * reiser4_writepages_dispatch for each of dirty inodes.
  68203. + * reiser4_writepages_dispatch handles pages dirtied via shared
  68204. + * mapping - dirty pages get into atoms. Writeout is called to flush
  68205. + * some atoms.
  68206. + */
  68207. +static long reiser4_writeback_inodes(struct super_block *super,
  68208. + struct bdi_writeback *wb,
  68209. + struct writeback_control *wbc,
  68210. + struct wb_writeback_work *work,
  68211. + bool flush_all)
  68212. +{
  68213. + long result;
  68214. + reiser4_context *ctx;
  68215. +
  68216. + if (wbc->for_kupdate)
  68217. + /* reiser4 has its own means of periodical write-out */
  68218. + goto skip;
  68219. +
  68220. + spin_unlock(&wb->list_lock);
  68221. + ctx = reiser4_init_context(super);
  68222. + if (IS_ERR(ctx)) {
  68223. + warning("vs-13", "failed to init context");
  68224. + spin_lock(&wb->list_lock);
  68225. + goto skip;
  68226. + }
  68227. + /*
  68228. + * call reiser4_writepages for each of dirty inodes to turn
  68229. + * dirty pages into transactions if they were not yet.
  68230. + */
  68231. + spin_lock(&wb->list_lock);
  68232. + result = generic_writeback_sb_inodes(super, wb, wbc, work, flush_all);
  68233. + spin_unlock(&wb->list_lock);
  68234. +
  68235. + if (result <= 0)
  68236. + goto exit;
  68237. + wbc->nr_to_write = result;
  68238. +
  68239. + /* flush goes here */
  68240. + reiser4_writeout(super, wbc);
  68241. + exit:
  68242. + /* avoid recursive calls to ->writeback_inodes */
  68243. + context_set_commit_async(ctx);
  68244. + reiser4_exit_context(ctx);
  68245. + spin_lock(&wb->list_lock);
  68246. +
  68247. + return result;
  68248. + skip:
  68249. + writeback_skip_sb_inodes(super, wb);
  68250. + return 0;
  68251. +}
  68252. +
  68253. +/* ->sync_fs() of super operations */
  68254. +static int reiser4_sync_fs(struct super_block *super, int wait)
  68255. +{
  68256. + reiser4_context *ctx;
  68257. + struct bdi_writeback *wb;
  68258. + struct wb_writeback_work work = {
  68259. + .sb = super,
  68260. + .sync_mode = WB_SYNC_ALL,
  68261. + .range_cyclic = 0,
  68262. + .nr_pages = LONG_MAX,
  68263. + .reason = WB_REASON_SYNC,
  68264. + .for_sync = 1,
  68265. + };
  68266. + struct writeback_control wbc = {
  68267. + .sync_mode = work.sync_mode,
  68268. + .range_cyclic = work.range_cyclic,
  68269. + .range_start = 0,
  68270. + .range_end = LLONG_MAX,
  68271. + };
  68272. + ctx = reiser4_init_context(super);
  68273. + if (IS_ERR(ctx)) {
  68274. + warning("edward-1567", "failed to init context");
  68275. + return PTR_ERR(ctx);
  68276. + }
  68277. + /*
  68278. + * We don't capture superblock here.
  68279. + * Superblock is captured only by operations, which change
  68280. + * its fields different from free_blocks, nr_files, next_oid.
  68281. + * After system crash the mentioned fields are recovered from
  68282. + * journal records, see reiser4_journal_recover_sb_data().
  68283. + * Also superblock is captured at final commit when releasing
  68284. + * disk format.
  68285. + */
  68286. + wb = &inode_to_bdi(reiser4_get_super_fake(super))->wb;
  68287. + spin_lock(&wb->list_lock);
  68288. + generic_writeback_sb_inodes(super, wb, &wbc, &work, true);
  68289. + spin_unlock(&wb->list_lock);
  68290. + wbc.nr_to_write = LONG_MAX;
  68291. + /*
  68292. + * (flush goes here)
  68293. + * commit all transactions
  68294. + */
  68295. + reiser4_writeout(super, &wbc);
  68296. +
  68297. + reiser4_exit_context(ctx);
  68298. + return 0;
  68299. +}
  68300. +
  68301. +static int reiser4_remount(struct super_block *s, int *mount_flags, char *arg)
  68302. +{
  68303. + sync_filesystem(s);
  68304. + return 0;
  68305. +}
  68306. +
  68307. +/**
  68308. + * reiser4_show_options - show_options of super operations
  68309. + * @m: file where to write information
  68310. + * @mnt: mount structure
  68311. + *
  68312. + * Makes reiser4 mount options visible in /proc/mounts.
  68313. + */
  68314. +static int reiser4_show_options(struct seq_file *m, struct dentry *dentry)
  68315. +{
  68316. + struct super_block *super;
  68317. + reiser4_super_info_data *sbinfo;
  68318. +
  68319. + super = dentry->d_sb;
  68320. + sbinfo = get_super_private(super);
  68321. +
  68322. + seq_printf(m, ",atom_max_size=0x%x", sbinfo->tmgr.atom_max_size);
  68323. + seq_printf(m, ",atom_max_age=0x%x", sbinfo->tmgr.atom_max_age);
  68324. + seq_printf(m, ",atom_min_size=0x%x", sbinfo->tmgr.atom_min_size);
  68325. + seq_printf(m, ",atom_max_flushers=0x%x",
  68326. + sbinfo->tmgr.atom_max_flushers);
  68327. + seq_printf(m, ",cbk_cache_slots=0x%x",
  68328. + sbinfo->tree.cbk_cache.nr_slots);
  68329. +
  68330. + return 0;
  68331. +}
  68332. +
  68333. +struct super_operations reiser4_super_operations = {
  68334. + .alloc_inode = reiser4_alloc_inode,
  68335. + .destroy_inode = reiser4_destroy_inode,
  68336. + .dirty_inode = reiser4_dirty_inode,
  68337. + .evict_inode = reiser4_evict_inode,
  68338. + .put_super = reiser4_put_super,
  68339. + .sync_fs = reiser4_sync_fs,
  68340. + .statfs = reiser4_statfs,
  68341. + .remount_fs = reiser4_remount,
  68342. + .writeback_inodes = reiser4_writeback_inodes,
  68343. + .show_options = reiser4_show_options
  68344. +};
  68345. +
  68346. +/**
  68347. + * fill_super - initialize super block on mount
  68348. + * @super: super block to fill
  68349. + * @data: reiser4 specific mount option
  68350. + * @silent:
  68351. + *
  68352. + * This is to be called by reiser4_mount (via mount_bdev). Mounts filesystem.
  68353. + */
  68354. +static int fill_super(struct super_block *super, void *data, int silent)
  68355. +{
  68356. + reiser4_context ctx;
  68357. + int result;
  68358. + reiser4_super_info_data *sbinfo;
  68359. +
  68360. + assert("zam-989", super != NULL);
  68361. +
  68362. + super->s_op = NULL;
  68363. + init_stack_context(&ctx, super);
  68364. +
  68365. + /* allocate reiser4 specific super block */
  68366. + if ((result = reiser4_init_fs_info(super)) != 0)
  68367. + goto failed_init_sinfo;
  68368. +
  68369. + sbinfo = get_super_private(super);
  68370. +
  68371. + if ((result = reiser4_init_csum_tfm(&sbinfo->csum_tfm)) != 0)
  68372. + goto failed_init_csum_tfm;
  68373. +
  68374. + /* initialize various reiser4 parameters, parse mount options */
  68375. + if ((result = reiser4_init_super_data(super, data)) != 0)
  68376. + goto failed_init_super_data;
  68377. +
  68378. + /* read reiser4 master super block, initialize disk format plugin */
  68379. + if ((result = reiser4_init_read_super(super, silent)) != 0)
  68380. + goto failed_init_read_super;
  68381. +
  68382. + /* initialize transaction manager */
  68383. + reiser4_init_txnmgr(&sbinfo->tmgr);
  68384. +
  68385. + /* initialize ktxnmgrd context and start kernel thread ktxnmgrd */
  68386. + if ((result = reiser4_init_ktxnmgrd(super)) != 0)
  68387. + goto failed_init_ktxnmgrd;
  68388. +
  68389. + /* initialize entd context and start kernel thread entd */
  68390. + if ((result = reiser4_init_entd(super)) != 0)
  68391. + goto failed_init_entd;
  68392. +
  68393. + /* initialize address spaces for formatted nodes and bitmaps */
  68394. + if ((result = reiser4_init_formatted_fake(super)) != 0)
  68395. + goto failed_init_formatted_fake;
  68396. +
  68397. + /* initialize disk format plugin */
  68398. + if ((result = get_super_private(super)->df_plug->init_format(super,
  68399. + data)) != 0)
  68400. + goto failed_init_disk_format;
  68401. +
  68402. + /*
  68403. + * There are some 'committed' versions of reiser4 super block counters,
  68404. + * which correspond to reiser4 on-disk state. These counters are
  68405. + * initialized here
  68406. + */
  68407. + sbinfo->blocks_free_committed = sbinfo->blocks_free;
  68408. + sbinfo->nr_files_committed = oids_used(super);
  68409. +
  68410. + /* get inode of root directory */
  68411. + if ((result = reiser4_init_root_inode(super)) != 0)
  68412. + goto failed_init_root_inode;
  68413. +
  68414. + if ((result = get_super_private(super)->df_plug->version_update(super)) != 0)
  68415. + goto failed_update_format_version;
  68416. +
  68417. + process_safelinks(super);
  68418. + reiser4_exit_context(&ctx);
  68419. +
  68420. + sbinfo->debugfs_root = debugfs_create_dir(super->s_id,
  68421. + reiser4_debugfs_root);
  68422. + if (sbinfo->debugfs_root) {
  68423. + sbinfo->tmgr.debugfs_atom_count =
  68424. + debugfs_create_u32("atom_count", S_IFREG|S_IRUSR,
  68425. + sbinfo->debugfs_root,
  68426. + &sbinfo->tmgr.atom_count);
  68427. + sbinfo->tmgr.debugfs_id_count =
  68428. + debugfs_create_u32("id_count", S_IFREG|S_IRUSR,
  68429. + sbinfo->debugfs_root,
  68430. + &sbinfo->tmgr.id_count);
  68431. + }
  68432. + printk("reiser4: %s: using %s.\n", super->s_id,
  68433. + txmod_plugin_by_id(sbinfo->txmod)->h.desc);
  68434. + return 0;
  68435. +
  68436. + failed_update_format_version:
  68437. + failed_init_root_inode:
  68438. + if (sbinfo->df_plug->release)
  68439. + sbinfo->df_plug->release(super);
  68440. + failed_init_disk_format:
  68441. + reiser4_done_formatted_fake(super);
  68442. + failed_init_formatted_fake:
  68443. + reiser4_done_entd(super);
  68444. + failed_init_entd:
  68445. + reiser4_done_ktxnmgrd(super);
  68446. + failed_init_ktxnmgrd:
  68447. + reiser4_done_txnmgr(&sbinfo->tmgr);
  68448. + failed_init_read_super:
  68449. + failed_init_super_data:
  68450. + failed_init_csum_tfm:
  68451. + reiser4_done_fs_info(super);
  68452. + failed_init_sinfo:
  68453. + reiser4_exit_context(&ctx);
  68454. + return result;
  68455. +}
  68456. +
  68457. +/**
  68458. + * reiser4_mount - mount of file_system_type operations
  68459. + * @fs_type:
  68460. + * @flags: mount flags MS_RDONLY, MS_VERBOSE, etc
  68461. + * @dev_name: block device file name
  68462. + * @data: specific mount options
  68463. + *
  68464. + * Reiser4 mount entry.
  68465. + */
  68466. +static struct dentry *reiser4_mount(struct file_system_type *fs_type, int flags,
  68467. + const char *dev_name, void *data)
  68468. +{
  68469. + return mount_bdev(fs_type, flags, dev_name, data, fill_super);
  68470. +}
  68471. +
  68472. +/* structure describing the reiser4 filesystem implementation */
  68473. +static struct file_system_type reiser4_fs_type = {
  68474. + .owner = THIS_MODULE,
  68475. + .name = "reiser4",
  68476. + .fs_flags = FS_REQUIRES_DEV,
  68477. + .mount = reiser4_mount,
  68478. + .kill_sb = kill_block_super,
  68479. + .next = NULL
  68480. +};
  68481. +
  68482. +void destroy_reiser4_cache(struct kmem_cache **cachep)
  68483. +{
  68484. + BUG_ON(*cachep == NULL);
  68485. + kmem_cache_destroy(*cachep);
  68486. + *cachep = NULL;
  68487. +}
  68488. +
  68489. +/**
  68490. + * init_reiser4 - reiser4 initialization entry point
  68491. + *
  68492. + * Initializes reiser4 slabs, registers reiser4 filesystem type. It is called
  68493. + * on kernel initialization or during reiser4 module load.
  68494. + */
  68495. +static int __init init_reiser4(void)
  68496. +{
  68497. + int result;
  68498. +
  68499. + printk(KERN_INFO
  68500. + "Loading Reiser4 (format release: 4.%d.%d) "
  68501. + "See www.namesys.com for a description of Reiser4.\n",
  68502. + get_release_number_major(),
  68503. + get_release_number_minor());
  68504. +
  68505. + /* initialize slab cache of inodes */
  68506. + if ((result = init_inodes()) != 0)
  68507. + goto failed_inode_cache;
  68508. +
  68509. + /* initialize cache of znodes */
  68510. + if ((result = init_znodes()) != 0)
  68511. + goto failed_init_znodes;
  68512. +
  68513. + /* initialize all plugins */
  68514. + if ((result = init_plugins()) != 0)
  68515. + goto failed_init_plugins;
  68516. +
  68517. + /* initialize cache of plugin_set-s and plugin_set's hash table */
  68518. + if ((result = init_plugin_set()) != 0)
  68519. + goto failed_init_plugin_set;
  68520. +
  68521. + /* initialize caches of txn_atom-s and txn_handle-s */
  68522. + if ((result = init_txnmgr_static()) != 0)
  68523. + goto failed_init_txnmgr_static;
  68524. +
  68525. + /* initialize cache of jnodes */
  68526. + if ((result = init_jnodes()) != 0)
  68527. + goto failed_init_jnodes;
  68528. +
  68529. + /* initialize cache of flush queues */
  68530. + if ((result = reiser4_init_fqs()) != 0)
  68531. + goto failed_init_fqs;
  68532. +
  68533. + /* initialize cache of structures attached to dentry->d_fsdata */
  68534. + if ((result = reiser4_init_dentry_fsdata()) != 0)
  68535. + goto failed_init_dentry_fsdata;
  68536. +
  68537. + /* initialize cache of structures attached to file->private_data */
  68538. + if ((result = reiser4_init_file_fsdata()) != 0)
  68539. + goto failed_init_file_fsdata;
  68540. +
  68541. + /*
  68542. + * initialize cache of d_cursors. See plugin/file_ops_readdir.c for
  68543. + * more details
  68544. + */
  68545. + if ((result = reiser4_init_d_cursor()) != 0)
  68546. + goto failed_init_d_cursor;
  68547. +
  68548. + /* initialize cache of blocknr set entries */
  68549. + if ((result = blocknr_set_init_static()) != 0)
  68550. + goto failed_init_blocknr_set;
  68551. +
  68552. + /* initialize cache of blocknr list entries */
  68553. + if ((result = blocknr_list_init_static()) != 0)
  68554. + goto failed_init_blocknr_list;
  68555. +
  68556. + if ((result = register_filesystem(&reiser4_fs_type)) == 0) {
  68557. + reiser4_debugfs_root = debugfs_create_dir("reiser4", NULL);
  68558. + return 0;
  68559. + }
  68560. +
  68561. + blocknr_list_done_static();
  68562. + failed_init_blocknr_list:
  68563. + blocknr_set_done_static();
  68564. + failed_init_blocknr_set:
  68565. + reiser4_done_d_cursor();
  68566. + failed_init_d_cursor:
  68567. + reiser4_done_file_fsdata();
  68568. + failed_init_file_fsdata:
  68569. + reiser4_done_dentry_fsdata();
  68570. + failed_init_dentry_fsdata:
  68571. + reiser4_done_fqs();
  68572. + failed_init_fqs:
  68573. + done_jnodes();
  68574. + failed_init_jnodes:
  68575. + done_txnmgr_static();
  68576. + failed_init_txnmgr_static:
  68577. + done_plugin_set();
  68578. + failed_init_plugin_set:
  68579. + failed_init_plugins:
  68580. + done_znodes();
  68581. + failed_init_znodes:
  68582. + done_inodes();
  68583. + failed_inode_cache:
  68584. + return result;
  68585. +}
  68586. +
  68587. +/**
  68588. + * done_reiser4 - reiser4 exit entry point
  68589. + *
  68590. + * Unregister reiser4 filesystem type, deletes caches. It is called on shutdown
  68591. + * or at module unload.
  68592. + */
  68593. +static void __exit done_reiser4(void)
  68594. +{
  68595. + int result;
  68596. +
  68597. + debugfs_remove(reiser4_debugfs_root);
  68598. + result = unregister_filesystem(&reiser4_fs_type);
  68599. + BUG_ON(result != 0);
  68600. + blocknr_list_done_static();
  68601. + blocknr_set_done_static();
  68602. + reiser4_done_d_cursor();
  68603. + reiser4_done_file_fsdata();
  68604. + reiser4_done_dentry_fsdata();
  68605. + reiser4_done_fqs();
  68606. + done_jnodes();
  68607. + done_txnmgr_static();
  68608. + done_plugin_set();
  68609. + done_znodes();
  68610. + destroy_reiser4_cache(&inode_cache);
  68611. +}
  68612. +
  68613. +module_init(init_reiser4);
  68614. +module_exit(done_reiser4);
  68615. +
  68616. +MODULE_ALIAS_FS("reiser4");
  68617. +
  68618. +MODULE_DESCRIPTION("Reiser4 filesystem");
  68619. +MODULE_AUTHOR("Hans Reiser <Reiser@Namesys.COM>");
  68620. +
  68621. +MODULE_LICENSE("GPL");
  68622. +
  68623. +/*
  68624. + * Local variables:
  68625. + * c-indentation-style: "K&R"
  68626. + * mode-name: "LC"
  68627. + * c-basic-offset: 8
  68628. + * tab-width: 8
  68629. + * fill-column: 79
  68630. + * End:
  68631. + */
  68632. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/tap.c linux-4.14.2/fs/reiser4/tap.c
  68633. --- linux-4.14.2.orig/fs/reiser4/tap.c 1970-01-01 01:00:00.000000000 +0100
  68634. +++ linux-4.14.2/fs/reiser4/tap.c 2017-11-26 22:13:09.000000000 +0100
  68635. @@ -0,0 +1,376 @@
  68636. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  68637. + * reiser4/README */
  68638. +
  68639. +/*
  68640. + Tree Access Pointer (tap).
  68641. +
  68642. + tap is data structure combining coord and lock handle (mostly). It is
  68643. + useful when one has to scan tree nodes (for example, in readdir, or flush),
  68644. + since tap functions allow moving a tap in either direction, transparently
  68645. + crossing unit/item/node borders.
  68646. +
  68647. + Tap doesn't provide automatic synchronization of its fields as it is
  68648. + supposed to be per-thread object.
  68649. +*/
  68650. +
  68651. +#include "forward.h"
  68652. +#include "debug.h"
  68653. +#include "coord.h"
  68654. +#include "tree.h"
  68655. +#include "context.h"
  68656. +#include "tap.h"
  68657. +#include "znode.h"
  68658. +#include "tree_walk.h"
  68659. +
  68660. +#if REISER4_DEBUG
  68661. +static int tap_invariant(const tap_t *tap);
  68662. +static void tap_check(const tap_t *tap);
  68663. +#else
  68664. +#define tap_check(tap) noop
  68665. +#endif
  68666. +
  68667. +/** load node tap is pointing to, if not loaded already */
  68668. +int reiser4_tap_load(tap_t *tap)
  68669. +{
  68670. + tap_check(tap);
  68671. + if (tap->loaded == 0) {
  68672. + int result;
  68673. +
  68674. + result = zload_ra(tap->coord->node, &tap->ra_info);
  68675. + if (result != 0)
  68676. + return result;
  68677. + coord_clear_iplug(tap->coord);
  68678. + }
  68679. + ++tap->loaded;
  68680. + tap_check(tap);
  68681. + return 0;
  68682. +}
  68683. +
  68684. +/** release node tap is pointing to. Dual to tap_load() */
  68685. +void reiser4_tap_relse(tap_t *tap)
  68686. +{
  68687. + tap_check(tap);
  68688. + if (tap->loaded > 0) {
  68689. + --tap->loaded;
  68690. + if (tap->loaded == 0)
  68691. + zrelse(tap->coord->node);
  68692. + }
  68693. + tap_check(tap);
  68694. +}
  68695. +
  68696. +/**
  68697. + * init tap to consist of @coord and @lh. Locks on nodes will be acquired with
  68698. + * @mode
  68699. + */
  68700. +void reiser4_tap_init(tap_t *tap, coord_t *coord, lock_handle * lh,
  68701. + znode_lock_mode mode)
  68702. +{
  68703. + tap->coord = coord;
  68704. + tap->lh = lh;
  68705. + tap->mode = mode;
  68706. + tap->loaded = 0;
  68707. + INIT_LIST_HEAD(&tap->linkage);
  68708. + reiser4_init_ra_info(&tap->ra_info);
  68709. +}
  68710. +
  68711. +/** add @tap to the per-thread list of all taps */
  68712. +void reiser4_tap_monitor(tap_t *tap)
  68713. +{
  68714. + assert("nikita-2623", tap != NULL);
  68715. + tap_check(tap);
  68716. + list_add(&tap->linkage, reiser4_taps_list());
  68717. + tap_check(tap);
  68718. +}
  68719. +
  68720. +/* duplicate @src into @dst. Copy lock handle. @dst is not initially
  68721. + * loaded. */
  68722. +void reiser4_tap_copy(tap_t *dst, tap_t *src)
  68723. +{
  68724. + assert("nikita-3193", src != NULL);
  68725. + assert("nikita-3194", dst != NULL);
  68726. +
  68727. + *dst->coord = *src->coord;
  68728. + if (src->lh->node)
  68729. + copy_lh(dst->lh, src->lh);
  68730. + dst->mode = src->mode;
  68731. + dst->loaded = 0;
  68732. + INIT_LIST_HEAD(&dst->linkage);
  68733. + dst->ra_info = src->ra_info;
  68734. +}
  68735. +
  68736. +/** finish with @tap */
  68737. +void reiser4_tap_done(tap_t *tap)
  68738. +{
  68739. + assert("nikita-2565", tap != NULL);
  68740. + tap_check(tap);
  68741. + if (tap->loaded > 0)
  68742. + zrelse(tap->coord->node);
  68743. + done_lh(tap->lh);
  68744. + tap->loaded = 0;
  68745. + list_del_init(&tap->linkage);
  68746. + tap->coord->node = NULL;
  68747. +}
  68748. +
  68749. +/**
  68750. + * move @tap to the new node, locked with @target. Load @target, if @tap was
  68751. + * already loaded.
  68752. + */
  68753. +int reiser4_tap_move(tap_t *tap, lock_handle * target)
  68754. +{
  68755. + int result = 0;
  68756. +
  68757. + assert("nikita-2567", tap != NULL);
  68758. + assert("nikita-2568", target != NULL);
  68759. + assert("nikita-2570", target->node != NULL);
  68760. + assert("nikita-2569", tap->coord->node == tap->lh->node);
  68761. +
  68762. + tap_check(tap);
  68763. + if (tap->loaded > 0)
  68764. + result = zload_ra(target->node, &tap->ra_info);
  68765. +
  68766. + if (result == 0) {
  68767. + if (tap->loaded > 0)
  68768. + zrelse(tap->coord->node);
  68769. + done_lh(tap->lh);
  68770. + copy_lh(tap->lh, target);
  68771. + tap->coord->node = target->node;
  68772. + coord_clear_iplug(tap->coord);
  68773. + }
  68774. + tap_check(tap);
  68775. + return result;
  68776. +}
  68777. +
  68778. +/**
  68779. + * move @tap to @target. Acquire lock on @target, if @tap was already
  68780. + * loaded.
  68781. + */
  68782. +static int tap_to(tap_t *tap, znode * target)
  68783. +{
  68784. + int result;
  68785. +
  68786. + assert("nikita-2624", tap != NULL);
  68787. + assert("nikita-2625", target != NULL);
  68788. +
  68789. + tap_check(tap);
  68790. + result = 0;
  68791. + if (tap->coord->node != target) {
  68792. + lock_handle here;
  68793. +
  68794. + init_lh(&here);
  68795. + result = longterm_lock_znode(&here, target,
  68796. + tap->mode, ZNODE_LOCK_HIPRI);
  68797. + if (result == 0) {
  68798. + result = reiser4_tap_move(tap, &here);
  68799. + done_lh(&here);
  68800. + }
  68801. + }
  68802. + tap_check(tap);
  68803. + return result;
  68804. +}
  68805. +
  68806. +/**
  68807. + * move @tap to given @target, loading and locking @target->node if
  68808. + * necessary
  68809. + */
  68810. +int tap_to_coord(tap_t *tap, coord_t *target)
  68811. +{
  68812. + int result;
  68813. +
  68814. + tap_check(tap);
  68815. + result = tap_to(tap, target->node);
  68816. + if (result == 0)
  68817. + coord_dup(tap->coord, target);
  68818. + tap_check(tap);
  68819. + return result;
  68820. +}
  68821. +
  68822. +/** return list of all taps */
  68823. +struct list_head *reiser4_taps_list(void)
  68824. +{
  68825. + return &get_current_context()->taps;
  68826. +}
  68827. +
  68828. +/** helper function for go_{next,prev}_{item,unit,node}() */
  68829. +int go_dir_el(tap_t *tap, sideof dir, int units_p)
  68830. +{
  68831. + coord_t dup;
  68832. + coord_t *coord;
  68833. + int result;
  68834. +
  68835. + int (*coord_dir) (coord_t *);
  68836. + int (*get_dir_neighbor) (lock_handle *, znode *, int, int);
  68837. + void (*coord_init) (coord_t *, const znode *);
  68838. + ON_DEBUG(int (*coord_check) (const coord_t *));
  68839. +
  68840. + assert("nikita-2556", tap != NULL);
  68841. + assert("nikita-2557", tap->coord != NULL);
  68842. + assert("nikita-2558", tap->lh != NULL);
  68843. + assert("nikita-2559", tap->coord->node != NULL);
  68844. +
  68845. + tap_check(tap);
  68846. + if (dir == LEFT_SIDE) {
  68847. + coord_dir = units_p ? coord_prev_unit : coord_prev_item;
  68848. + get_dir_neighbor = reiser4_get_left_neighbor;
  68849. + coord_init = coord_init_last_unit;
  68850. + } else {
  68851. + coord_dir = units_p ? coord_next_unit : coord_next_item;
  68852. + get_dir_neighbor = reiser4_get_right_neighbor;
  68853. + coord_init = coord_init_first_unit;
  68854. + }
  68855. + ON_DEBUG(coord_check =
  68856. + units_p ? coord_is_existing_unit : coord_is_existing_item);
  68857. + assert("nikita-2560", coord_check(tap->coord));
  68858. +
  68859. + coord = tap->coord;
  68860. + coord_dup(&dup, coord);
  68861. + if (coord_dir(&dup) != 0) {
  68862. + do {
  68863. + /* move to the left neighboring node */
  68864. + lock_handle dup;
  68865. +
  68866. + init_lh(&dup);
  68867. + result =
  68868. + get_dir_neighbor(&dup, coord->node, (int)tap->mode,
  68869. + GN_CAN_USE_UPPER_LEVELS);
  68870. + if (result == 0) {
  68871. + result = reiser4_tap_move(tap, &dup);
  68872. + if (result == 0)
  68873. + coord_init(tap->coord, dup.node);
  68874. + done_lh(&dup);
  68875. + }
  68876. + /* skip empty nodes */
  68877. + } while ((result == 0) && node_is_empty(coord->node));
  68878. + } else {
  68879. + result = 0;
  68880. + coord_dup(coord, &dup);
  68881. + }
  68882. + assert("nikita-2564", ergo(!result, coord_check(tap->coord)));
  68883. + tap_check(tap);
  68884. + return result;
  68885. +}
  68886. +
  68887. +/**
  68888. + * move @tap to the next unit, transparently crossing item and node
  68889. + * boundaries
  68890. + */
  68891. +int go_next_unit(tap_t *tap)
  68892. +{
  68893. + return go_dir_el(tap, RIGHT_SIDE, 1);
  68894. +}
  68895. +
  68896. +/**
  68897. + * move @tap to the previous unit, transparently crossing item and node
  68898. + * boundaries
  68899. + */
  68900. +int go_prev_unit(tap_t *tap)
  68901. +{
  68902. + return go_dir_el(tap, LEFT_SIDE, 1);
  68903. +}
  68904. +
  68905. +/**
  68906. + * @shift times apply @actor to the @tap. This is used to move @tap by
  68907. + * @shift units (or items, or nodes) in either direction.
  68908. + */
  68909. +static int rewind_to(tap_t *tap, go_actor_t actor, int shift)
  68910. +{
  68911. + int result;
  68912. +
  68913. + assert("nikita-2555", shift >= 0);
  68914. + assert("nikita-2562", tap->coord->node == tap->lh->node);
  68915. +
  68916. + tap_check(tap);
  68917. + result = reiser4_tap_load(tap);
  68918. + if (result != 0)
  68919. + return result;
  68920. +
  68921. + for (; shift > 0; --shift) {
  68922. + result = actor(tap);
  68923. + assert("nikita-2563", tap->coord->node == tap->lh->node);
  68924. + if (result != 0)
  68925. + break;
  68926. + }
  68927. + reiser4_tap_relse(tap);
  68928. + tap_check(tap);
  68929. + return result;
  68930. +}
  68931. +
  68932. +/** move @tap @shift units rightward */
  68933. +int rewind_right(tap_t *tap, int shift)
  68934. +{
  68935. + return rewind_to(tap, go_next_unit, shift);
  68936. +}
  68937. +
  68938. +/** move @tap @shift units leftward */
  68939. +int rewind_left(tap_t *tap, int shift)
  68940. +{
  68941. + return rewind_to(tap, go_prev_unit, shift);
  68942. +}
  68943. +
  68944. +#if REISER4_DEBUG
  68945. +/** debugging function: print @tap content in human readable form */
  68946. +static void print_tap(const char *prefix, const tap_t *tap)
  68947. +{
  68948. + if (tap == NULL) {
  68949. + printk("%s: null tap\n", prefix);
  68950. + return;
  68951. + }
  68952. + printk("%s: loaded: %i, in-list: %i, node: %p, mode: %s\n", prefix,
  68953. + tap->loaded, (&tap->linkage == tap->linkage.next &&
  68954. + &tap->linkage == tap->linkage.prev),
  68955. + tap->lh->node,
  68956. + lock_mode_name(tap->mode));
  68957. + print_coord("\tcoord", tap->coord, 0);
  68958. +}
  68959. +
  68960. +/** check [tap-sane] invariant */
  68961. +static int tap_invariant(const tap_t *tap)
  68962. +{
  68963. + /* [tap-sane] invariant */
  68964. +
  68965. + if (tap == NULL)
  68966. + return 1;
  68967. + /* tap->mode is one of
  68968. + *
  68969. + * {ZNODE_NO_LOCK, ZNODE_READ_LOCK, ZNODE_WRITE_LOCK}, and
  68970. + */
  68971. + if (tap->mode != ZNODE_NO_LOCK &&
  68972. + tap->mode != ZNODE_READ_LOCK && tap->mode != ZNODE_WRITE_LOCK)
  68973. + return 2;
  68974. + /* tap->coord != NULL, and */
  68975. + if (tap->coord == NULL)
  68976. + return 3;
  68977. + /* tap->lh != NULL, and */
  68978. + if (tap->lh == NULL)
  68979. + return 4;
  68980. + /* tap->loaded > 0 => znode_is_loaded(tap->coord->node), and */
  68981. + if (!ergo(tap->loaded, znode_is_loaded(tap->coord->node)))
  68982. + return 5;
  68983. + /* tap->coord->node == tap->lh->node if tap->lh->node is not 0 */
  68984. + if (tap->lh->node != NULL && tap->coord->node != tap->lh->node)
  68985. + return 6;
  68986. + return 0;
  68987. +}
  68988. +
  68989. +/** debugging function: check internal @tap consistency */
  68990. +static void tap_check(const tap_t *tap)
  68991. +{
  68992. + int result;
  68993. +
  68994. + result = tap_invariant(tap);
  68995. + if (result != 0) {
  68996. + print_tap("broken", tap);
  68997. + reiser4_panic("nikita-2831", "tap broken: %i\n", result);
  68998. + }
  68999. +}
  69000. +#endif
  69001. +
  69002. +/* Make Linus happy.
  69003. + Local variables:
  69004. + c-indentation-style: "K&R"
  69005. + mode-name: "LC"
  69006. + c-basic-offset: 8
  69007. + tab-width: 8
  69008. + fill-column: 120
  69009. + scroll-step: 1
  69010. + End:
  69011. +*/
  69012. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/tap.h linux-4.14.2/fs/reiser4/tap.h
  69013. --- linux-4.14.2.orig/fs/reiser4/tap.h 1970-01-01 01:00:00.000000000 +0100
  69014. +++ linux-4.14.2/fs/reiser4/tap.h 2017-11-26 22:13:09.000000000 +0100
  69015. @@ -0,0 +1,70 @@
  69016. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  69017. +
  69018. +/* Tree Access Pointers. See tap.c for more details. */
  69019. +
  69020. +#if !defined(__REISER4_TAP_H__)
  69021. +#define __REISER4_TAP_H__
  69022. +
  69023. +#include "forward.h"
  69024. +#include "readahead.h"
  69025. +
  69026. +/**
  69027. + tree_access_pointer aka tap. Data structure combining coord_t and lock
  69028. + handle.
  69029. + Invariants involving this data-type, see doc/lock-ordering for details:
  69030. +
  69031. + [tap-sane]
  69032. + */
  69033. +struct tree_access_pointer {
  69034. + /* coord tap is at */
  69035. + coord_t *coord;
  69036. + /* lock handle on ->coord->node */
  69037. + lock_handle *lh;
  69038. + /* mode of lock acquired by this tap */
  69039. + znode_lock_mode mode;
  69040. + /* incremented by reiser4_tap_load().
  69041. + Decremented by reiser4_tap_relse(). */
  69042. + int loaded;
  69043. + /* list of taps */
  69044. + struct list_head linkage;
  69045. + /* read-ahead hint */
  69046. + ra_info_t ra_info;
  69047. +};
  69048. +
  69049. +typedef int (*go_actor_t) (tap_t *tap);
  69050. +
  69051. +extern int reiser4_tap_load(tap_t *tap);
  69052. +extern void reiser4_tap_relse(tap_t *tap);
  69053. +extern void reiser4_tap_init(tap_t *tap, coord_t *coord, lock_handle * lh,
  69054. + znode_lock_mode mode);
  69055. +extern void reiser4_tap_monitor(tap_t *tap);
  69056. +extern void reiser4_tap_copy(tap_t *dst, tap_t *src);
  69057. +extern void reiser4_tap_done(tap_t *tap);
  69058. +extern int reiser4_tap_move(tap_t *tap, lock_handle * target);
  69059. +extern int tap_to_coord(tap_t *tap, coord_t *target);
  69060. +
  69061. +extern int go_dir_el(tap_t *tap, sideof dir, int units_p);
  69062. +extern int go_next_unit(tap_t *tap);
  69063. +extern int go_prev_unit(tap_t *tap);
  69064. +extern int rewind_right(tap_t *tap, int shift);
  69065. +extern int rewind_left(tap_t *tap, int shift);
  69066. +
  69067. +extern struct list_head *reiser4_taps_list(void);
  69068. +
  69069. +#define for_all_taps(tap) \
  69070. + for (tap = list_entry(reiser4_taps_list()->next, tap_t, linkage); \
  69071. + reiser4_taps_list() != &tap->linkage; \
  69072. + tap = list_entry(tap->linkage.next, tap_t, linkage))
  69073. +
  69074. +/* __REISER4_TAP_H__ */
  69075. +#endif
  69076. +/* Make Linus happy.
  69077. + Local variables:
  69078. + c-indentation-style: "K&R"
  69079. + mode-name: "LC"
  69080. + c-basic-offset: 8
  69081. + tab-width: 8
  69082. + fill-column: 120
  69083. + scroll-step: 1
  69084. + End:
  69085. +*/
  69086. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/tree.c linux-4.14.2/fs/reiser4/tree.c
  69087. --- linux-4.14.2.orig/fs/reiser4/tree.c 1970-01-01 01:00:00.000000000 +0100
  69088. +++ linux-4.14.2/fs/reiser4/tree.c 2017-11-26 22:13:09.000000000 +0100
  69089. @@ -0,0 +1,1884 @@
  69090. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  69091. + * reiser4/README */
  69092. +
  69093. +/*
  69094. + * KEYS IN A TREE.
  69095. + *
  69096. + * The tree consists of nodes located on the disk. Node in the tree is either
  69097. + * formatted or unformatted. Formatted node is one that has structure
  69098. + * understood by the tree balancing and traversal code. Formatted nodes are
  69099. + * further classified into leaf and internal nodes. The latter distinction is
  69100. + * (almost) of only historical importance: general structure of leaves and
  69101. + * internal nodes is the same in Reiser4. Unformatted nodes contain raw data
  69102. + * that are part of bodies of ordinary files and attributes.
  69103. + *
  69104. + * Each node in the tree spawns some interval in the key space. Key ranges for
  69105. + * all nodes in the tree are disjoint. Actually, this only holds in some weak
  69106. + * sense, because of the non-unique keys: intersection of key ranges for
  69107. + * different nodes is either empty, or consists of exactly one key.
  69108. + *
  69109. + * Formatted node consists of a sequence of items. Each item spawns some
  69110. + * interval in key space. Key ranges for all items in a tree are disjoint,
  69111. + * modulo non-unique keys again. Items within nodes are ordered in the key
  69112. + * order of the smallest key in a item.
  69113. + *
  69114. + * Particular type of item can be further split into units. Unit is piece of
  69115. + * item that can be cut from item and moved into another item of the same
  69116. + * time. Units are used by balancing code to repack data during balancing.
  69117. + *
  69118. + * Unit can be further split into smaller entities (for example, extent unit
  69119. + * represents several pages, and it is natural for extent code to operate on
  69120. + * particular pages and even bytes within one unit), but this is of no
  69121. + * relevance to the generic balancing and lookup code.
  69122. + *
  69123. + * Although item is said to "spawn" range or interval of keys, it is not
  69124. + * necessary that item contains piece of data addressable by each and every
  69125. + * key in this range. For example, compound directory item, consisting of
  69126. + * units corresponding to directory entries and keyed by hashes of file names,
  69127. + * looks more as having "discrete spectrum": only some disjoint keys inside
  69128. + * range occupied by this item really address data.
  69129. + *
  69130. + * Nonetheless, each item always has a well-defined least (minimal) key, that
  69131. + * is recorded in item header, stored in the node this item is in. Also, item
  69132. + * plugin can optionally define method ->max_key_inside() returning maximal
  69133. + * key that can _possibly_ be located within this item. This method is used
  69134. + * (mainly) to determine when given piece of data should be merged into
  69135. + * existing item, instead of creating a new one. Because of this, even though
  69136. + * ->max_key_inside() can be larger than any key actually located in the item,
  69137. + * intervals
  69138. + *
  69139. + * [ reiser4_min_key( item ), ->max_key_inside( item ) ]
  69140. + *
  69141. + * are still disjoint for all items within the _same_ node.
  69142. + *
  69143. + * In memory node is represented by znode. It plays several roles:
  69144. + *
  69145. + * . something locks are taken on
  69146. + *
  69147. + * . something tracked by transaction manager (this is going to change)
  69148. + *
  69149. + * . something used to access node data
  69150. + *
  69151. + * . something used to maintain tree structure in memory: sibling and
  69152. + * parental linkage.
  69153. + *
  69154. + * . something used to organize nodes into "slums"
  69155. + *
  69156. + * More on znodes see in znode.[ch]
  69157. + *
  69158. + * DELIMITING KEYS
  69159. + *
  69160. + * To simplify balancing, allow some flexibility in locking and speed up
  69161. + * important coord cache optimization, we keep delimiting keys of nodes in
  69162. + * memory. Depending on disk format (implemented by appropriate node plugin)
  69163. + * node on disk can record both left and right delimiting key, only one of
  69164. + * them, or none. Still, our balancing and tree traversal code keep both
  69165. + * delimiting keys for a node that is in memory stored in the znode. When
  69166. + * node is first brought into memory during tree traversal, its left
  69167. + * delimiting key is taken from its parent, and its right delimiting key is
  69168. + * either next key in its parent, or is right delimiting key of parent if
  69169. + * node is the rightmost child of parent.
  69170. + *
  69171. + * Physical consistency of delimiting key is protected by special dk
  69172. + * read-write lock. That is, delimiting keys can only be inspected or
  69173. + * modified under this lock. But dk lock is only sufficient for fast
  69174. + * "pessimistic" check, because to simplify code and to decrease lock
  69175. + * contention, balancing (carry) only updates delimiting keys right before
  69176. + * unlocking all locked nodes on the given tree level. For example,
  69177. + * coord-by-key cache scans LRU list of recently accessed znodes. For each
  69178. + * node it first does fast check under dk spin lock. If key looked for is
  69179. + * not between delimiting keys for this node, next node is inspected and so
  69180. + * on. If key is inside of the key range, long term lock is taken on node
  69181. + * and key range is rechecked.
  69182. + *
  69183. + * COORDINATES
  69184. + *
  69185. + * To find something in the tree, you supply a key, and the key is resolved
  69186. + * by coord_by_key() into a coord (coordinate) that is valid as long as the
  69187. + * node the coord points to remains locked. As mentioned above trees
  69188. + * consist of nodes that consist of items that consist of units. A unit is
  69189. + * the smallest and indivisible piece of tree as far as balancing and tree
  69190. + * search are concerned. Each node, item, and unit can be addressed by
  69191. + * giving its level in the tree and the key occupied by this entity. A node
  69192. + * knows what the key ranges are of the items within it, and how to find its
  69193. + * items and invoke their item handlers, but it does not know how to access
  69194. + * individual units within its items except through the item handlers.
  69195. + * coord is a structure containing a pointer to the node, the ordinal number
  69196. + * of the item within this node (a sort of item offset), and the ordinal
  69197. + * number of the unit within this item.
  69198. + *
  69199. + * TREE LOOKUP
  69200. + *
  69201. + * There are two types of access to the tree: lookup and modification.
  69202. + *
  69203. + * Lookup is a search for the key in the tree. Search can look for either
  69204. + * exactly the key given to it, or for the largest key that is not greater
  69205. + * than the key given to it. This distinction is determined by "bias"
  69206. + * parameter of search routine (coord_by_key()). coord_by_key() either
  69207. + * returns error (key is not in the tree, or some kind of external error
  69208. + * occurred), or successfully resolves key into coord.
  69209. + *
  69210. + * This resolution is done by traversing tree top-to-bottom from root level
  69211. + * to the desired level. On levels above twig level (level one above the
  69212. + * leaf level) nodes consist exclusively of internal items. Internal item is
  69213. + * nothing more than pointer to the tree node on the child level. On twig
  69214. + * level nodes consist of internal items intermixed with extent
  69215. + * items. Internal items form normal search tree structure used by traversal
  69216. + * to descend through the tree.
  69217. + *
  69218. + * TREE LOOKUP OPTIMIZATIONS
  69219. + *
  69220. + * Tree lookup described above is expensive even if all nodes traversed are
  69221. + * already in the memory: for each node binary search within it has to be
  69222. + * performed and binary searches are CPU consuming and tend to destroy CPU
  69223. + * caches.
  69224. + *
  69225. + * Several optimizations are used to work around this:
  69226. + *
  69227. + * . cbk_cache (look-aside cache for tree traversals, see search.c for
  69228. + * details)
  69229. + *
  69230. + * . seals (see seal.[ch])
  69231. + *
  69232. + * . vroot (see search.c)
  69233. + *
  69234. + * General search-by-key is layered thusly:
  69235. + *
  69236. + * [check seal, if any] --ok--> done
  69237. + * |
  69238. + * failed
  69239. + * |
  69240. + * V
  69241. + * [vroot defined] --no--> node = tree_root
  69242. + * | |
  69243. + * yes |
  69244. + * | |
  69245. + * V |
  69246. + * node = vroot |
  69247. + * | |
  69248. + * | |
  69249. + * | |
  69250. + * V V
  69251. + * [check cbk_cache for key] --ok--> done
  69252. + * |
  69253. + * failed
  69254. + * |
  69255. + * V
  69256. + * [start tree traversal from node]
  69257. + *
  69258. + */
  69259. +
  69260. +#include "forward.h"
  69261. +#include "debug.h"
  69262. +#include "dformat.h"
  69263. +#include "key.h"
  69264. +#include "coord.h"
  69265. +#include "plugin/item/static_stat.h"
  69266. +#include "plugin/item/item.h"
  69267. +#include "plugin/node/node.h"
  69268. +#include "plugin/plugin.h"
  69269. +#include "txnmgr.h"
  69270. +#include "jnode.h"
  69271. +#include "znode.h"
  69272. +#include "block_alloc.h"
  69273. +#include "tree_walk.h"
  69274. +#include "carry.h"
  69275. +#include "carry_ops.h"
  69276. +#include "tap.h"
  69277. +#include "tree.h"
  69278. +#include "vfs_ops.h"
  69279. +#include "page_cache.h"
  69280. +#include "super.h"
  69281. +#include "reiser4.h"
  69282. +#include "inode.h"
  69283. +
  69284. +#include <linux/fs.h> /* for struct super_block */
  69285. +#include <linux/spinlock.h>
  69286. +
  69287. +/* Disk address (block number) never ever used for any real tree node. This is
  69288. + used as block number of "uber" znode.
  69289. +
  69290. + Invalid block addresses are 0 by tradition.
  69291. +
  69292. +*/
  69293. +const reiser4_block_nr UBER_TREE_ADDR = 0ull;
  69294. +
  69295. +#define CUT_TREE_MIN_ITERATIONS 64
  69296. +
  69297. +static int find_child_by_addr(znode * parent, znode * child, coord_t *result);
  69298. +
  69299. +/* return node plugin of coord->node */
  69300. +node_plugin *node_plugin_by_coord(const coord_t *coord)
  69301. +{
  69302. + assert("vs-1", coord != NULL);
  69303. + assert("vs-2", coord->node != NULL);
  69304. +
  69305. + return coord->node->nplug;
  69306. +}
  69307. +
  69308. +/* insert item into tree. Fields of @coord are updated so that they can be
  69309. + * used by consequent insert operation. */
  69310. +insert_result insert_by_key(reiser4_tree * tree /* tree to insert new item
  69311. + * into */ ,
  69312. + const reiser4_key * key /* key of new item */ ,
  69313. + reiser4_item_data * data /* parameters for item
  69314. + * creation */ ,
  69315. + coord_t *coord /* resulting insertion coord */ ,
  69316. + lock_handle * lh /* resulting lock
  69317. + * handle */ ,
  69318. + tree_level stop_level /* level where to insert */ ,
  69319. + __u32 flags/* insertion flags */)
  69320. +{
  69321. + int result;
  69322. +
  69323. + assert("nikita-358", tree != NULL);
  69324. + assert("nikita-360", coord != NULL);
  69325. +
  69326. + result = coord_by_key(tree, key, coord, lh, ZNODE_WRITE_LOCK,
  69327. + FIND_EXACT, stop_level, stop_level,
  69328. + flags | CBK_FOR_INSERT, NULL/*ra_info */);
  69329. + switch (result) {
  69330. + default:
  69331. + break;
  69332. + case CBK_COORD_FOUND:
  69333. + result = IBK_ALREADY_EXISTS;
  69334. + break;
  69335. + case CBK_COORD_NOTFOUND:
  69336. + assert("nikita-2017", coord->node != NULL);
  69337. + result = insert_by_coord(coord, data, key, lh, 0/*flags */);
  69338. + break;
  69339. + }
  69340. + return result;
  69341. +}
  69342. +
  69343. +/* insert item by calling carry. Helper function called if short-cut
  69344. + insertion failed */
  69345. +static insert_result insert_with_carry_by_coord(coord_t *coord,
  69346. + /* coord where to insert */
  69347. + lock_handle * lh,
  69348. + /* lock handle of insertion node */
  69349. + reiser4_item_data * data,
  69350. + /* parameters of new item */
  69351. + const reiser4_key * key,
  69352. + /* key of new item */
  69353. + carry_opcode cop,
  69354. + /* carry operation to perform */
  69355. + cop_insert_flag flags
  69356. + /* carry flags */ )
  69357. +{
  69358. + int result;
  69359. + carry_pool *pool;
  69360. + carry_level *lowest_level;
  69361. + carry_insert_data *cdata;
  69362. + carry_op *op;
  69363. +
  69364. + assert("umka-314", coord != NULL);
  69365. +
  69366. + /* allocate carry_pool and 3 carry_level-s */
  69367. + pool =
  69368. + init_carry_pool(sizeof(*pool) + 3 * sizeof(*lowest_level) +
  69369. + sizeof(*cdata));
  69370. + if (IS_ERR(pool))
  69371. + return PTR_ERR(pool);
  69372. + lowest_level = (carry_level *) (pool + 1);
  69373. + init_carry_level(lowest_level, pool);
  69374. +
  69375. + op = reiser4_post_carry(lowest_level, cop, coord->node, 0);
  69376. + if (IS_ERR(op) || (op == NULL)) {
  69377. + done_carry_pool(pool);
  69378. + return RETERR(op ? PTR_ERR(op) : -EIO);
  69379. + }
  69380. + cdata = (carry_insert_data *) (lowest_level + 3);
  69381. + cdata->coord = coord;
  69382. + cdata->data = data;
  69383. + cdata->key = key;
  69384. + op->u.insert.d = cdata;
  69385. + if (flags == 0)
  69386. + flags = znode_get_tree(coord->node)->carry.insert_flags;
  69387. + op->u.insert.flags = flags;
  69388. + op->u.insert.type = COPT_ITEM_DATA;
  69389. + op->u.insert.child = NULL;
  69390. + if (lh != NULL) {
  69391. + assert("nikita-3245", lh->node == coord->node);
  69392. + lowest_level->track_type = CARRY_TRACK_CHANGE;
  69393. + lowest_level->tracked = lh;
  69394. + }
  69395. +
  69396. + result = reiser4_carry(lowest_level, NULL);
  69397. + done_carry_pool(pool);
  69398. +
  69399. + return result;
  69400. +}
  69401. +
  69402. +/* form carry queue to perform paste of @data with @key at @coord, and launch
  69403. + its execution by calling carry().
  69404. +
  69405. + Instruct carry to update @lh if after balancing the insertion coord moves into
  69406. + different block.
  69407. +
  69408. +*/
  69409. +static int paste_with_carry(coord_t *coord, /* coord of paste */
  69410. + lock_handle * lh, /* lock handle of node
  69411. + * where item is
  69412. + * pasted */
  69413. + reiser4_item_data * data, /* parameters of new
  69414. + * item */
  69415. + const reiser4_key * key, /* key of new item */
  69416. + unsigned flags/* paste flags */)
  69417. +{
  69418. + int result;
  69419. + carry_pool *pool;
  69420. + carry_level *lowest_level;
  69421. + carry_insert_data *cdata;
  69422. + carry_op *op;
  69423. +
  69424. + assert("umka-315", coord != NULL);
  69425. + assert("umka-316", key != NULL);
  69426. +
  69427. + pool =
  69428. + init_carry_pool(sizeof(*pool) + 3 * sizeof(*lowest_level) +
  69429. + sizeof(*cdata));
  69430. + if (IS_ERR(pool))
  69431. + return PTR_ERR(pool);
  69432. + lowest_level = (carry_level *) (pool + 1);
  69433. + init_carry_level(lowest_level, pool);
  69434. +
  69435. + op = reiser4_post_carry(lowest_level, COP_PASTE, coord->node, 0);
  69436. + if (IS_ERR(op) || (op == NULL)) {
  69437. + done_carry_pool(pool);
  69438. + return RETERR(op ? PTR_ERR(op) : -EIO);
  69439. + }
  69440. + cdata = (carry_insert_data *) (lowest_level + 3);
  69441. + cdata->coord = coord;
  69442. + cdata->data = data;
  69443. + cdata->key = key;
  69444. + op->u.paste.d = cdata;
  69445. + if (flags == 0)
  69446. + flags = znode_get_tree(coord->node)->carry.paste_flags;
  69447. + op->u.paste.flags = flags;
  69448. + op->u.paste.type = COPT_ITEM_DATA;
  69449. + if (lh != NULL) {
  69450. + lowest_level->track_type = CARRY_TRACK_CHANGE;
  69451. + lowest_level->tracked = lh;
  69452. + }
  69453. +
  69454. + result = reiser4_carry(lowest_level, NULL);
  69455. + done_carry_pool(pool);
  69456. +
  69457. + return result;
  69458. +}
  69459. +
  69460. +/* insert item at the given coord.
  69461. +
  69462. + First try to skip carry by directly calling ->create_item() method of node
  69463. + plugin. If this is impossible (there is not enough free space in the node,
  69464. + or leftmost item in the node is created), call insert_with_carry_by_coord()
  69465. + that will do full carry().
  69466. +
  69467. +*/
  69468. +insert_result insert_by_coord(coord_t *coord /* coord where to
  69469. + * insert. coord->node has
  69470. + * to be write locked by
  69471. + * caller */ ,
  69472. + reiser4_item_data * data /* data to be
  69473. + * inserted */ ,
  69474. + const reiser4_key * key /* key of new item */ ,
  69475. + lock_handle * lh /* lock handle of write
  69476. + * lock on node */ ,
  69477. + __u32 flags/* insertion flags */)
  69478. +{
  69479. + unsigned item_size;
  69480. + int result;
  69481. + znode *node;
  69482. +
  69483. + assert("vs-247", coord != NULL);
  69484. + assert("vs-248", data != NULL);
  69485. + assert("vs-249", data->length >= 0);
  69486. + assert("nikita-1191", znode_is_write_locked(coord->node));
  69487. +
  69488. + node = coord->node;
  69489. + coord_clear_iplug(coord);
  69490. + result = zload(node);
  69491. + if (result != 0)
  69492. + return result;
  69493. +
  69494. + item_size = space_needed(node, NULL, data, 1);
  69495. + if (item_size > znode_free_space(node) &&
  69496. + (flags & COPI_DONT_SHIFT_LEFT) && (flags & COPI_DONT_SHIFT_RIGHT)
  69497. + && (flags & COPI_DONT_ALLOCATE)) {
  69498. + /* we are forced to use free space of coord->node and new item
  69499. + does not fit into it.
  69500. +
  69501. + Currently we get here only when we allocate and copy units
  69502. + of extent item from a node to its left neighbor during
  69503. + "squalloc"-ing. If @node (this is left neighbor) does not
  69504. + have enough free space - we do not want to attempt any
  69505. + shifting and allocations because we are in squeezing and
  69506. + everything to the left of @node is tightly packed.
  69507. + */
  69508. + result = -E_NODE_FULL;
  69509. + } else if ((item_size <= znode_free_space(node)) &&
  69510. + !coord_is_before_leftmost(coord) &&
  69511. + (node_plugin_by_node(node)->fast_insert != NULL)
  69512. + && node_plugin_by_node(node)->fast_insert(coord)) {
  69513. + /* shortcut insertion without carry() overhead.
  69514. +
  69515. + Only possible if:
  69516. +
  69517. + - there is enough free space
  69518. +
  69519. + - insertion is not into the leftmost position in a node
  69520. + (otherwise it would require updating of delimiting key in a
  69521. + parent)
  69522. +
  69523. + - node plugin agrees with this
  69524. +
  69525. + */
  69526. + result =
  69527. + node_plugin_by_node(node)->create_item(coord, key, data,
  69528. + NULL);
  69529. + znode_make_dirty(node);
  69530. + } else {
  69531. + /* otherwise do full-fledged carry(). */
  69532. + result =
  69533. + insert_with_carry_by_coord(coord, lh, data, key, COP_INSERT,
  69534. + flags);
  69535. + }
  69536. + zrelse(node);
  69537. + return result;
  69538. +}
  69539. +
  69540. +/* @coord is set to leaf level and @data is to be inserted to twig level */
  69541. +insert_result
  69542. +insert_extent_by_coord(coord_t *coord, /* coord where to insert.
  69543. + * coord->node has to be write
  69544. + * locked by caller */
  69545. + reiser4_item_data *data,/* data to be inserted */
  69546. + const reiser4_key *key, /* key of new item */
  69547. + lock_handle *lh /* lock handle of write lock
  69548. + on node */)
  69549. +{
  69550. + assert("vs-405", coord != NULL);
  69551. + assert("vs-406", data != NULL);
  69552. + assert("vs-407", data->length > 0);
  69553. + assert("vs-408", znode_is_write_locked(coord->node));
  69554. + assert("vs-409", znode_get_level(coord->node) == LEAF_LEVEL);
  69555. +
  69556. + return insert_with_carry_by_coord(coord, lh, data, key, COP_EXTENT,
  69557. + 0 /*flags */ );
  69558. +}
  69559. +
  69560. +/* Insert into the item at the given coord.
  69561. +
  69562. + First try to skip carry by directly calling ->paste() method of item
  69563. + plugin. If this is impossible (there is not enough free space in the node,
  69564. + or we are pasting into leftmost position in the node), call
  69565. + paste_with_carry() that will do full carry().
  69566. +
  69567. +*/
  69568. +/* paste_into_item */
  69569. +int insert_into_item(coord_t * coord /* coord of pasting */ ,
  69570. + lock_handle * lh /* lock handle on node involved */ ,
  69571. + const reiser4_key * key /* key of unit being pasted */ ,
  69572. + reiser4_item_data * data /* parameters for new unit */ ,
  69573. + unsigned flags /* insert/paste flags */ )
  69574. +{
  69575. + int result;
  69576. + int size_change;
  69577. + node_plugin *nplug;
  69578. + item_plugin *iplug;
  69579. +
  69580. + assert("umka-317", coord != NULL);
  69581. + assert("umka-318", key != NULL);
  69582. +
  69583. + iplug = item_plugin_by_coord(coord);
  69584. + nplug = node_plugin_by_coord(coord);
  69585. +
  69586. + assert("nikita-1480", iplug == data->iplug);
  69587. +
  69588. + size_change = space_needed(coord->node, coord, data, 0);
  69589. + if (size_change > (int)znode_free_space(coord->node) &&
  69590. + (flags & COPI_DONT_SHIFT_LEFT) && (flags & COPI_DONT_SHIFT_RIGHT)
  69591. + && (flags & COPI_DONT_ALLOCATE)) {
  69592. + /* we are forced to use free space of coord->node and new data
  69593. + does not fit into it. */
  69594. + return -E_NODE_FULL;
  69595. + }
  69596. +
  69597. + /* shortcut paste without carry() overhead.
  69598. +
  69599. + Only possible if:
  69600. +
  69601. + - there is enough free space
  69602. +
  69603. + - paste is not into the leftmost unit in a node (otherwise
  69604. + it would require updating of delimiting key in a parent)
  69605. +
  69606. + - node plugin agrees with this
  69607. +
  69608. + - item plugin agrees with us
  69609. + */
  69610. + if (size_change <= (int)znode_free_space(coord->node) &&
  69611. + (coord->item_pos != 0 ||
  69612. + coord->unit_pos != 0 || coord->between == AFTER_UNIT) &&
  69613. + coord->unit_pos != 0 && nplug->fast_paste != NULL &&
  69614. + nplug->fast_paste(coord) &&
  69615. + iplug->b.fast_paste != NULL && iplug->b.fast_paste(coord)) {
  69616. + if (size_change > 0)
  69617. + nplug->change_item_size(coord, size_change);
  69618. + /* NOTE-NIKITA: huh? where @key is used? */
  69619. + result = iplug->b.paste(coord, data, NULL);
  69620. + if (size_change < 0)
  69621. + nplug->change_item_size(coord, size_change);
  69622. + znode_make_dirty(coord->node);
  69623. + } else
  69624. + /* otherwise do full-fledged carry(). */
  69625. + result = paste_with_carry(coord, lh, data, key, flags);
  69626. + return result;
  69627. +}
  69628. +
  69629. +/* this either appends or truncates item @coord */
  69630. +int reiser4_resize_item(coord_t * coord /* coord of item being resized */ ,
  69631. + reiser4_item_data * data /* parameters of resize */ ,
  69632. + reiser4_key * key /* key of new unit */ ,
  69633. + lock_handle * lh /* lock handle of node
  69634. + * being modified */ ,
  69635. + cop_insert_flag flags /* carry flags */ )
  69636. +{
  69637. + int result;
  69638. + znode *node;
  69639. +
  69640. + assert("nikita-362", coord != NULL);
  69641. + assert("nikita-363", data != NULL);
  69642. + assert("vs-245", data->length != 0);
  69643. +
  69644. + node = coord->node;
  69645. + coord_clear_iplug(coord);
  69646. + result = zload(node);
  69647. + if (result != 0)
  69648. + return result;
  69649. +
  69650. + if (data->length < 0)
  69651. + result = node_plugin_by_coord(coord)->shrink_item(coord,
  69652. + -data->length);
  69653. + else
  69654. + result = insert_into_item(coord, lh, key, data, flags);
  69655. +
  69656. + zrelse(node);
  69657. + return result;
  69658. +}
  69659. +
  69660. +/* insert flow @f */
  69661. +int reiser4_insert_flow(coord_t * coord, lock_handle * lh, flow_t * f)
  69662. +{
  69663. + int result;
  69664. + carry_pool *pool;
  69665. + carry_level *lowest_level;
  69666. + reiser4_item_data *data;
  69667. + carry_op *op;
  69668. +
  69669. + pool =
  69670. + init_carry_pool(sizeof(*pool) + 3 * sizeof(*lowest_level) +
  69671. + sizeof(*data));
  69672. + if (IS_ERR(pool))
  69673. + return PTR_ERR(pool);
  69674. + lowest_level = (carry_level *) (pool + 1);
  69675. + init_carry_level(lowest_level, pool);
  69676. +
  69677. + op = reiser4_post_carry(lowest_level, COP_INSERT_FLOW, coord->node,
  69678. + 0 /* operate directly on coord -> node */ );
  69679. + if (IS_ERR(op) || (op == NULL)) {
  69680. + done_carry_pool(pool);
  69681. + return RETERR(op ? PTR_ERR(op) : -EIO);
  69682. + }
  69683. +
  69684. + /* these are permanent during insert_flow */
  69685. + data = (reiser4_item_data *) (lowest_level + 3);
  69686. + data->user = 1;
  69687. + data->iplug = item_plugin_by_id(FORMATTING_ID);
  69688. + data->arg = NULL;
  69689. + /* data.length and data.data will be set before calling paste or
  69690. + insert */
  69691. + data->length = 0;
  69692. + data->data = NULL;
  69693. +
  69694. + op->u.insert_flow.flags = 0;
  69695. + op->u.insert_flow.insert_point = coord;
  69696. + op->u.insert_flow.flow = f;
  69697. + op->u.insert_flow.data = data;
  69698. + op->u.insert_flow.new_nodes = 0;
  69699. +
  69700. + lowest_level->track_type = CARRY_TRACK_CHANGE;
  69701. + lowest_level->tracked = lh;
  69702. +
  69703. + result = reiser4_carry(lowest_level, NULL);
  69704. + done_carry_pool(pool);
  69705. +
  69706. + return result;
  69707. +}
  69708. +
  69709. +/* Given a coord in parent node, obtain a znode for the corresponding child */
  69710. +znode *child_znode(const coord_t * parent_coord /* coord of pointer to
  69711. + * child */ ,
  69712. + znode * parent /* parent of child */ ,
  69713. + int incore_p /* if !0 only return child if already in
  69714. + * memory */ ,
  69715. + int setup_dkeys_p /* if !0 update delimiting keys of
  69716. + * child */ )
  69717. +{
  69718. + znode *child;
  69719. +
  69720. + assert("nikita-1374", parent_coord != NULL);
  69721. + assert("nikita-1482", parent != NULL);
  69722. +#if REISER4_DEBUG
  69723. + if (setup_dkeys_p)
  69724. + assert_rw_not_locked(&(znode_get_tree(parent)->dk_lock));
  69725. +#endif
  69726. + assert("nikita-2947", znode_is_any_locked(parent));
  69727. +
  69728. + if (znode_get_level(parent) <= LEAF_LEVEL) {
  69729. + /* trying to get child of leaf node */
  69730. + warning("nikita-1217", "Child of maize?");
  69731. + return ERR_PTR(RETERR(-EIO));
  69732. + }
  69733. + if (item_is_internal(parent_coord)) {
  69734. + reiser4_block_nr addr;
  69735. + item_plugin *iplug;
  69736. + reiser4_tree *tree;
  69737. +
  69738. + iplug = item_plugin_by_coord(parent_coord);
  69739. + assert("vs-512", iplug->s.internal.down_link);
  69740. + iplug->s.internal.down_link(parent_coord, NULL, &addr);
  69741. +
  69742. + tree = znode_get_tree(parent);
  69743. + if (incore_p)
  69744. + child = zlook(tree, &addr);
  69745. + else
  69746. + child =
  69747. + zget(tree, &addr, parent,
  69748. + znode_get_level(parent) - 1,
  69749. + reiser4_ctx_gfp_mask_get());
  69750. + if ((child != NULL) && !IS_ERR(child) && setup_dkeys_p)
  69751. + set_child_delimiting_keys(parent, parent_coord, child);
  69752. + } else {
  69753. + warning("nikita-1483", "Internal item expected");
  69754. + child = ERR_PTR(RETERR(-EIO));
  69755. + }
  69756. + return child;
  69757. +}
  69758. +
  69759. +/* remove znode from transaction */
  69760. +static void uncapture_znode(znode * node)
  69761. +{
  69762. + struct page *page;
  69763. +
  69764. + assert("zam-1001", ZF_ISSET(node, JNODE_HEARD_BANSHEE));
  69765. +
  69766. + if (!reiser4_blocknr_is_fake(znode_get_block(node))) {
  69767. + int ret;
  69768. +
  69769. + /* An already allocated block goes right to the atom's delete set. */
  69770. + ret =
  69771. + reiser4_dealloc_block(znode_get_block(node), 0,
  69772. + BA_DEFER | BA_FORMATTED);
  69773. + if (ret)
  69774. + warning("zam-942",
  69775. + "can\'t add a block (%llu) number to atom's delete set\n",
  69776. + (unsigned long long)(*znode_get_block(node)));
  69777. +
  69778. + spin_lock_znode(node);
  69779. + /* Here we return flush reserved block which was reserved at the
  69780. + * moment when this allocated node was marked dirty and still
  69781. + * not used by flush in node relocation procedure. */
  69782. + if (ZF_ISSET(node, JNODE_FLUSH_RESERVED)) {
  69783. + txn_atom *atom;
  69784. +
  69785. + atom = jnode_get_atom(ZJNODE(node));
  69786. + assert("zam-939", atom != NULL);
  69787. + spin_unlock_znode(node);
  69788. + flush_reserved2grabbed(atom, (__u64) 1);
  69789. + spin_unlock_atom(atom);
  69790. + } else
  69791. + spin_unlock_znode(node);
  69792. + } else {
  69793. + /* znode has assigned block which is counted as "fake
  69794. + allocated". Return it back to "free blocks") */
  69795. + fake_allocated2free((__u64) 1, BA_FORMATTED);
  69796. + }
  69797. +
  69798. + /*
  69799. + * uncapture page from transaction. There is a possibility of a race
  69800. + * with ->releasepage(): reiser4_releasepage() detaches page from this
  69801. + * jnode and we have nothing to uncapture. To avoid this, get
  69802. + * reference of node->pg under jnode spin lock. reiser4_uncapture_page()
  69803. + * will deal with released page itself.
  69804. + */
  69805. + spin_lock_znode(node);
  69806. + page = znode_page(node);
  69807. + if (likely(page != NULL)) {
  69808. + /*
  69809. + * reiser4_uncapture_page() can only be called when we are sure
  69810. + * that znode is pinned in memory, which we are, because
  69811. + * forget_znode() is only called from longterm_unlock_znode().
  69812. + */
  69813. + get_page(page);
  69814. + spin_unlock_znode(node);
  69815. + lock_page(page);
  69816. + reiser4_uncapture_page(page);
  69817. + unlock_page(page);
  69818. + put_page(page);
  69819. + } else {
  69820. + txn_atom *atom;
  69821. +
  69822. + /* handle "flush queued" znodes */
  69823. + while (1) {
  69824. + atom = jnode_get_atom(ZJNODE(node));
  69825. + assert("zam-943", atom != NULL);
  69826. +
  69827. + if (!ZF_ISSET(node, JNODE_FLUSH_QUEUED)
  69828. + || !atom->nr_running_queues)
  69829. + break;
  69830. +
  69831. + spin_unlock_znode(node);
  69832. + reiser4_atom_wait_event(atom);
  69833. + spin_lock_znode(node);
  69834. + }
  69835. +
  69836. + reiser4_uncapture_block(ZJNODE(node));
  69837. + spin_unlock_atom(atom);
  69838. + zput(node);
  69839. + }
  69840. +}
  69841. +
  69842. +/* This is called from longterm_unlock_znode() when last lock is released from
  69843. + the node that has been removed from the tree. At this point node is removed
  69844. + from sibling list and its lock is invalidated. */
  69845. +void forget_znode(lock_handle * handle)
  69846. +{
  69847. + znode *node;
  69848. + reiser4_tree *tree;
  69849. +
  69850. + assert("umka-319", handle != NULL);
  69851. +
  69852. + node = handle->node;
  69853. + tree = znode_get_tree(node);
  69854. +
  69855. + assert("vs-164", znode_is_write_locked(node));
  69856. + assert("nikita-1280", ZF_ISSET(node, JNODE_HEARD_BANSHEE));
  69857. + assert_rw_locked(&(node->lock.guard));
  69858. +
  69859. + /* We assume that this node was detached from its parent before
  69860. + * unlocking, it gives no way to reach this node from parent through a
  69861. + * down link. The node should have no children and, thereby, can't be
  69862. + * reached from them by their parent pointers. The only way to obtain a
  69863. + * reference to the node is to use sibling pointers from its left and
  69864. + * right neighbors. In the next several lines we remove the node from
  69865. + * the sibling list. */
  69866. +
  69867. + write_lock_tree(tree);
  69868. + sibling_list_remove(node);
  69869. + znode_remove(node, tree);
  69870. + write_unlock_tree(tree);
  69871. +
  69872. + /* Here we set JNODE_DYING and cancel all pending lock requests. It
  69873. + * forces all lock requestor threads to repeat iterations of getting
  69874. + * lock on a child, neighbor or parent node. But, those threads can't
  69875. + * come to this node again, because this node is no longer a child,
  69876. + * neighbor or parent of any other node. This order of znode
  69877. + * invalidation does not allow other threads to waste cpu time is a busy
  69878. + * loop, trying to lock dying object. The exception is in the flush
  69879. + * code when we take node directly from atom's capture list.*/
  69880. + reiser4_invalidate_lock(handle);
  69881. + uncapture_znode(node);
  69882. +}
  69883. +
  69884. +/* Check that internal item at @pointer really contains pointer to @child. */
  69885. +int check_tree_pointer(const coord_t * pointer /* would-be pointer to
  69886. + * @child */ ,
  69887. + const znode * child /* child znode */ )
  69888. +{
  69889. + assert("nikita-1016", pointer != NULL);
  69890. + assert("nikita-1017", child != NULL);
  69891. + assert("nikita-1018", pointer->node != NULL);
  69892. +
  69893. + assert("nikita-1325", znode_is_any_locked(pointer->node));
  69894. +
  69895. + assert("nikita-2985",
  69896. + znode_get_level(pointer->node) == znode_get_level(child) + 1);
  69897. +
  69898. + coord_clear_iplug((coord_t *) pointer);
  69899. +
  69900. + if (coord_is_existing_unit(pointer)) {
  69901. + item_plugin *iplug;
  69902. + reiser4_block_nr addr;
  69903. +
  69904. + if (item_is_internal(pointer)) {
  69905. + iplug = item_plugin_by_coord(pointer);
  69906. + assert("vs-513", iplug->s.internal.down_link);
  69907. + iplug->s.internal.down_link(pointer, NULL, &addr);
  69908. + /* check that cached value is correct */
  69909. + if (disk_addr_eq(&addr, znode_get_block(child))) {
  69910. + return NS_FOUND;
  69911. + }
  69912. + }
  69913. + }
  69914. + /* warning ("jmacd-1002", "tree pointer incorrect"); */
  69915. + return NS_NOT_FOUND;
  69916. +}
  69917. +
  69918. +/* find coord of pointer to new @child in @parent.
  69919. +
  69920. + Find the &coord_t in the @parent where pointer to a given @child will
  69921. + be in.
  69922. +
  69923. +*/
  69924. +int find_new_child_ptr(znode * parent /* parent znode, passed locked */ ,
  69925. + znode *
  69926. + child UNUSED_ARG /* child znode, passed locked */ ,
  69927. + znode * left /* left brother of new node */ ,
  69928. + coord_t * result /* where result is stored in */ )
  69929. +{
  69930. + int ret;
  69931. +
  69932. + assert("nikita-1486", parent != NULL);
  69933. + assert("nikita-1487", child != NULL);
  69934. + assert("nikita-1488", result != NULL);
  69935. +
  69936. + ret = find_child_ptr(parent, left, result);
  69937. + if (ret != NS_FOUND) {
  69938. + warning("nikita-1489", "Cannot find brother position: %i", ret);
  69939. + return RETERR(-EIO);
  69940. + } else {
  69941. + result->between = AFTER_UNIT;
  69942. + return RETERR(NS_NOT_FOUND);
  69943. + }
  69944. +}
  69945. +
  69946. +/* find coord of pointer to @child in @parent.
  69947. +
  69948. + Find the &coord_t in the @parent where pointer to a given @child is in.
  69949. +
  69950. +*/
  69951. +int find_child_ptr(znode * parent /* parent znode, passed locked */ ,
  69952. + znode * child /* child znode, passed locked */ ,
  69953. + coord_t * result /* where result is stored in */ )
  69954. +{
  69955. + int lookup_res;
  69956. + node_plugin *nplug;
  69957. + /* left delimiting key of a child */
  69958. + reiser4_key ld;
  69959. + reiser4_tree *tree;
  69960. +
  69961. + assert("nikita-934", parent != NULL);
  69962. + assert("nikita-935", child != NULL);
  69963. + assert("nikita-936", result != NULL);
  69964. + assert("zam-356", znode_is_loaded(parent));
  69965. +
  69966. + coord_init_zero(result);
  69967. + result->node = parent;
  69968. +
  69969. + nplug = parent->nplug;
  69970. + assert("nikita-939", nplug != NULL);
  69971. +
  69972. + tree = znode_get_tree(parent);
  69973. + /* NOTE-NIKITA taking read-lock on tree here assumes that @result is
  69974. + * not aliased to ->in_parent of some znode. Otherwise,
  69975. + * parent_coord_to_coord() below would modify data protected by tree
  69976. + * lock. */
  69977. + read_lock_tree(tree);
  69978. + /* fast path. Try to use cached value. Lock tree to keep
  69979. + node->pos_in_parent and pos->*_blocknr consistent. */
  69980. + if (child->in_parent.item_pos + 1 != 0) {
  69981. + parent_coord_to_coord(&child->in_parent, result);
  69982. + if (check_tree_pointer(result, child) == NS_FOUND) {
  69983. + read_unlock_tree(tree);
  69984. + return NS_FOUND;
  69985. + }
  69986. +
  69987. + child->in_parent.item_pos = (unsigned short)~0;
  69988. + }
  69989. + read_unlock_tree(tree);
  69990. +
  69991. + /* is above failed, find some key from @child. We are looking for the
  69992. + least key in a child. */
  69993. + read_lock_dk(tree);
  69994. + ld = *znode_get_ld_key(child);
  69995. + read_unlock_dk(tree);
  69996. + /*
  69997. + * now, lookup parent with key just found. Note, that left delimiting
  69998. + * key doesn't identify node uniquely, because (in extremely rare
  69999. + * case) two nodes can have equal left delimiting keys, if one of them
  70000. + * is completely filled with directory entries that all happened to be
  70001. + * hash collision. But, we check block number in check_tree_pointer()
  70002. + * and, so, are safe.
  70003. + */
  70004. + lookup_res = nplug->lookup(parent, &ld, FIND_EXACT, result);
  70005. + /* update cached pos_in_node */
  70006. + if (lookup_res == NS_FOUND) {
  70007. + write_lock_tree(tree);
  70008. + coord_to_parent_coord(result, &child->in_parent);
  70009. + write_unlock_tree(tree);
  70010. + lookup_res = check_tree_pointer(result, child);
  70011. + }
  70012. + if (lookup_res == NS_NOT_FOUND)
  70013. + lookup_res = find_child_by_addr(parent, child, result);
  70014. + return lookup_res;
  70015. +}
  70016. +
  70017. +/* find coord of pointer to @child in @parent by scanning
  70018. +
  70019. + Find the &coord_t in the @parent where pointer to a given @child
  70020. + is in by scanning all internal items in @parent and comparing block
  70021. + numbers in them with that of @child.
  70022. +
  70023. +*/
  70024. +static int find_child_by_addr(znode * parent /* parent znode, passed locked */ ,
  70025. + znode * child /* child znode, passed locked */ ,
  70026. + coord_t * result /* where result is stored in */ )
  70027. +{
  70028. + int ret;
  70029. +
  70030. + assert("nikita-1320", parent != NULL);
  70031. + assert("nikita-1321", child != NULL);
  70032. + assert("nikita-1322", result != NULL);
  70033. +
  70034. + ret = NS_NOT_FOUND;
  70035. +
  70036. + for_all_units(result, parent) {
  70037. + if (check_tree_pointer(result, child) == NS_FOUND) {
  70038. + write_lock_tree(znode_get_tree(parent));
  70039. + coord_to_parent_coord(result, &child->in_parent);
  70040. + write_unlock_tree(znode_get_tree(parent));
  70041. + ret = NS_FOUND;
  70042. + break;
  70043. + }
  70044. + }
  70045. + return ret;
  70046. +}
  70047. +
  70048. +/* true, if @addr is "unallocated block number", which is just address, with
  70049. + highest bit set. */
  70050. +int is_disk_addr_unallocated(const reiser4_block_nr * addr /* address to
  70051. + * check */ )
  70052. +{
  70053. + assert("nikita-1766", addr != NULL);
  70054. + cassert(sizeof(reiser4_block_nr) == 8);
  70055. + return (*addr & REISER4_BLOCKNR_STATUS_BIT_MASK) ==
  70056. + REISER4_UNALLOCATED_STATUS_VALUE;
  70057. +}
  70058. +
  70059. +/* returns true if removing bytes of given range of key [from_key, to_key]
  70060. + causes removing of whole item @from */
  70061. +static int
  70062. +item_removed_completely(coord_t * from, const reiser4_key * from_key,
  70063. + const reiser4_key * to_key)
  70064. +{
  70065. + item_plugin *iplug;
  70066. + reiser4_key key_in_item;
  70067. +
  70068. + assert("umka-325", from != NULL);
  70069. + assert("", item_is_extent(from));
  70070. +
  70071. + /* check first key just for case */
  70072. + item_key_by_coord(from, &key_in_item);
  70073. + if (keygt(from_key, &key_in_item))
  70074. + return 0;
  70075. +
  70076. + /* check last key */
  70077. + iplug = item_plugin_by_coord(from);
  70078. + assert("vs-611", iplug && iplug->s.file.append_key);
  70079. +
  70080. + iplug->s.file.append_key(from, &key_in_item);
  70081. + set_key_offset(&key_in_item, get_key_offset(&key_in_item) - 1);
  70082. +
  70083. + if (keylt(to_key, &key_in_item))
  70084. + /* last byte is not removed */
  70085. + return 0;
  70086. + return 1;
  70087. +}
  70088. +
  70089. +/* helper function for prepare_twig_kill(): @left and @right are formatted
  70090. + * neighbors of extent item being completely removed. Load and lock neighbors
  70091. + * and store lock handles into @cdata for later use by kill_hook_extent() */
  70092. +static int
  70093. +prepare_children(znode * left, znode * right, carry_kill_data * kdata)
  70094. +{
  70095. + int result;
  70096. + int left_loaded;
  70097. + int right_loaded;
  70098. +
  70099. + result = 0;
  70100. + left_loaded = right_loaded = 0;
  70101. +
  70102. + if (left != NULL) {
  70103. + result = zload(left);
  70104. + if (result == 0) {
  70105. + left_loaded = 1;
  70106. + result = longterm_lock_znode(kdata->left, left,
  70107. + ZNODE_READ_LOCK,
  70108. + ZNODE_LOCK_LOPRI);
  70109. + }
  70110. + }
  70111. + if (result == 0 && right != NULL) {
  70112. + result = zload(right);
  70113. + if (result == 0) {
  70114. + right_loaded = 1;
  70115. + result = longterm_lock_znode(kdata->right, right,
  70116. + ZNODE_READ_LOCK,
  70117. + ZNODE_LOCK_HIPRI |
  70118. + ZNODE_LOCK_NONBLOCK);
  70119. + }
  70120. + }
  70121. + if (result != 0) {
  70122. + done_lh(kdata->left);
  70123. + done_lh(kdata->right);
  70124. + if (left_loaded != 0)
  70125. + zrelse(left);
  70126. + if (right_loaded != 0)
  70127. + zrelse(right);
  70128. + }
  70129. + return result;
  70130. +}
  70131. +
  70132. +static void done_children(carry_kill_data * kdata)
  70133. +{
  70134. + if (kdata->left != NULL && kdata->left->node != NULL) {
  70135. + zrelse(kdata->left->node);
  70136. + done_lh(kdata->left);
  70137. + }
  70138. + if (kdata->right != NULL && kdata->right->node != NULL) {
  70139. + zrelse(kdata->right->node);
  70140. + done_lh(kdata->right);
  70141. + }
  70142. +}
  70143. +
  70144. +/* part of cut_node. It is called when cut_node is called to remove or cut part
  70145. + of extent item. When head of that item is removed - we have to update right
  70146. + delimiting of left neighbor of extent. When item is removed completely - we
  70147. + have to set sibling link between left and right neighbor of removed
  70148. + extent. This may return -E_DEADLOCK because of trying to get left neighbor
  70149. + locked. So, caller should repeat an attempt
  70150. +*/
  70151. +/* Audited by: umka (2002.06.16) */
  70152. +static int
  70153. +prepare_twig_kill(carry_kill_data * kdata, znode * locked_left_neighbor)
  70154. +{
  70155. + int result;
  70156. + reiser4_key key;
  70157. + lock_handle left_lh;
  70158. + lock_handle right_lh;
  70159. + coord_t left_coord;
  70160. + coord_t *from;
  70161. + znode *left_child;
  70162. + znode *right_child;
  70163. + reiser4_tree *tree;
  70164. + int left_zloaded_here, right_zloaded_here;
  70165. +
  70166. + from = kdata->params.from;
  70167. + assert("umka-326", from != NULL);
  70168. + assert("umka-327", kdata->params.to != NULL);
  70169. +
  70170. + /* for one extent item only yet */
  70171. + assert("vs-591", item_is_extent(from));
  70172. + assert("vs-592", from->item_pos == kdata->params.to->item_pos);
  70173. +
  70174. + if ((kdata->params.from_key
  70175. + && keygt(kdata->params.from_key, item_key_by_coord(from, &key)))
  70176. + || from->unit_pos != 0) {
  70177. + /* head of item @from is not removed, there is nothing to
  70178. + worry about */
  70179. + return 0;
  70180. + }
  70181. +
  70182. + result = 0;
  70183. + left_zloaded_here = 0;
  70184. + right_zloaded_here = 0;
  70185. +
  70186. + left_child = right_child = NULL;
  70187. +
  70188. + coord_dup(&left_coord, from);
  70189. + init_lh(&left_lh);
  70190. + init_lh(&right_lh);
  70191. + if (coord_prev_unit(&left_coord)) {
  70192. + /* @from is leftmost item in its node */
  70193. + if (!locked_left_neighbor) {
  70194. + result =
  70195. + reiser4_get_left_neighbor(&left_lh, from->node,
  70196. + ZNODE_READ_LOCK,
  70197. + GN_CAN_USE_UPPER_LEVELS);
  70198. + switch (result) {
  70199. + case 0:
  70200. + break;
  70201. + case -E_NO_NEIGHBOR:
  70202. + /* there is no formatted node to the left of
  70203. + from->node */
  70204. + warning("vs-605",
  70205. + "extent item has smallest key in "
  70206. + "the tree and it is about to be removed");
  70207. + return 0;
  70208. + case -E_DEADLOCK:
  70209. + /* need to restart */
  70210. + default:
  70211. + return result;
  70212. + }
  70213. +
  70214. + /* we have acquired left neighbor of from->node */
  70215. + result = zload(left_lh.node);
  70216. + if (result)
  70217. + goto done;
  70218. +
  70219. + locked_left_neighbor = left_lh.node;
  70220. + } else {
  70221. + /* squalloc_right_twig_cut should have supplied locked
  70222. + * left neighbor */
  70223. + assert("vs-834",
  70224. + znode_is_write_locked(locked_left_neighbor));
  70225. + result = zload(locked_left_neighbor);
  70226. + if (result)
  70227. + return result;
  70228. + }
  70229. +
  70230. + left_zloaded_here = 1;
  70231. + coord_init_last_unit(&left_coord, locked_left_neighbor);
  70232. + }
  70233. +
  70234. + if (!item_is_internal(&left_coord)) {
  70235. + /* what else but extent can be on twig level */
  70236. + assert("vs-606", item_is_extent(&left_coord));
  70237. +
  70238. + /* there is no left formatted child */
  70239. + if (left_zloaded_here)
  70240. + zrelse(locked_left_neighbor);
  70241. + done_lh(&left_lh);
  70242. + return 0;
  70243. + }
  70244. +
  70245. + tree = znode_get_tree(left_coord.node);
  70246. + left_child = child_znode(&left_coord, left_coord.node, 1, 0);
  70247. +
  70248. + if (IS_ERR(left_child)) {
  70249. + result = PTR_ERR(left_child);
  70250. + goto done;
  70251. + }
  70252. +
  70253. + /* left child is acquired, calculate new right delimiting key for it
  70254. + and get right child if it is necessary */
  70255. + if (item_removed_completely
  70256. + (from, kdata->params.from_key, kdata->params.to_key)) {
  70257. + /* try to get right child of removed item */
  70258. + coord_t right_coord;
  70259. +
  70260. + assert("vs-607",
  70261. + kdata->params.to->unit_pos ==
  70262. + coord_last_unit_pos(kdata->params.to));
  70263. + coord_dup(&right_coord, kdata->params.to);
  70264. + if (coord_next_unit(&right_coord)) {
  70265. + /* @to is rightmost unit in the node */
  70266. + result =
  70267. + reiser4_get_right_neighbor(&right_lh, from->node,
  70268. + ZNODE_READ_LOCK,
  70269. + GN_CAN_USE_UPPER_LEVELS);
  70270. + switch (result) {
  70271. + case 0:
  70272. + result = zload(right_lh.node);
  70273. + if (result)
  70274. + goto done;
  70275. +
  70276. + right_zloaded_here = 1;
  70277. + coord_init_first_unit(&right_coord,
  70278. + right_lh.node);
  70279. + item_key_by_coord(&right_coord, &key);
  70280. + break;
  70281. +
  70282. + case -E_NO_NEIGHBOR:
  70283. + /* there is no formatted node to the right of
  70284. + from->node */
  70285. + read_lock_dk(tree);
  70286. + key = *znode_get_rd_key(from->node);
  70287. + read_unlock_dk(tree);
  70288. + right_coord.node = NULL;
  70289. + result = 0;
  70290. + break;
  70291. + default:
  70292. + /* real error */
  70293. + goto done;
  70294. + }
  70295. + } else {
  70296. + /* there is an item to the right of @from - take its key */
  70297. + item_key_by_coord(&right_coord, &key);
  70298. + }
  70299. +
  70300. + /* try to get right child of @from */
  70301. + if (right_coord.node && /* there is right neighbor of @from */
  70302. + item_is_internal(&right_coord)) { /* it is internal item */
  70303. + right_child = child_znode(&right_coord,
  70304. + right_coord.node, 1, 0);
  70305. +
  70306. + if (IS_ERR(right_child)) {
  70307. + result = PTR_ERR(right_child);
  70308. + goto done;
  70309. + }
  70310. +
  70311. + }
  70312. + /* whole extent is removed between znodes left_child and right_child. Prepare them for linking and
  70313. + update of right delimiting key of left_child */
  70314. + result = prepare_children(left_child, right_child, kdata);
  70315. + } else {
  70316. + /* head of item @to is removed. left_child has to get right delimting key update. Prepare it for that */
  70317. + result = prepare_children(left_child, NULL, kdata);
  70318. + }
  70319. +
  70320. + done:
  70321. + if (right_child)
  70322. + zput(right_child);
  70323. + if (right_zloaded_here)
  70324. + zrelse(right_lh.node);
  70325. + done_lh(&right_lh);
  70326. +
  70327. + if (left_child)
  70328. + zput(left_child);
  70329. + if (left_zloaded_here)
  70330. + zrelse(locked_left_neighbor);
  70331. + done_lh(&left_lh);
  70332. + return result;
  70333. +}
  70334. +
  70335. +/* this is used to remove part of node content between coordinates @from and @to. Units to which @from and @to are set
  70336. + are to be cut completely */
  70337. +/* for try_to_merge_with_left, delete_copied, reiser4_delete_node */
  70338. +int cut_node_content(coord_t * from, coord_t * to, const reiser4_key * from_key, /* first key to be removed */
  70339. + const reiser4_key * to_key, /* last key to be removed */
  70340. + reiser4_key *
  70341. + smallest_removed /* smallest key actually removed */ )
  70342. +{
  70343. + int result;
  70344. + carry_pool *pool;
  70345. + carry_level *lowest_level;
  70346. + carry_cut_data *cut_data;
  70347. + carry_op *op;
  70348. +
  70349. + assert("vs-1715", coord_compare(from, to) != COORD_CMP_ON_RIGHT);
  70350. +
  70351. + pool =
  70352. + init_carry_pool(sizeof(*pool) + 3 * sizeof(*lowest_level) +
  70353. + sizeof(*cut_data));
  70354. + if (IS_ERR(pool))
  70355. + return PTR_ERR(pool);
  70356. + lowest_level = (carry_level *) (pool + 1);
  70357. + init_carry_level(lowest_level, pool);
  70358. +
  70359. + op = reiser4_post_carry(lowest_level, COP_CUT, from->node, 0);
  70360. + assert("vs-1509", op != 0);
  70361. + if (IS_ERR(op)) {
  70362. + done_carry_pool(pool);
  70363. + return PTR_ERR(op);
  70364. + }
  70365. +
  70366. + cut_data = (carry_cut_data *) (lowest_level + 3);
  70367. + cut_data->params.from = from;
  70368. + cut_data->params.to = to;
  70369. + cut_data->params.from_key = from_key;
  70370. + cut_data->params.to_key = to_key;
  70371. + cut_data->params.smallest_removed = smallest_removed;
  70372. +
  70373. + op->u.cut_or_kill.is_cut = 1;
  70374. + op->u.cut_or_kill.u.cut = cut_data;
  70375. +
  70376. + result = reiser4_carry(lowest_level, NULL);
  70377. + done_carry_pool(pool);
  70378. +
  70379. + return result;
  70380. +}
  70381. +
  70382. +/* cut part of the node
  70383. +
  70384. + Cut part or whole content of node.
  70385. +
  70386. + cut data between @from and @to of @from->node and call carry() to make
  70387. + corresponding changes in the tree. @from->node may become empty. If so -
  70388. + pointer to it will be removed. Neighboring nodes are not changed. Smallest
  70389. + removed key is stored in @smallest_removed
  70390. +
  70391. +*/
  70392. +int kill_node_content(coord_t * from, /* coord of the first unit/item that will be eliminated */
  70393. + coord_t * to, /* coord of the last unit/item that will be eliminated */
  70394. + const reiser4_key * from_key, /* first key to be removed */
  70395. + const reiser4_key * to_key, /* last key to be removed */
  70396. + reiser4_key * smallest_removed, /* smallest key actually removed */
  70397. + znode * locked_left_neighbor, /* this is set when kill_node_content is called with left neighbor
  70398. + * locked (in squalloc_right_twig_cut, namely) */
  70399. + struct inode *inode, /* inode of file whose item (or its part) is to be killed. This is necessary to
  70400. + invalidate pages together with item pointing to them */
  70401. + int truncate)
  70402. +{ /* this call is made for file truncate) */
  70403. + int result;
  70404. + carry_pool *pool;
  70405. + carry_level *lowest_level;
  70406. + carry_kill_data *kdata;
  70407. + lock_handle *left_child;
  70408. + lock_handle *right_child;
  70409. + carry_op *op;
  70410. +
  70411. + assert("umka-328", from != NULL);
  70412. + assert("vs-316", !node_is_empty(from->node));
  70413. + assert("nikita-1812", coord_is_existing_unit(from)
  70414. + && coord_is_existing_unit(to));
  70415. +
  70416. + /* allocate carry_pool, 3 carry_level-s, carry_kill_data and structures for kill_hook_extent */
  70417. + pool = init_carry_pool(sizeof(*pool) + 3 * sizeof(*lowest_level) +
  70418. + sizeof(carry_kill_data) +
  70419. + 2 * sizeof(lock_handle) +
  70420. + 5 * sizeof(reiser4_key) + 2 * sizeof(coord_t));
  70421. + if (IS_ERR(pool))
  70422. + return PTR_ERR(pool);
  70423. +
  70424. + lowest_level = (carry_level *) (pool + 1);
  70425. + init_carry_level(lowest_level, pool);
  70426. +
  70427. + kdata = (carry_kill_data *) (lowest_level + 3);
  70428. + left_child = (lock_handle *) (kdata + 1);
  70429. + right_child = left_child + 1;
  70430. +
  70431. + init_lh(left_child);
  70432. + init_lh(right_child);
  70433. +
  70434. + kdata->params.from = from;
  70435. + kdata->params.to = to;
  70436. + kdata->params.from_key = from_key;
  70437. + kdata->params.to_key = to_key;
  70438. + kdata->params.smallest_removed = smallest_removed;
  70439. + kdata->params.truncate = truncate;
  70440. + kdata->flags = 0;
  70441. + kdata->inode = inode;
  70442. + kdata->left = left_child;
  70443. + kdata->right = right_child;
  70444. + /* memory for 5 reiser4_key and 2 coord_t will be used in kill_hook_extent */
  70445. + kdata->buf = (char *)(right_child + 1);
  70446. +
  70447. + if (znode_get_level(from->node) == TWIG_LEVEL && item_is_extent(from)) {
  70448. + /* left child of extent item may have to get updated right
  70449. + delimiting key and to get linked with right child of extent
  70450. + @from if it will be removed completely */
  70451. + result = prepare_twig_kill(kdata, locked_left_neighbor);
  70452. + if (result) {
  70453. + done_children(kdata);
  70454. + done_carry_pool(pool);
  70455. + return result;
  70456. + }
  70457. + }
  70458. +
  70459. + op = reiser4_post_carry(lowest_level, COP_CUT, from->node, 0);
  70460. + if (IS_ERR(op) || (op == NULL)) {
  70461. + done_children(kdata);
  70462. + done_carry_pool(pool);
  70463. + return RETERR(op ? PTR_ERR(op) : -EIO);
  70464. + }
  70465. +
  70466. + op->u.cut_or_kill.is_cut = 0;
  70467. + op->u.cut_or_kill.u.kill = kdata;
  70468. +
  70469. + result = reiser4_carry(lowest_level, NULL);
  70470. +
  70471. + done_children(kdata);
  70472. + done_carry_pool(pool);
  70473. + return result;
  70474. +}
  70475. +
  70476. +void
  70477. +fake_kill_hook_tail(struct inode *inode, loff_t start, loff_t end, int truncate)
  70478. +{
  70479. + if (reiser4_inode_get_flag(inode, REISER4_HAS_MMAP)) {
  70480. + pgoff_t start_pg, end_pg;
  70481. +
  70482. + start_pg = start >> PAGE_SHIFT;
  70483. + end_pg = (end - 1) >> PAGE_SHIFT;
  70484. +
  70485. + if ((start & (PAGE_SIZE - 1)) == 0) {
  70486. + /*
  70487. + * kill up to the page boundary.
  70488. + */
  70489. + assert("vs-123456", start_pg == end_pg);
  70490. + reiser4_invalidate_pages(inode->i_mapping, start_pg, 1,
  70491. + truncate);
  70492. + } else if (start_pg != end_pg) {
  70493. + /*
  70494. + * page boundary is within killed portion of node.
  70495. + */
  70496. + assert("vs-654321", end_pg - start_pg == 1);
  70497. + reiser4_invalidate_pages(inode->i_mapping, end_pg,
  70498. + end_pg - start_pg, 1);
  70499. + }
  70500. + }
  70501. + inode_sub_bytes(inode, end - start);
  70502. +}
  70503. +
  70504. +/**
  70505. + * Delete whole @node from the reiser4 tree without loading it.
  70506. + *
  70507. + * @left: locked left neighbor,
  70508. + * @node: node to be deleted,
  70509. + * @smallest_removed: leftmost key of deleted node,
  70510. + * @object: inode pointer, if we truncate a file body.
  70511. + * @truncate: true if called for file truncate.
  70512. + *
  70513. + * @return: 0 if success, error code otherwise.
  70514. + *
  70515. + * NOTE: if @object!=NULL we assume that @smallest_removed != NULL and it
  70516. + * contains the right value of the smallest removed key from the previous
  70517. + * cut_worker() iteration. This is needed for proper accounting of
  70518. + * "i_blocks" and "i_bytes" fields of the @object.
  70519. + */
  70520. +int reiser4_delete_node(znode * node, reiser4_key * smallest_removed,
  70521. + struct inode *object, int truncate)
  70522. +{
  70523. + lock_handle parent_lock;
  70524. + coord_t cut_from;
  70525. + coord_t cut_to;
  70526. + reiser4_tree *tree;
  70527. + int ret;
  70528. +
  70529. + assert("zam-937", node != NULL);
  70530. + assert("zam-933", znode_is_write_locked(node));
  70531. + assert("zam-999", smallest_removed != NULL);
  70532. +
  70533. + init_lh(&parent_lock);
  70534. +
  70535. + ret = reiser4_get_parent(&parent_lock, node, ZNODE_WRITE_LOCK);
  70536. + if (ret)
  70537. + return ret;
  70538. +
  70539. + assert("zam-934", !znode_above_root(parent_lock.node));
  70540. +
  70541. + ret = zload(parent_lock.node);
  70542. + if (ret)
  70543. + goto failed_nozrelse;
  70544. +
  70545. + ret = find_child_ptr(parent_lock.node, node, &cut_from);
  70546. + if (ret)
  70547. + goto failed;
  70548. +
  70549. + /* decrement child counter and set parent pointer to NULL before
  70550. + deleting the list from parent node because of checks in
  70551. + internal_kill_item_hook (we can delete the last item from the parent
  70552. + node, the parent node is going to be deleted and its c_count should
  70553. + be zero). */
  70554. +
  70555. + tree = znode_get_tree(node);
  70556. + write_lock_tree(tree);
  70557. + init_parent_coord(&node->in_parent, NULL);
  70558. + --parent_lock.node->c_count;
  70559. + write_unlock_tree(tree);
  70560. +
  70561. + assert("zam-989", item_is_internal(&cut_from));
  70562. +
  70563. + /* @node should be deleted after unlocking. */
  70564. + ZF_SET(node, JNODE_HEARD_BANSHEE);
  70565. +
  70566. + /* remove a pointer from the parent node to the node being deleted. */
  70567. + coord_dup(&cut_to, &cut_from);
  70568. + /* FIXME: shouldn't this be kill_node_content */
  70569. + ret = cut_node_content(&cut_from, &cut_to, NULL, NULL, NULL);
  70570. + if (ret)
  70571. + /* FIXME(Zam): Should we re-connect the node to its parent if
  70572. + * cut_node fails? */
  70573. + goto failed;
  70574. +
  70575. + {
  70576. + reiser4_tree *tree = current_tree;
  70577. + __u64 start_offset = 0, end_offset = 0;
  70578. +
  70579. + read_lock_tree(tree);
  70580. + write_lock_dk(tree);
  70581. + if (object) {
  70582. + /* We use @smallest_removed and the left delimiting of
  70583. + * the current node for @object->i_blocks, i_bytes
  70584. + * calculation. We assume that the items after the
  70585. + * *@smallest_removed key have been deleted from the
  70586. + * file body. */
  70587. + start_offset = get_key_offset(znode_get_ld_key(node));
  70588. + end_offset = get_key_offset(smallest_removed);
  70589. + }
  70590. +
  70591. + assert("zam-1021", znode_is_connected(node));
  70592. + if (node->left)
  70593. + znode_set_rd_key(node->left, znode_get_rd_key(node));
  70594. +
  70595. + *smallest_removed = *znode_get_ld_key(node);
  70596. +
  70597. + write_unlock_dk(tree);
  70598. + read_unlock_tree(tree);
  70599. +
  70600. + if (object) {
  70601. + /* we used to perform actions which are to be performed on items on their removal from tree in
  70602. + special item method - kill_hook. Here for optimization reasons we avoid reading node
  70603. + containing item we remove and can not call item's kill hook. Instead we call function which
  70604. + does exactly the same things as tail kill hook in assumption that node we avoid reading
  70605. + contains only one item and that item is a tail one. */
  70606. + fake_kill_hook_tail(object, start_offset, end_offset,
  70607. + truncate);
  70608. + }
  70609. + }
  70610. + failed:
  70611. + zrelse(parent_lock.node);
  70612. + failed_nozrelse:
  70613. + done_lh(&parent_lock);
  70614. +
  70615. + return ret;
  70616. +}
  70617. +
  70618. +static int can_delete(const reiser4_key *key, znode *node)
  70619. +{
  70620. + int result;
  70621. +
  70622. + read_lock_dk(current_tree);
  70623. + result = keyle(key, znode_get_ld_key(node));
  70624. + read_unlock_dk(current_tree);
  70625. + return result;
  70626. +}
  70627. +
  70628. +/**
  70629. + * This subroutine is not optimal but implementation seems to
  70630. + * be easier.
  70631. + *
  70632. + * @tap: the point deletion process begins from,
  70633. + * @from_key: the beginning of the deleted key range,
  70634. + * @to_key: the end of the deleted key range,
  70635. + * @smallest_removed: the smallest removed key,
  70636. + * @truncate: true if called for file truncate.
  70637. + * @progress: return true if a progress in file items deletions was made,
  70638. + * @smallest_removed value is actual in that case.
  70639. + *
  70640. + * @return: 0 if success, error code otherwise, -E_REPEAT means that long
  70641. + * reiser4_cut_tree operation was interrupted for allowing atom commit.
  70642. + */
  70643. +int
  70644. +cut_tree_worker_common(tap_t * tap, const reiser4_key * from_key,
  70645. + const reiser4_key * to_key,
  70646. + reiser4_key * smallest_removed, struct inode *object,
  70647. + int truncate, int *progress)
  70648. +{
  70649. + lock_handle next_node_lock;
  70650. + coord_t left_coord;
  70651. + int result;
  70652. +
  70653. + assert("zam-931", tap->coord->node != NULL);
  70654. + assert("zam-932", znode_is_write_locked(tap->coord->node));
  70655. +
  70656. + *progress = 0;
  70657. + init_lh(&next_node_lock);
  70658. +
  70659. + while (1) {
  70660. + znode *node; /* node from which items are cut */
  70661. + node_plugin *nplug; /* node plugin for @node */
  70662. +
  70663. + node = tap->coord->node;
  70664. +
  70665. + /* Move next_node_lock to the next node on the left. */
  70666. + result =
  70667. + reiser4_get_left_neighbor(&next_node_lock, node,
  70668. + ZNODE_WRITE_LOCK,
  70669. + GN_CAN_USE_UPPER_LEVELS);
  70670. + if (result != 0 && result != -E_NO_NEIGHBOR)
  70671. + break;
  70672. + /* Check can we delete the node as a whole. */
  70673. + if (*progress && znode_get_level(node) == LEAF_LEVEL &&
  70674. + can_delete(from_key, node)) {
  70675. + result = reiser4_delete_node(node, smallest_removed,
  70676. + object, truncate);
  70677. + } else {
  70678. + result = reiser4_tap_load(tap);
  70679. + if (result)
  70680. + return result;
  70681. +
  70682. + /* Prepare the second (right) point for cut_node() */
  70683. + if (*progress)
  70684. + coord_init_last_unit(tap->coord, node);
  70685. +
  70686. + else if (item_plugin_by_coord(tap->coord)->b.lookup ==
  70687. + NULL)
  70688. + /* set rightmost unit for the items without lookup method */
  70689. + tap->coord->unit_pos =
  70690. + coord_last_unit_pos(tap->coord);
  70691. +
  70692. + nplug = node->nplug;
  70693. +
  70694. + assert("vs-686", nplug);
  70695. + assert("vs-687", nplug->lookup);
  70696. +
  70697. + /* left_coord is leftmost unit cut from @node */
  70698. + result = nplug->lookup(node, from_key,
  70699. + FIND_MAX_NOT_MORE_THAN,
  70700. + &left_coord);
  70701. +
  70702. + if (IS_CBKERR(result))
  70703. + break;
  70704. +
  70705. + /* adjust coordinates so that they are set to existing units */
  70706. + if (coord_set_to_right(&left_coord)
  70707. + || coord_set_to_left(tap->coord)) {
  70708. + result = 0;
  70709. + break;
  70710. + }
  70711. +
  70712. + if (coord_compare(&left_coord, tap->coord) ==
  70713. + COORD_CMP_ON_RIGHT) {
  70714. + /* keys from @from_key to @to_key are not in the tree */
  70715. + result = 0;
  70716. + break;
  70717. + }
  70718. +
  70719. + if (left_coord.item_pos != tap->coord->item_pos) {
  70720. + /* do not allow to cut more than one item. It is added to solve problem of truncating
  70721. + partially converted files. If file is partially converted there may exist a twig node
  70722. + containing both internal item or items pointing to leaf nodes with formatting items
  70723. + and extent item. We do not want to kill internal items being at twig node here
  70724. + because cut_tree_worker assumes killing them level by level */
  70725. + coord_dup(&left_coord, tap->coord);
  70726. + assert("vs-1652",
  70727. + coord_is_existing_unit(&left_coord));
  70728. + left_coord.unit_pos = 0;
  70729. + }
  70730. +
  70731. + /* cut data from one node */
  70732. + /* *smallest_removed = *reiser4_min_key(); */
  70733. + result =
  70734. + kill_node_content(&left_coord, tap->coord, from_key,
  70735. + to_key, smallest_removed,
  70736. + next_node_lock.node, object,
  70737. + truncate);
  70738. + reiser4_tap_relse(tap);
  70739. + }
  70740. + if (result)
  70741. + break;
  70742. +
  70743. + ++(*progress);
  70744. +
  70745. + /* Check whether all items with keys >= from_key were removed
  70746. + * from the tree. */
  70747. + if (keyle(smallest_removed, from_key))
  70748. + /* result = 0; */
  70749. + break;
  70750. +
  70751. + if (next_node_lock.node == NULL)
  70752. + break;
  70753. +
  70754. + result = reiser4_tap_move(tap, &next_node_lock);
  70755. + done_lh(&next_node_lock);
  70756. + if (result)
  70757. + break;
  70758. +
  70759. + /* Break long reiser4_cut_tree operation (deletion of a large
  70760. + file) if atom requires commit. */
  70761. + if (*progress > CUT_TREE_MIN_ITERATIONS
  70762. + && current_atom_should_commit()) {
  70763. + result = -E_REPEAT;
  70764. + break;
  70765. + }
  70766. + }
  70767. + done_lh(&next_node_lock);
  70768. + /* assert("vs-301", !keyeq(&smallest_removed, reiser4_min_key())); */
  70769. + return result;
  70770. +}
  70771. +
  70772. +/* there is a fundamental problem with optimizing deletes: VFS does it
  70773. + one file at a time. Another problem is that if an item can be
  70774. + anything, then deleting items must be done one at a time. It just
  70775. + seems clean to write this to specify a from and a to key, and cut
  70776. + everything between them though. */
  70777. +
  70778. +/* use this function with care if deleting more than what is part of a single file. */
  70779. +/* do not use this when cutting a single item, it is suboptimal for that */
  70780. +
  70781. +/* You are encouraged to write plugin specific versions of this. It
  70782. + cannot be optimal for all plugins because it works item at a time,
  70783. + and some plugins could sometimes work node at a time. Regular files
  70784. + however are not optimizable to work node at a time because of
  70785. + extents needing to free the blocks they point to.
  70786. +
  70787. + Optimizations compared to v3 code:
  70788. +
  70789. + It does not balance (that task is left to memory pressure code).
  70790. +
  70791. + Nodes are deleted only if empty.
  70792. +
  70793. + Uses extents.
  70794. +
  70795. + Performs read-ahead of formatted nodes whose contents are part of
  70796. + the deletion.
  70797. +*/
  70798. +
  70799. +/**
  70800. + * Delete everything from the reiser4 tree between two keys: @from_key and
  70801. + * @to_key.
  70802. + *
  70803. + * @from_key: the beginning of the deleted key range,
  70804. + * @to_key: the end of the deleted key range,
  70805. + * @smallest_removed: the smallest removed key,
  70806. + * @object: owner of cutting items.
  70807. + * @truncate: true if called for file truncate.
  70808. + * @progress: return true if a progress in file items deletions was made,
  70809. + * @smallest_removed value is actual in that case.
  70810. + *
  70811. + * @return: 0 if success, error code otherwise, -E_REPEAT means that long cut_tree
  70812. + * operation was interrupted for allowing atom commit.
  70813. + */
  70814. +
  70815. +int reiser4_cut_tree_object(reiser4_tree * tree, const reiser4_key * from_key,
  70816. + const reiser4_key * to_key,
  70817. + reiser4_key * smallest_removed_p,
  70818. + struct inode *object, int truncate, int *progress)
  70819. +{
  70820. + lock_handle lock;
  70821. + int result;
  70822. + tap_t tap;
  70823. + coord_t right_coord;
  70824. + reiser4_key smallest_removed;
  70825. + int (*cut_tree_worker) (tap_t *, const reiser4_key *,
  70826. + const reiser4_key *, reiser4_key *,
  70827. + struct inode *, int, int *);
  70828. + STORE_COUNTERS;
  70829. +
  70830. + assert("umka-329", tree != NULL);
  70831. + assert("umka-330", from_key != NULL);
  70832. + assert("umka-331", to_key != NULL);
  70833. + assert("zam-936", keyle(from_key, to_key));
  70834. +
  70835. + if (smallest_removed_p == NULL)
  70836. + smallest_removed_p = &smallest_removed;
  70837. +
  70838. + init_lh(&lock);
  70839. +
  70840. + do {
  70841. + /* Find rightmost item to cut away from the tree. */
  70842. + result = reiser4_object_lookup(object, to_key, &right_coord,
  70843. + &lock, ZNODE_WRITE_LOCK,
  70844. + FIND_MAX_NOT_MORE_THAN,
  70845. + TWIG_LEVEL, LEAF_LEVEL,
  70846. + CBK_UNIQUE, NULL /*ra_info */);
  70847. + if (result != CBK_COORD_FOUND)
  70848. + break;
  70849. + if (object == NULL
  70850. + || inode_file_plugin(object)->cut_tree_worker == NULL)
  70851. + cut_tree_worker = cut_tree_worker_common;
  70852. + else
  70853. + cut_tree_worker =
  70854. + inode_file_plugin(object)->cut_tree_worker;
  70855. + reiser4_tap_init(&tap, &right_coord, &lock, ZNODE_WRITE_LOCK);
  70856. + result =
  70857. + cut_tree_worker(&tap, from_key, to_key, smallest_removed_p,
  70858. + object, truncate, progress);
  70859. + reiser4_tap_done(&tap);
  70860. +
  70861. + reiser4_preempt_point();
  70862. +
  70863. + } while (0);
  70864. +
  70865. + done_lh(&lock);
  70866. +
  70867. + if (result) {
  70868. + switch (result) {
  70869. + case -E_NO_NEIGHBOR:
  70870. + result = 0;
  70871. + break;
  70872. + case -E_DEADLOCK:
  70873. + result = -E_REPEAT;
  70874. + case -E_REPEAT:
  70875. + case -ENOMEM:
  70876. + case -ENOENT:
  70877. + break;
  70878. + default:
  70879. + warning("nikita-2861", "failure: %i", result);
  70880. + }
  70881. + }
  70882. +
  70883. + CHECK_COUNTERS;
  70884. + return result;
  70885. +}
  70886. +
  70887. +/* repeat reiser4_cut_tree_object until everything is deleted.
  70888. + * unlike cut_file_items, it does not end current transaction if -E_REPEAT
  70889. + * is returned by cut_tree_object. */
  70890. +int reiser4_cut_tree(reiser4_tree * tree, const reiser4_key * from,
  70891. + const reiser4_key * to, struct inode *inode, int truncate)
  70892. +{
  70893. + int result;
  70894. + int progress;
  70895. +
  70896. + do {
  70897. + result = reiser4_cut_tree_object(tree, from, to, NULL,
  70898. + inode, truncate, &progress);
  70899. + } while (result == -E_REPEAT);
  70900. +
  70901. + return result;
  70902. +}
  70903. +
  70904. +/* finishing reiser4 initialization */
  70905. +int reiser4_init_tree(reiser4_tree * tree /* pointer to structure being
  70906. + * initialized */ ,
  70907. + const reiser4_block_nr * root_block /* address of a root block
  70908. + * on a disk */ ,
  70909. + tree_level height /* height of a tree */ ,
  70910. + node_plugin * nplug /* default node plugin */ )
  70911. +{
  70912. + int result;
  70913. +
  70914. + assert("nikita-306", tree != NULL);
  70915. + assert("nikita-307", root_block != NULL);
  70916. + assert("nikita-308", height > 0);
  70917. + assert("nikita-309", nplug != NULL);
  70918. + assert("zam-587", tree->super != NULL);
  70919. + assert("edward-171", get_current_context() != NULL);
  70920. + /*
  70921. + * We'll perform costly memory allocations for znode hash table, etc.
  70922. + * So, set proper allocation flags
  70923. + */
  70924. + get_current_context()->gfp_mask |= (__GFP_NOWARN);
  70925. +
  70926. + tree->root_block = *root_block;
  70927. + tree->height = height;
  70928. + tree->estimate_one_insert = calc_estimate_one_insert(height);
  70929. + tree->nplug = nplug;
  70930. +
  70931. + tree->znode_epoch = 1ull;
  70932. +
  70933. + cbk_cache_init(&tree->cbk_cache);
  70934. +
  70935. + result = znodes_tree_init(tree);
  70936. + if (result == 0)
  70937. + result = jnodes_tree_init(tree);
  70938. + if (result == 0) {
  70939. + tree->uber = zget(tree, &UBER_TREE_ADDR, NULL, 0,
  70940. + reiser4_ctx_gfp_mask_get());
  70941. + if (IS_ERR(tree->uber)) {
  70942. + result = PTR_ERR(tree->uber);
  70943. + tree->uber = NULL;
  70944. + }
  70945. + }
  70946. + return result;
  70947. +}
  70948. +
  70949. +/* release resources associated with @tree */
  70950. +void reiser4_done_tree(reiser4_tree * tree /* tree to release */ )
  70951. +{
  70952. + if (tree == NULL)
  70953. + return;
  70954. +
  70955. + if (tree->uber != NULL) {
  70956. + zput(tree->uber);
  70957. + tree->uber = NULL;
  70958. + }
  70959. + znodes_tree_done(tree);
  70960. + jnodes_tree_done(tree);
  70961. + cbk_cache_done(&tree->cbk_cache);
  70962. +}
  70963. +
  70964. +/* Make Linus happy.
  70965. + Local variables:
  70966. + c-indentation-style: "K&R"
  70967. + mode-name: "LC"
  70968. + c-basic-offset: 8
  70969. + tab-width: 8
  70970. + fill-column: 120
  70971. + scroll-step: 1
  70972. + End:
  70973. +*/
  70974. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/tree.h linux-4.14.2/fs/reiser4/tree.h
  70975. --- linux-4.14.2.orig/fs/reiser4/tree.h 1970-01-01 01:00:00.000000000 +0100
  70976. +++ linux-4.14.2/fs/reiser4/tree.h 2017-11-26 22:13:09.000000000 +0100
  70977. @@ -0,0 +1,577 @@
  70978. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  70979. + * reiser4/README */
  70980. +
  70981. +/* Tree operations. See fs/reiser4/tree.c for comments */
  70982. +
  70983. +#if !defined( __REISER4_TREE_H__ )
  70984. +#define __REISER4_TREE_H__
  70985. +
  70986. +#include "forward.h"
  70987. +#include "debug.h"
  70988. +#include "dformat.h"
  70989. +#include "plugin/node/node.h"
  70990. +#include "plugin/plugin.h"
  70991. +#include "znode.h"
  70992. +#include "tap.h"
  70993. +
  70994. +#include <linux/types.h> /* for __u?? */
  70995. +#include <linux/fs.h> /* for struct super_block */
  70996. +#include <linux/spinlock.h>
  70997. +#include <linux/sched.h> /* for struct task_struct */
  70998. +
  70999. +/* fictive block number never actually used */
  71000. +extern const reiser4_block_nr UBER_TREE_ADDR;
  71001. +
  71002. +/* &cbk_cache_slot - entry in a coord cache.
  71003. +
  71004. + This is entry in a coord_by_key (cbk) cache, represented by
  71005. + &cbk_cache.
  71006. +
  71007. +*/
  71008. +typedef struct cbk_cache_slot {
  71009. + /* cached node */
  71010. + znode *node;
  71011. + /* linkage to the next cbk cache slot in a LRU order */
  71012. + struct list_head lru;
  71013. +} cbk_cache_slot;
  71014. +
  71015. +/* &cbk_cache - coord cache. This is part of reiser4_tree.
  71016. +
  71017. + cbk_cache is supposed to speed up tree lookups by caching results of recent
  71018. + successful lookups (we don't cache negative results as dentry cache
  71019. + does). Cache consists of relatively small number of entries kept in a LRU
  71020. + order. Each entry (&cbk_cache_slot) contains a pointer to znode, from
  71021. + which we can obtain a range of keys that covered by this znode. Before
  71022. + embarking into real tree traversal we scan cbk_cache slot by slot and for
  71023. + each slot check whether key we are looking for is between minimal and
  71024. + maximal keys for node pointed to by this slot. If no match is found, real
  71025. + tree traversal is performed and if result is successful, appropriate entry
  71026. + is inserted into cache, possibly pulling least recently used entry out of
  71027. + it.
  71028. +
  71029. + Tree spin lock is used to protect coord cache. If contention for this
  71030. + lock proves to be too high, more finer grained locking can be added.
  71031. +
  71032. + Invariants involving parts of this data-type:
  71033. +
  71034. + [cbk-cache-invariant]
  71035. +*/
  71036. +typedef struct cbk_cache {
  71037. + /* serializator */
  71038. + rwlock_t guard;
  71039. + int nr_slots;
  71040. + /* head of LRU list of cache slots */
  71041. + struct list_head lru;
  71042. + /* actual array of slots */
  71043. + cbk_cache_slot *slot;
  71044. +} cbk_cache;
  71045. +
  71046. +/* level_lookup_result - possible outcome of looking up key at some level.
  71047. + This is used by coord_by_key when traversing tree downward. */
  71048. +typedef enum {
  71049. + /* continue to the next level */
  71050. + LOOKUP_CONT,
  71051. + /* done. Either required item was found, or we can prove it
  71052. + doesn't exist, or some error occurred. */
  71053. + LOOKUP_DONE,
  71054. + /* restart traversal from the root. Infamous "repetition". */
  71055. + LOOKUP_REST
  71056. +} level_lookup_result;
  71057. +
  71058. +/* This is representation of internal reiser4 tree where all file-system
  71059. + data and meta-data are stored. This structure is passed to all tree
  71060. + manipulation functions. It's different from the super block because:
  71061. + we don't want to limit ourselves to strictly one to one mapping
  71062. + between super blocks and trees, and, because they are logically
  71063. + different: there are things in a super block that have no relation to
  71064. + the tree (bitmaps, journalling area, mount options, etc.) and there
  71065. + are things in a tree that bear no relation to the super block, like
  71066. + tree of znodes.
  71067. +
  71068. + At this time, there is only one tree
  71069. + per filesystem, and this struct is part of the super block. We only
  71070. + call the super block the super block for historical reasons (most
  71071. + other filesystems call the per filesystem metadata the super block).
  71072. +*/
  71073. +
  71074. +struct reiser4_tree {
  71075. + /* block_nr == 0 is fake znode. Write lock it, while changing
  71076. + tree height. */
  71077. + /* disk address of root node of a tree */
  71078. + reiser4_block_nr root_block;
  71079. +
  71080. + /* level of the root node. If this is 1, tree consists of root
  71081. + node only */
  71082. + tree_level height;
  71083. +
  71084. + /*
  71085. + * this is cached here to avoid calling plugins through function
  71086. + * dereference all the time.
  71087. + */
  71088. + __u64 estimate_one_insert;
  71089. +
  71090. + /* cache of recent tree lookup results */
  71091. + cbk_cache cbk_cache;
  71092. +
  71093. + /* hash table to look up znodes by block number. */
  71094. + z_hash_table zhash_table;
  71095. + z_hash_table zfake_table;
  71096. + /* hash table to look up jnodes by inode and offset. */
  71097. + j_hash_table jhash_table;
  71098. +
  71099. + /* lock protecting:
  71100. + - parent pointers,
  71101. + - sibling pointers,
  71102. + - znode hash table
  71103. + - coord cache
  71104. + */
  71105. + /* NOTE: The "giant" tree lock can be replaced by more spin locks,
  71106. + hoping they will be less contented. We can use one spin lock per one
  71107. + znode hash bucket. With adding of some code complexity, sibling
  71108. + pointers can be protected by both znode spin locks. However it looks
  71109. + more SMP scalable; we should test this locking change on n-ways (n >
  71110. + 4) SMP machines. Current 4-ways machine test does not show that tree
  71111. + lock is contented and it is a bottleneck (2003.07.25). */
  71112. +
  71113. + rwlock_t tree_lock;
  71114. +
  71115. + /* lock protecting delimiting keys */
  71116. + rwlock_t dk_lock;
  71117. +
  71118. + /* spin lock protecting znode_epoch */
  71119. + spinlock_t epoch_lock;
  71120. + /* version stamp used to mark znode updates. See seal.[ch] for more
  71121. + * information. */
  71122. + __u64 znode_epoch;
  71123. +
  71124. + znode *uber;
  71125. + node_plugin *nplug;
  71126. + struct super_block *super;
  71127. + struct {
  71128. + /* carry flags used for insertion of new nodes */
  71129. + __u32 new_node_flags;
  71130. + /* carry flags used for insertion of new extents */
  71131. + __u32 new_extent_flags;
  71132. + /* carry flags used for paste operations */
  71133. + __u32 paste_flags;
  71134. + /* carry flags used for insert operations */
  71135. + __u32 insert_flags;
  71136. + } carry;
  71137. +};
  71138. +
  71139. +extern int reiser4_init_tree(reiser4_tree * tree,
  71140. + const reiser4_block_nr * root_block,
  71141. + tree_level height, node_plugin * default_plugin);
  71142. +extern void reiser4_done_tree(reiser4_tree * tree);
  71143. +
  71144. +/* cbk flags: options for coord_by_key() */
  71145. +typedef enum {
  71146. + /* coord_by_key() is called for insertion. This is necessary because
  71147. + of extents being located at the twig level. For explanation, see
  71148. + comment just above is_next_item_internal().
  71149. + */
  71150. + CBK_FOR_INSERT = (1 << 0),
  71151. + /* coord_by_key() is called with key that is known to be unique */
  71152. + CBK_UNIQUE = (1 << 1),
  71153. + /* coord_by_key() can trust delimiting keys. This options is not user
  71154. + accessible. coord_by_key() will set it automatically. It will be
  71155. + only cleared by special-case in extents-on-the-twig-level handling
  71156. + where it is necessary to insert item with a key smaller than
  71157. + leftmost key in a node. This is necessary because of extents being
  71158. + located at the twig level. For explanation, see comment just above
  71159. + is_next_item_internal().
  71160. + */
  71161. + CBK_TRUST_DK = (1 << 2),
  71162. + CBK_READA = (1 << 3), /* original: readahead leaves which contain items of certain file */
  71163. + CBK_READDIR_RA = (1 << 4), /* readdir: readahead whole directory and all its stat datas */
  71164. + CBK_DKSET = (1 << 5),
  71165. + CBK_EXTENDED_COORD = (1 << 6), /* coord_t is actually */
  71166. + CBK_IN_CACHE = (1 << 7), /* node is already in cache */
  71167. + CBK_USE_CRABLOCK = (1 << 8) /* use crab_lock instead of long term
  71168. + * lock */
  71169. +} cbk_flags;
  71170. +
  71171. +/* insertion outcome. IBK = insert by key */
  71172. +typedef enum {
  71173. + IBK_INSERT_OK = 0,
  71174. + IBK_ALREADY_EXISTS = -EEXIST,
  71175. + IBK_IO_ERROR = -EIO,
  71176. + IBK_NO_SPACE = -E_NODE_FULL,
  71177. + IBK_OOM = -ENOMEM
  71178. +} insert_result;
  71179. +
  71180. +#define IS_CBKERR(err) ((err) != CBK_COORD_FOUND && (err) != CBK_COORD_NOTFOUND)
  71181. +
  71182. +typedef int (*tree_iterate_actor_t) (reiser4_tree * tree, coord_t * coord,
  71183. + lock_handle * lh, void *arg);
  71184. +extern int reiser4_iterate_tree(reiser4_tree * tree, coord_t * coord,
  71185. + lock_handle * lh,
  71186. + tree_iterate_actor_t actor, void *arg,
  71187. + znode_lock_mode mode, int through_units_p);
  71188. +extern int get_uber_znode(reiser4_tree * tree, znode_lock_mode mode,
  71189. + znode_lock_request pri, lock_handle * lh);
  71190. +
  71191. +/* return node plugin of @node */
  71192. +static inline node_plugin *node_plugin_by_node(const znode *
  71193. + node /* node to query */ )
  71194. +{
  71195. + assert("vs-213", node != NULL);
  71196. + assert("vs-214", znode_is_loaded(node));
  71197. +
  71198. + return node->nplug;
  71199. +}
  71200. +
  71201. +/* number of items in @node */
  71202. +static inline pos_in_node_t node_num_items(const znode * node)
  71203. +{
  71204. + assert("nikita-2754", znode_is_loaded(node));
  71205. + assert("nikita-2468",
  71206. + node_plugin_by_node(node)->num_of_items(node) == node->nr_items);
  71207. +
  71208. + return node->nr_items;
  71209. +}
  71210. +
  71211. +/* Return the number of items at the present node. Asserts coord->node !=
  71212. + NULL. */
  71213. +static inline unsigned coord_num_items(const coord_t * coord)
  71214. +{
  71215. + assert("jmacd-9805", coord->node != NULL);
  71216. +
  71217. + return node_num_items(coord->node);
  71218. +}
  71219. +
  71220. +/* true if @node is empty */
  71221. +static inline int node_is_empty(const znode * node)
  71222. +{
  71223. + return node_num_items(node) == 0;
  71224. +}
  71225. +
  71226. +typedef enum {
  71227. + SHIFTED_SOMETHING = 0,
  71228. + SHIFT_NO_SPACE = -E_NODE_FULL,
  71229. + SHIFT_IO_ERROR = -EIO,
  71230. + SHIFT_OOM = -ENOMEM,
  71231. +} shift_result;
  71232. +
  71233. +extern node_plugin *node_plugin_by_coord(const coord_t * coord);
  71234. +extern int is_coord_in_node(const coord_t * coord);
  71235. +extern int key_in_node(const reiser4_key *, const coord_t *);
  71236. +extern void coord_item_move_to(coord_t * coord, int items);
  71237. +extern void coord_unit_move_to(coord_t * coord, int units);
  71238. +
  71239. +/* there are two types of repetitive accesses (ra): intra-syscall
  71240. + (local) and inter-syscall (global). Local ra is used when
  71241. + during single syscall we add/delete several items and units in the
  71242. + same place in a tree. Note that plan-A fragments local ra by
  71243. + separating stat-data and file body in key-space. Global ra is
  71244. + used when user does repetitive modifications in the same place in a
  71245. + tree.
  71246. +
  71247. + Our ra implementation serves following purposes:
  71248. + 1 it affects balancing decisions so that next operation in a row
  71249. + can be performed faster;
  71250. + 2 it affects lower-level read-ahead in page-cache;
  71251. + 3 it allows to avoid unnecessary lookups by maintaining some state
  71252. + across several operations (this is only for local ra);
  71253. + 4 it leaves room for lazy-micro-balancing: when we start a sequence of
  71254. + operations they are performed without actually doing any intra-node
  71255. + shifts, until we finish sequence or scope of sequence leaves
  71256. + current node, only then we really pack node (local ra only).
  71257. +*/
  71258. +
  71259. +/* another thing that can be useful is to keep per-tree and/or
  71260. + per-process cache of recent lookups. This cache can be organised as a
  71261. + list of block numbers of formatted nodes sorted by starting key in
  71262. + this node. Balancings should invalidate appropriate parts of this
  71263. + cache.
  71264. +*/
  71265. +
  71266. +lookup_result coord_by_key(reiser4_tree * tree, const reiser4_key * key,
  71267. + coord_t * coord, lock_handle * handle,
  71268. + znode_lock_mode lock, lookup_bias bias,
  71269. + tree_level lock_level, tree_level stop_level,
  71270. + __u32 flags, ra_info_t *);
  71271. +
  71272. +lookup_result reiser4_object_lookup(struct inode *object,
  71273. + const reiser4_key * key,
  71274. + coord_t * coord,
  71275. + lock_handle * lh,
  71276. + znode_lock_mode lock_mode,
  71277. + lookup_bias bias,
  71278. + tree_level lock_level,
  71279. + tree_level stop_level,
  71280. + __u32 flags, ra_info_t * info);
  71281. +
  71282. +insert_result insert_by_key(reiser4_tree * tree, const reiser4_key * key,
  71283. + reiser4_item_data * data, coord_t * coord,
  71284. + lock_handle * lh,
  71285. + tree_level stop_level, __u32 flags);
  71286. +insert_result insert_by_coord(coord_t * coord,
  71287. + reiser4_item_data * data, const reiser4_key * key,
  71288. + lock_handle * lh, __u32);
  71289. +insert_result insert_extent_by_coord(coord_t * coord,
  71290. + reiser4_item_data * data,
  71291. + const reiser4_key * key, lock_handle * lh);
  71292. +int cut_node_content(coord_t * from, coord_t * to, const reiser4_key * from_key,
  71293. + const reiser4_key * to_key,
  71294. + reiser4_key * smallest_removed);
  71295. +int kill_node_content(coord_t * from, coord_t * to,
  71296. + const reiser4_key * from_key, const reiser4_key * to_key,
  71297. + reiser4_key * smallest_removed,
  71298. + znode * locked_left_neighbor, struct inode *inode,
  71299. + int truncate);
  71300. +
  71301. +int reiser4_resize_item(coord_t * coord, reiser4_item_data * data,
  71302. + reiser4_key * key, lock_handle * lh, cop_insert_flag);
  71303. +int insert_into_item(coord_t * coord, lock_handle * lh, const reiser4_key * key,
  71304. + reiser4_item_data * data, unsigned);
  71305. +int reiser4_insert_flow(coord_t * coord, lock_handle * lh, flow_t * f);
  71306. +int find_new_child_ptr(znode * parent, znode * child, znode * left,
  71307. + coord_t * result);
  71308. +
  71309. +int shift_right_of_but_excluding_insert_coord(coord_t * insert_coord);
  71310. +int shift_left_of_and_including_insert_coord(coord_t * insert_coord);
  71311. +
  71312. +void fake_kill_hook_tail(struct inode *, loff_t start, loff_t end, int);
  71313. +
  71314. +extern int cut_tree_worker_common(tap_t *, const reiser4_key *,
  71315. + const reiser4_key *, reiser4_key *,
  71316. + struct inode *, int, int *);
  71317. +extern int reiser4_cut_tree_object(reiser4_tree *, const reiser4_key *,
  71318. + const reiser4_key *, reiser4_key *,
  71319. + struct inode *, int, int *);
  71320. +extern int reiser4_cut_tree(reiser4_tree * tree, const reiser4_key * from,
  71321. + const reiser4_key * to, struct inode *, int);
  71322. +
  71323. +extern int reiser4_delete_node(znode *, reiser4_key *, struct inode *, int);
  71324. +extern int check_tree_pointer(const coord_t * pointer, const znode * child);
  71325. +extern int find_new_child_ptr(znode * parent, znode * child UNUSED_ARG,
  71326. + znode * left, coord_t * result);
  71327. +extern int find_child_ptr(znode * parent, znode * child, coord_t * result);
  71328. +extern int set_child_delimiting_keys(znode * parent, const coord_t * in_parent,
  71329. + znode * child);
  71330. +extern znode *child_znode(const coord_t * in_parent, znode * parent,
  71331. + int incore_p, int setup_dkeys_p);
  71332. +
  71333. +extern int cbk_cache_init(cbk_cache * cache);
  71334. +extern void cbk_cache_done(cbk_cache * cache);
  71335. +extern void cbk_cache_invalidate(const znode * node, reiser4_tree * tree);
  71336. +
  71337. +extern char *sprint_address(const reiser4_block_nr * block);
  71338. +
  71339. +#if REISER4_DEBUG
  71340. +extern void print_coord_content(const char *prefix, coord_t * p);
  71341. +extern void reiser4_print_address(const char *prefix,
  71342. + const reiser4_block_nr * block);
  71343. +extern void print_tree_rec(const char *prefix, reiser4_tree * tree,
  71344. + __u32 flags);
  71345. +extern void check_dkeys(znode *node);
  71346. +#else
  71347. +#define print_coord_content(p, c) noop
  71348. +#define reiser4_print_address(p, b) noop
  71349. +#endif
  71350. +
  71351. +extern void forget_znode(lock_handle * handle);
  71352. +extern int deallocate_znode(znode * node);
  71353. +
  71354. +extern int is_disk_addr_unallocated(const reiser4_block_nr * addr);
  71355. +
  71356. +/* struct used internally to pack all numerous arguments of tree lookup.
  71357. + Used to avoid passing a lot of arguments to helper functions. */
  71358. +typedef struct cbk_handle {
  71359. + /* tree we are in */
  71360. + reiser4_tree *tree;
  71361. + /* key we are going after */
  71362. + const reiser4_key *key;
  71363. + /* coord we will store result in */
  71364. + coord_t *coord;
  71365. + /* type of lock to take on target node */
  71366. + znode_lock_mode lock_mode;
  71367. + /* lookup bias. See comments at the declaration of lookup_bias */
  71368. + lookup_bias bias;
  71369. + /* lock level: level starting from which tree traversal starts taking
  71370. + * write locks. */
  71371. + tree_level lock_level;
  71372. + /* level where search will stop. Either item will be found between
  71373. + lock_level and stop_level, or CBK_COORD_NOTFOUND will be
  71374. + returned.
  71375. + */
  71376. + tree_level stop_level;
  71377. + /* level we are currently at */
  71378. + tree_level level;
  71379. + /* block number of @active node. Tree traversal operates on two
  71380. + nodes: active and parent. */
  71381. + reiser4_block_nr block;
  71382. + /* put here error message to be printed by caller */
  71383. + const char *error;
  71384. + /* result passed back to caller */
  71385. + int result;
  71386. + /* lock handles for active and parent */
  71387. + lock_handle *parent_lh;
  71388. + lock_handle *active_lh;
  71389. + reiser4_key ld_key;
  71390. + reiser4_key rd_key;
  71391. + /* flags, passed to the cbk routine. Bits of this bitmask are defined
  71392. + in tree.h:cbk_flags enum. */
  71393. + __u32 flags;
  71394. + ra_info_t *ra_info;
  71395. + struct inode *object;
  71396. +} cbk_handle;
  71397. +
  71398. +extern znode_lock_mode cbk_lock_mode(tree_level level, cbk_handle * h);
  71399. +
  71400. +/* eottl.c */
  71401. +extern int handle_eottl(cbk_handle *h, int *outcome);
  71402. +
  71403. +int lookup_multikey(cbk_handle * handle, int nr_keys);
  71404. +int lookup_couple(reiser4_tree * tree,
  71405. + const reiser4_key * key1, const reiser4_key * key2,
  71406. + coord_t * coord1, coord_t * coord2,
  71407. + lock_handle * lh1, lock_handle * lh2,
  71408. + znode_lock_mode lock_mode, lookup_bias bias,
  71409. + tree_level lock_level, tree_level stop_level, __u32 flags,
  71410. + int *result1, int *result2);
  71411. +
  71412. +static inline void read_lock_tree(reiser4_tree *tree)
  71413. +{
  71414. + /* check that tree is not locked */
  71415. + assert("", (LOCK_CNT_NIL(rw_locked_tree) &&
  71416. + LOCK_CNT_NIL(read_locked_tree) &&
  71417. + LOCK_CNT_NIL(write_locked_tree)));
  71418. + /* check that spinlocks of lower priorities are not held */
  71419. + assert("", (LOCK_CNT_NIL(spin_locked_txnh) &&
  71420. + LOCK_CNT_NIL(rw_locked_dk) &&
  71421. + LOCK_CNT_NIL(spin_locked_stack)));
  71422. +
  71423. + read_lock(&(tree->tree_lock));
  71424. +
  71425. + LOCK_CNT_INC(read_locked_tree);
  71426. + LOCK_CNT_INC(rw_locked_tree);
  71427. + LOCK_CNT_INC(spin_locked);
  71428. +}
  71429. +
  71430. +static inline void read_unlock_tree(reiser4_tree *tree)
  71431. +{
  71432. + assert("nikita-1375", LOCK_CNT_GTZ(read_locked_tree));
  71433. + assert("nikita-1376", LOCK_CNT_GTZ(rw_locked_tree));
  71434. + assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
  71435. +
  71436. + LOCK_CNT_DEC(read_locked_tree);
  71437. + LOCK_CNT_DEC(rw_locked_tree);
  71438. + LOCK_CNT_DEC(spin_locked);
  71439. +
  71440. + read_unlock(&(tree->tree_lock));
  71441. +}
  71442. +
  71443. +static inline void write_lock_tree(reiser4_tree *tree)
  71444. +{
  71445. + /* check that tree is not locked */
  71446. + assert("", (LOCK_CNT_NIL(rw_locked_tree) &&
  71447. + LOCK_CNT_NIL(read_locked_tree) &&
  71448. + LOCK_CNT_NIL(write_locked_tree)));
  71449. + /* check that spinlocks of lower priorities are not held */
  71450. + assert("", (LOCK_CNT_NIL(spin_locked_txnh) &&
  71451. + LOCK_CNT_NIL(rw_locked_dk) &&
  71452. + LOCK_CNT_NIL(spin_locked_stack)));
  71453. +
  71454. + write_lock(&(tree->tree_lock));
  71455. +
  71456. + LOCK_CNT_INC(write_locked_tree);
  71457. + LOCK_CNT_INC(rw_locked_tree);
  71458. + LOCK_CNT_INC(spin_locked);
  71459. +}
  71460. +
  71461. +static inline void write_unlock_tree(reiser4_tree *tree)
  71462. +{
  71463. + assert("nikita-1375", LOCK_CNT_GTZ(write_locked_tree));
  71464. + assert("nikita-1376", LOCK_CNT_GTZ(rw_locked_tree));
  71465. + assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
  71466. +
  71467. + LOCK_CNT_DEC(write_locked_tree);
  71468. + LOCK_CNT_DEC(rw_locked_tree);
  71469. + LOCK_CNT_DEC(spin_locked);
  71470. +
  71471. + write_unlock(&(tree->tree_lock));
  71472. +}
  71473. +
  71474. +static inline void read_lock_dk(reiser4_tree *tree)
  71475. +{
  71476. + /* check that dk is not locked */
  71477. + assert("", (LOCK_CNT_NIL(rw_locked_dk) &&
  71478. + LOCK_CNT_NIL(read_locked_dk) &&
  71479. + LOCK_CNT_NIL(write_locked_dk)));
  71480. + /* check that spinlocks of lower priorities are not held */
  71481. + assert("", LOCK_CNT_NIL(spin_locked_stack));
  71482. +
  71483. + read_lock(&((tree)->dk_lock));
  71484. +
  71485. + LOCK_CNT_INC(read_locked_dk);
  71486. + LOCK_CNT_INC(rw_locked_dk);
  71487. + LOCK_CNT_INC(spin_locked);
  71488. +}
  71489. +
  71490. +static inline void read_unlock_dk(reiser4_tree *tree)
  71491. +{
  71492. + assert("nikita-1375", LOCK_CNT_GTZ(read_locked_dk));
  71493. + assert("nikita-1376", LOCK_CNT_GTZ(rw_locked_dk));
  71494. + assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
  71495. +
  71496. + LOCK_CNT_DEC(read_locked_dk);
  71497. + LOCK_CNT_DEC(rw_locked_dk);
  71498. + LOCK_CNT_DEC(spin_locked);
  71499. +
  71500. + read_unlock(&(tree->dk_lock));
  71501. +}
  71502. +
  71503. +static inline void write_lock_dk(reiser4_tree *tree)
  71504. +{
  71505. + /* check that dk is not locked */
  71506. + assert("", (LOCK_CNT_NIL(rw_locked_dk) &&
  71507. + LOCK_CNT_NIL(read_locked_dk) &&
  71508. + LOCK_CNT_NIL(write_locked_dk)));
  71509. + /* check that spinlocks of lower priorities are not held */
  71510. + assert("", LOCK_CNT_NIL(spin_locked_stack));
  71511. +
  71512. + write_lock(&((tree)->dk_lock));
  71513. +
  71514. + LOCK_CNT_INC(write_locked_dk);
  71515. + LOCK_CNT_INC(rw_locked_dk);
  71516. + LOCK_CNT_INC(spin_locked);
  71517. +}
  71518. +
  71519. +static inline void write_unlock_dk(reiser4_tree *tree)
  71520. +{
  71521. + assert("nikita-1375", LOCK_CNT_GTZ(write_locked_dk));
  71522. + assert("nikita-1376", LOCK_CNT_GTZ(rw_locked_dk));
  71523. + assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
  71524. +
  71525. + LOCK_CNT_DEC(write_locked_dk);
  71526. + LOCK_CNT_DEC(rw_locked_dk);
  71527. + LOCK_CNT_DEC(spin_locked);
  71528. +
  71529. + write_unlock(&(tree->dk_lock));
  71530. +}
  71531. +
  71532. +/* estimate api. Implementation is in estimate.c */
  71533. +reiser4_block_nr estimate_one_insert_item(reiser4_tree *);
  71534. +reiser4_block_nr estimate_one_insert_into_item(reiser4_tree *);
  71535. +reiser4_block_nr estimate_insert_flow(tree_level);
  71536. +reiser4_block_nr estimate_one_item_removal(reiser4_tree *);
  71537. +reiser4_block_nr calc_estimate_one_insert(tree_level);
  71538. +reiser4_block_nr estimate_dirty_cluster(struct inode *);
  71539. +reiser4_block_nr estimate_insert_cluster(struct inode *);
  71540. +reiser4_block_nr estimate_update_cluster(struct inode *);
  71541. +
  71542. +/* __REISER4_TREE_H__ */
  71543. +#endif
  71544. +
  71545. +/* Make Linus happy.
  71546. + Local variables:
  71547. + c-indentation-style: "K&R"
  71548. + mode-name: "LC"
  71549. + c-basic-offset: 8
  71550. + tab-width: 8
  71551. + fill-column: 120
  71552. + scroll-step: 1
  71553. + End:
  71554. +*/
  71555. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/tree_mod.c linux-4.14.2/fs/reiser4/tree_mod.c
  71556. --- linux-4.14.2.orig/fs/reiser4/tree_mod.c 1970-01-01 01:00:00.000000000 +0100
  71557. +++ linux-4.14.2/fs/reiser4/tree_mod.c 2017-11-26 22:13:09.000000000 +0100
  71558. @@ -0,0 +1,387 @@
  71559. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  71560. + * reiser4/README */
  71561. +
  71562. +/*
  71563. + * Functions to add/delete new nodes to/from the tree.
  71564. + *
  71565. + * Functions from this file are used by carry (see carry*) to handle:
  71566. + *
  71567. + * . insertion of new formatted node into tree
  71568. + *
  71569. + * . addition of new tree root, increasing tree height
  71570. + *
  71571. + * . removing tree root, decreasing tree height
  71572. + *
  71573. + */
  71574. +
  71575. +#include "forward.h"
  71576. +#include "debug.h"
  71577. +#include "dformat.h"
  71578. +#include "key.h"
  71579. +#include "coord.h"
  71580. +#include "plugin/plugin.h"
  71581. +#include "jnode.h"
  71582. +#include "znode.h"
  71583. +#include "tree_mod.h"
  71584. +#include "block_alloc.h"
  71585. +#include "tree_walk.h"
  71586. +#include "tree.h"
  71587. +#include "super.h"
  71588. +
  71589. +#include <linux/err.h>
  71590. +
  71591. +static int add_child_ptr(znode * parent, znode * child);
  71592. +/* warning only issued if error is not -E_REPEAT */
  71593. +#define ewarning( error, ... ) \
  71594. + if( ( error ) != -E_REPEAT ) \
  71595. + warning( __VA_ARGS__ )
  71596. +
  71597. +/* allocate new node on the @level and immediately on the right of @brother. */
  71598. +znode * reiser4_new_node(znode * brother /* existing left neighbor
  71599. + * of new node */,
  71600. + tree_level level /* tree level at which new node is to
  71601. + * be allocated */)
  71602. +{
  71603. + znode *result;
  71604. + int retcode;
  71605. + reiser4_block_nr blocknr;
  71606. +
  71607. + assert("nikita-930", brother != NULL);
  71608. + assert("umka-264", level < REAL_MAX_ZTREE_HEIGHT);
  71609. +
  71610. + retcode = assign_fake_blocknr_formatted(&blocknr);
  71611. + if (retcode == 0) {
  71612. + result =
  71613. + zget(znode_get_tree(brother), &blocknr, NULL, level,
  71614. + reiser4_ctx_gfp_mask_get());
  71615. + if (IS_ERR(result)) {
  71616. + ewarning(PTR_ERR(result), "nikita-929",
  71617. + "Cannot allocate znode for carry: %li",
  71618. + PTR_ERR(result));
  71619. + return result;
  71620. + }
  71621. + /* cheap test, can be executed even when debugging is off */
  71622. + if (!znode_just_created(result)) {
  71623. + warning("nikita-2213",
  71624. + "Allocated already existing block: %llu",
  71625. + (unsigned long long)blocknr);
  71626. + zput(result);
  71627. + return ERR_PTR(RETERR(-EIO));
  71628. + }
  71629. +
  71630. + assert("nikita-931", result != NULL);
  71631. + result->nplug = znode_get_tree(brother)->nplug;
  71632. + assert("nikita-933", result->nplug != NULL);
  71633. +
  71634. + retcode = zinit_new(result, reiser4_ctx_gfp_mask_get());
  71635. + if (retcode == 0) {
  71636. + ZF_SET(result, JNODE_CREATED);
  71637. + zrelse(result);
  71638. + } else {
  71639. + zput(result);
  71640. + result = ERR_PTR(retcode);
  71641. + }
  71642. + } else {
  71643. + /* failure to allocate new node during balancing.
  71644. + This should never happen. Ever. Returning -E_REPEAT
  71645. + is not a viable solution, because "out of disk space"
  71646. + is not a transient error that will go away by itself.
  71647. + */
  71648. + ewarning(retcode, "nikita-928",
  71649. + "Cannot allocate block for carry: %i", retcode);
  71650. + result = ERR_PTR(retcode);
  71651. + }
  71652. + assert("nikita-1071", result != NULL);
  71653. + return result;
  71654. +}
  71655. +
  71656. +/* allocate new root and add it to the tree
  71657. +
  71658. + This helper function is called by add_new_root().
  71659. +
  71660. +*/
  71661. +znode *reiser4_add_tree_root(znode * old_root /* existing tree root */ ,
  71662. + znode * fake /* "fake" znode */ )
  71663. +{
  71664. + reiser4_tree *tree = znode_get_tree(old_root);
  71665. + znode *new_root = NULL; /* to shut gcc up */
  71666. + int result;
  71667. +
  71668. + assert("nikita-1069", old_root != NULL);
  71669. + assert("umka-262", fake != NULL);
  71670. + assert("umka-263", tree != NULL);
  71671. +
  71672. + /* "fake" znode---one always hanging just above current root. This
  71673. + node is locked when new root is created or existing root is
  71674. + deleted. Downward tree traversal takes lock on it before taking
  71675. + lock on a root node. This avoids race conditions with root
  71676. + manipulations.
  71677. +
  71678. + */
  71679. + assert("nikita-1348", znode_above_root(fake));
  71680. + assert("nikita-1211", znode_is_root(old_root));
  71681. +
  71682. + result = 0;
  71683. + if (tree->height >= REAL_MAX_ZTREE_HEIGHT) {
  71684. + warning("nikita-1344", "Tree is too tall: %i", tree->height);
  71685. + /* ext2 returns -ENOSPC when it runs out of free inodes with a
  71686. + following comment (fs/ext2/ialloc.c:441): Is it really
  71687. + ENOSPC?
  71688. +
  71689. + -EXFULL? -EINVAL?
  71690. + */
  71691. + result = RETERR(-ENOSPC);
  71692. + } else {
  71693. + /* Allocate block for new root. It's not that
  71694. + important where it will be allocated, as root is
  71695. + almost always in memory. Moreover, allocate on
  71696. + flush can be going here.
  71697. + */
  71698. + assert("nikita-1448", znode_is_root(old_root));
  71699. + new_root = reiser4_new_node(fake, tree->height + 1);
  71700. + if (!IS_ERR(new_root) && (result = zload(new_root)) == 0) {
  71701. + lock_handle rlh;
  71702. +
  71703. + init_lh(&rlh);
  71704. + result =
  71705. + longterm_lock_znode(&rlh, new_root,
  71706. + ZNODE_WRITE_LOCK,
  71707. + ZNODE_LOCK_LOPRI);
  71708. + if (result == 0) {
  71709. + parent_coord_t *in_parent;
  71710. +
  71711. + znode_make_dirty(fake);
  71712. +
  71713. + /* new root is a child of "fake" node */
  71714. + write_lock_tree(tree);
  71715. +
  71716. + ++tree->height;
  71717. +
  71718. + /* recalculate max balance overhead */
  71719. + tree->estimate_one_insert =
  71720. + calc_estimate_one_insert(tree->height);
  71721. +
  71722. + tree->root_block = *znode_get_block(new_root);
  71723. + in_parent = &new_root->in_parent;
  71724. + init_parent_coord(in_parent, fake);
  71725. + /* manually insert new root into sibling
  71726. + * list. With this all nodes involved into
  71727. + * balancing are connected after balancing is
  71728. + * done---useful invariant to check. */
  71729. + sibling_list_insert_nolock(new_root, NULL);
  71730. + write_unlock_tree(tree);
  71731. +
  71732. + /* insert into new root pointer to the
  71733. + @old_root. */
  71734. + assert("nikita-1110",
  71735. + WITH_DATA(new_root,
  71736. + node_is_empty(new_root)));
  71737. + write_lock_dk(tree);
  71738. + znode_set_ld_key(new_root, reiser4_min_key());
  71739. + znode_set_rd_key(new_root, reiser4_max_key());
  71740. + write_unlock_dk(tree);
  71741. + if (REISER4_DEBUG) {
  71742. + ZF_CLR(old_root, JNODE_LEFT_CONNECTED);
  71743. + ZF_CLR(old_root, JNODE_RIGHT_CONNECTED);
  71744. + ZF_SET(old_root, JNODE_ORPHAN);
  71745. + }
  71746. + result = add_child_ptr(new_root, old_root);
  71747. + done_lh(&rlh);
  71748. + }
  71749. + zrelse(new_root);
  71750. + }
  71751. + }
  71752. + if (result != 0)
  71753. + new_root = ERR_PTR(result);
  71754. + return new_root;
  71755. +}
  71756. +
  71757. +/* build &reiser4_item_data for inserting child pointer
  71758. +
  71759. + Build &reiser4_item_data that can be later used to insert pointer to @child
  71760. + in its parent.
  71761. +
  71762. +*/
  71763. +void build_child_ptr_data(znode * child /* node pointer to which will be
  71764. + * inserted */ ,
  71765. + reiser4_item_data * data /* where to store result */ )
  71766. +{
  71767. + assert("nikita-1116", child != NULL);
  71768. + assert("nikita-1117", data != NULL);
  71769. +
  71770. + /*
  71771. + * NOTE: we use the address of the child's blocknr as the address of the
  71772. + * data to be inserted. As a result, the data gets into the on-disk
  71773. + * structure in CPU byte order. internal's create_hook converts it to
  71774. + * order.
  71775. + */
  71776. + data->data = (char *)znode_get_block(child);
  71777. + /* data -> data is kernel space */
  71778. + data->user = 0;
  71779. + data->length = sizeof(reiser4_block_nr);
  71780. + /* FIXME-VS: hardcoded internal item? */
  71781. +
  71782. + /* AUDIT: Is it possible that "item_plugin_by_id" may find nothing? */
  71783. + data->iplug = item_plugin_by_id(NODE_POINTER_ID);
  71784. +}
  71785. +
  71786. +/* add pointer to @child into empty @parent.
  71787. +
  71788. + This is used when pointer to old root is inserted into new root which is
  71789. + empty.
  71790. +*/
  71791. +static int add_child_ptr(znode * parent, znode * child)
  71792. +{
  71793. + coord_t coord;
  71794. + reiser4_item_data data;
  71795. + int result;
  71796. + reiser4_key key;
  71797. +
  71798. + assert("nikita-1111", parent != NULL);
  71799. + assert("nikita-1112", child != NULL);
  71800. + assert("nikita-1115",
  71801. + znode_get_level(parent) == znode_get_level(child) + 1);
  71802. +
  71803. + result = zload(parent);
  71804. + if (result != 0)
  71805. + return result;
  71806. + assert("nikita-1113", node_is_empty(parent));
  71807. + coord_init_first_unit(&coord, parent);
  71808. +
  71809. + build_child_ptr_data(child, &data);
  71810. + data.arg = NULL;
  71811. +
  71812. + read_lock_dk(znode_get_tree(parent));
  71813. + key = *znode_get_ld_key(child);
  71814. + read_unlock_dk(znode_get_tree(parent));
  71815. +
  71816. + result = node_plugin_by_node(parent)->create_item(&coord, &key, &data,
  71817. + NULL);
  71818. + znode_make_dirty(parent);
  71819. + zrelse(parent);
  71820. + return result;
  71821. +}
  71822. +
  71823. +/* actually remove tree root */
  71824. +static int reiser4_kill_root(reiser4_tree * tree /* tree from which root is
  71825. + * being removed */,
  71826. + znode * old_root /* root node that is being
  71827. + * removed */ ,
  71828. + znode * new_root /* new root---sole child of
  71829. + * @old_root */,
  71830. + const reiser4_block_nr * new_root_blk /* disk address of
  71831. + * @new_root */)
  71832. +{
  71833. + znode *uber;
  71834. + int result;
  71835. + lock_handle handle_for_uber;
  71836. +
  71837. + assert("umka-265", tree != NULL);
  71838. + assert("nikita-1198", new_root != NULL);
  71839. + assert("nikita-1199",
  71840. + znode_get_level(new_root) + 1 == znode_get_level(old_root));
  71841. +
  71842. + assert("nikita-1201", znode_is_write_locked(old_root));
  71843. +
  71844. + assert("nikita-1203",
  71845. + disk_addr_eq(new_root_blk, znode_get_block(new_root)));
  71846. +
  71847. + init_lh(&handle_for_uber);
  71848. + /* obtain and lock "fake" znode protecting changes in tree height. */
  71849. + result = get_uber_znode(tree, ZNODE_WRITE_LOCK, ZNODE_LOCK_HIPRI,
  71850. + &handle_for_uber);
  71851. + if (result == 0) {
  71852. + uber = handle_for_uber.node;
  71853. +
  71854. + znode_make_dirty(uber);
  71855. +
  71856. + /* don't take a long-term lock on @new_root. Take a spinlock. */
  71857. +
  71858. + write_lock_tree(tree);
  71859. +
  71860. + tree->root_block = *new_root_blk;
  71861. + --tree->height;
  71862. +
  71863. + /* recalculate max balance overhead */
  71864. + tree->estimate_one_insert =
  71865. + calc_estimate_one_insert(tree->height);
  71866. +
  71867. + assert("nikita-1202",
  71868. + tree->height == znode_get_level(new_root));
  71869. +
  71870. + /* new root is a child of the "fake" node */
  71871. + init_parent_coord(&new_root->in_parent, uber);
  71872. + ++uber->c_count;
  71873. +
  71874. + /* sibling_list_insert_nolock(new_root, NULL); */
  71875. + write_unlock_tree(tree);
  71876. +
  71877. + /* reinitialise old root. */
  71878. + result = init_znode(ZJNODE(old_root));
  71879. + znode_make_dirty(old_root);
  71880. + if (result == 0) {
  71881. + assert("nikita-1279", node_is_empty(old_root));
  71882. + ZF_SET(old_root, JNODE_HEARD_BANSHEE);
  71883. + old_root->c_count = 0;
  71884. + }
  71885. + }
  71886. + done_lh(&handle_for_uber);
  71887. +
  71888. + return result;
  71889. +}
  71890. +
  71891. +/* remove tree root
  71892. +
  71893. + This function removes tree root, decreasing tree height by one. Tree root
  71894. + and its only child (that is going to become new tree root) are write locked
  71895. + at the entry.
  71896. +
  71897. + To remove tree root we need to take lock on special "fake" znode that
  71898. + protects changes of tree height. See comments in reiser4_add_tree_root() for
  71899. + more on this.
  71900. +
  71901. + Also parent pointers have to be updated in
  71902. + old and new root. To simplify code, function is split into two parts: outer
  71903. + reiser4_kill_tree_root() collects all necessary arguments and calls
  71904. + reiser4_kill_root() to do the actual job.
  71905. +
  71906. +*/
  71907. +int reiser4_kill_tree_root(znode * old_root /* tree root that we are
  71908. + removing*/)
  71909. +{
  71910. + int result;
  71911. + coord_t down_link;
  71912. + znode *new_root;
  71913. + reiser4_tree *tree;
  71914. +
  71915. + assert("umka-266", current_tree != NULL);
  71916. + assert("nikita-1194", old_root != NULL);
  71917. + assert("nikita-1196", znode_is_root(old_root));
  71918. + assert("nikita-1200", node_num_items(old_root) == 1);
  71919. + assert("nikita-1401", znode_is_write_locked(old_root));
  71920. +
  71921. + coord_init_first_unit(&down_link, old_root);
  71922. +
  71923. + tree = znode_get_tree(old_root);
  71924. + new_root = child_znode(&down_link, old_root, 0, 1);
  71925. + if (!IS_ERR(new_root)) {
  71926. + result =
  71927. + reiser4_kill_root(tree, old_root, new_root,
  71928. + znode_get_block(new_root));
  71929. + zput(new_root);
  71930. + } else
  71931. + result = PTR_ERR(new_root);
  71932. +
  71933. + return result;
  71934. +}
  71935. +
  71936. +/* Make Linus happy.
  71937. + Local variables:
  71938. + c-indentation-style: "K&R"
  71939. + mode-name: "LC"
  71940. + c-basic-offset: 8
  71941. + tab-width: 8
  71942. + fill-column: 120
  71943. + scroll-step: 1
  71944. + End:
  71945. +*/
  71946. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/tree_mod.h linux-4.14.2/fs/reiser4/tree_mod.h
  71947. --- linux-4.14.2.orig/fs/reiser4/tree_mod.h 1970-01-01 01:00:00.000000000 +0100
  71948. +++ linux-4.14.2/fs/reiser4/tree_mod.h 2017-11-26 22:13:09.000000000 +0100
  71949. @@ -0,0 +1,29 @@
  71950. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  71951. + * reiser4/README */
  71952. +
  71953. +/* Functions to add/delete new nodes to/from the tree. See tree_mod.c for
  71954. + * comments. */
  71955. +
  71956. +#if !defined( __REISER4_TREE_MOD_H__ )
  71957. +#define __REISER4_TREE_MOD_H__
  71958. +
  71959. +#include "forward.h"
  71960. +
  71961. +znode *reiser4_new_node(znode * brother, tree_level level);
  71962. +znode *reiser4_add_tree_root(znode * old_root, znode * fake);
  71963. +int reiser4_kill_tree_root(znode * old_root);
  71964. +void build_child_ptr_data(znode * child, reiser4_item_data * data);
  71965. +
  71966. +/* __REISER4_TREE_MOD_H__ */
  71967. +#endif
  71968. +
  71969. +/* Make Linus happy.
  71970. + Local variables:
  71971. + c-indentation-style: "K&R"
  71972. + mode-name: "LC"
  71973. + c-basic-offset: 8
  71974. + tab-width: 8
  71975. + fill-column: 120
  71976. + scroll-step: 1
  71977. + End:
  71978. +*/
  71979. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/tree_walk.c linux-4.14.2/fs/reiser4/tree_walk.c
  71980. --- linux-4.14.2.orig/fs/reiser4/tree_walk.c 1970-01-01 01:00:00.000000000 +0100
  71981. +++ linux-4.14.2/fs/reiser4/tree_walk.c 2017-11-26 22:13:09.000000000 +0100
  71982. @@ -0,0 +1,927 @@
  71983. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  71984. + * reiser4/README */
  71985. +
  71986. +/* Routines and macros to:
  71987. +
  71988. + get_left_neighbor()
  71989. +
  71990. + get_right_neighbor()
  71991. +
  71992. + get_parent()
  71993. +
  71994. + get_first_child()
  71995. +
  71996. + get_last_child()
  71997. +
  71998. + various routines to walk the whole tree and do things to it like
  71999. + repack it, or move it to tertiary storage. Please make them as
  72000. + generic as is reasonable.
  72001. +
  72002. +*/
  72003. +
  72004. +#include "forward.h"
  72005. +#include "debug.h"
  72006. +#include "dformat.h"
  72007. +#include "coord.h"
  72008. +#include "plugin/item/item.h"
  72009. +#include "jnode.h"
  72010. +#include "znode.h"
  72011. +#include "tree_walk.h"
  72012. +#include "tree.h"
  72013. +#include "super.h"
  72014. +
  72015. +/* These macros are used internally in tree_walk.c in attempt to make
  72016. + lock_neighbor() code usable to build lock_parent(), lock_right_neighbor,
  72017. + lock_left_neighbor */
  72018. +#define GET_NODE_BY_PTR_OFFSET(node, off) (*(znode**)(((unsigned long)(node)) + (off)))
  72019. +#define FIELD_OFFSET(name) offsetof(znode, name)
  72020. +#define PARENT_PTR_OFFSET FIELD_OFFSET(in_parent.node)
  72021. +#define LEFT_PTR_OFFSET FIELD_OFFSET(left)
  72022. +#define RIGHT_PTR_OFFSET FIELD_OFFSET(right)
  72023. +
  72024. +/* This is the generic procedure to get and lock `generic' neighbor (left or
  72025. + right neighbor or parent). It implements common algorithm for all cases of
  72026. + getting lock on neighbor node, only znode structure field is different in
  72027. + each case. This is parameterized by ptr_offset argument, which is byte
  72028. + offset for the pointer to the desired neighbor within the current node's
  72029. + znode structure. This function should be called with the tree lock held */
  72030. +static int lock_neighbor(
  72031. + /* resulting lock handle */
  72032. + lock_handle * result,
  72033. + /* znode to lock */
  72034. + znode * node,
  72035. + /* pointer to neighbor (or parent) znode field offset, in bytes from
  72036. + the base address of znode structure */
  72037. + int ptr_offset,
  72038. + /* lock mode for longterm_lock_znode call */
  72039. + znode_lock_mode mode,
  72040. + /* lock request for longterm_lock_znode call */
  72041. + znode_lock_request req,
  72042. + /* GN_* flags */
  72043. + int flags, int rlocked)
  72044. +{
  72045. + reiser4_tree *tree = znode_get_tree(node);
  72046. + znode *neighbor;
  72047. + int ret;
  72048. +
  72049. + assert("umka-236", node != NULL);
  72050. + assert("umka-237", tree != NULL);
  72051. + assert_rw_locked(&(tree->tree_lock));
  72052. +
  72053. + if (flags & GN_TRY_LOCK)
  72054. + req |= ZNODE_LOCK_NONBLOCK;
  72055. + if (flags & GN_SAME_ATOM)
  72056. + req |= ZNODE_LOCK_DONT_FUSE;
  72057. +
  72058. + /* get neighbor's address by using of sibling link, quit while loop
  72059. + (and return) if link is not available. */
  72060. + while (1) {
  72061. + neighbor = GET_NODE_BY_PTR_OFFSET(node, ptr_offset);
  72062. +
  72063. + /* return -E_NO_NEIGHBOR if parent or side pointer is NULL or if
  72064. + * node pointed by it is not connected.
  72065. + *
  72066. + * However, GN_ALLOW_NOT_CONNECTED option masks "connected"
  72067. + * check and allows passing reference to not connected znode to
  72068. + * subsequent longterm_lock_znode() call. This kills possible
  72069. + * busy loop if we are trying to get longterm lock on locked but
  72070. + * not yet connected parent node. */
  72071. + if (neighbor == NULL || !((flags & GN_ALLOW_NOT_CONNECTED)
  72072. + || znode_is_connected(neighbor))) {
  72073. + return RETERR(-E_NO_NEIGHBOR);
  72074. + }
  72075. +
  72076. + /* protect it from deletion. */
  72077. + zref(neighbor);
  72078. +
  72079. + rlocked ? read_unlock_tree(tree) : write_unlock_tree(tree);
  72080. +
  72081. + ret = longterm_lock_znode(result, neighbor, mode, req);
  72082. +
  72083. + /* The lock handle obtains its own reference, release the one from above. */
  72084. + zput(neighbor);
  72085. +
  72086. + rlocked ? read_lock_tree(tree) : write_lock_tree(tree);
  72087. +
  72088. + /* restart if node we got reference to is being
  72089. + invalidated. we should not get reference to this node
  72090. + again. */
  72091. + if (ret == -EINVAL)
  72092. + continue;
  72093. + if (ret)
  72094. + return ret;
  72095. +
  72096. + /* check if neighbor link still points to just locked znode;
  72097. + the link could have been changed while the process slept. */
  72098. + if (neighbor == GET_NODE_BY_PTR_OFFSET(node, ptr_offset))
  72099. + return 0;
  72100. +
  72101. + /* znode was locked by mistake; unlock it and restart locking
  72102. + process from beginning. */
  72103. + rlocked ? read_unlock_tree(tree) : write_unlock_tree(tree);
  72104. + longterm_unlock_znode(result);
  72105. + rlocked ? read_lock_tree(tree) : write_lock_tree(tree);
  72106. + }
  72107. +}
  72108. +
  72109. +/* get parent node with longterm lock, accepts GN* flags. */
  72110. +int reiser4_get_parent_flags(lock_handle * lh /* resulting lock handle */ ,
  72111. + znode * node /* child node */ ,
  72112. + znode_lock_mode mode
  72113. + /* type of lock: read or write */ ,
  72114. + int flags /* GN_* flags */ )
  72115. +{
  72116. + int result;
  72117. +
  72118. + read_lock_tree(znode_get_tree(node));
  72119. + result = lock_neighbor(lh, node, PARENT_PTR_OFFSET, mode,
  72120. + ZNODE_LOCK_HIPRI, flags, 1);
  72121. + read_unlock_tree(znode_get_tree(node));
  72122. + return result;
  72123. +}
  72124. +
  72125. +/* wrapper function to lock right or left neighbor depending on GN_GO_LEFT
  72126. + bit in @flags parameter */
  72127. +/* Audited by: umka (2002.06.14) */
  72128. +static inline int
  72129. +lock_side_neighbor(lock_handle * result,
  72130. + znode * node, znode_lock_mode mode, int flags, int rlocked)
  72131. +{
  72132. + int ret;
  72133. + int ptr_offset;
  72134. + znode_lock_request req;
  72135. +
  72136. + if (flags & GN_GO_LEFT) {
  72137. + ptr_offset = LEFT_PTR_OFFSET;
  72138. + req = ZNODE_LOCK_LOPRI;
  72139. + } else {
  72140. + ptr_offset = RIGHT_PTR_OFFSET;
  72141. + req = ZNODE_LOCK_HIPRI;
  72142. + }
  72143. +
  72144. + ret =
  72145. + lock_neighbor(result, node, ptr_offset, mode, req, flags, rlocked);
  72146. +
  72147. + if (ret == -E_NO_NEIGHBOR) /* if we walk left or right -E_NO_NEIGHBOR does not
  72148. + * guarantee that neighbor is absent in the
  72149. + * tree; in this case we return -ENOENT --
  72150. + * means neighbor at least not found in
  72151. + * cache */
  72152. + return RETERR(-ENOENT);
  72153. +
  72154. + return ret;
  72155. +}
  72156. +
  72157. +#if REISER4_DEBUG
  72158. +
  72159. +int check_sibling_list(znode * node)
  72160. +{
  72161. + znode *scan;
  72162. + znode *next;
  72163. +
  72164. + assert("nikita-3283", LOCK_CNT_GTZ(write_locked_tree));
  72165. +
  72166. + if (node == NULL)
  72167. + return 1;
  72168. +
  72169. + if (ZF_ISSET(node, JNODE_RIP))
  72170. + return 1;
  72171. +
  72172. + assert("nikita-3270", node != NULL);
  72173. + assert_rw_write_locked(&(znode_get_tree(node)->tree_lock));
  72174. +
  72175. + for (scan = node; znode_is_left_connected(scan); scan = next) {
  72176. + next = scan->left;
  72177. + if (next != NULL && !ZF_ISSET(next, JNODE_RIP)) {
  72178. + assert("nikita-3271", znode_is_right_connected(next));
  72179. + assert("nikita-3272", next->right == scan);
  72180. + } else
  72181. + break;
  72182. + }
  72183. + for (scan = node; znode_is_right_connected(scan); scan = next) {
  72184. + next = scan->right;
  72185. + if (next != NULL && !ZF_ISSET(next, JNODE_RIP)) {
  72186. + assert("nikita-3273", znode_is_left_connected(next));
  72187. + assert("nikita-3274", next->left == scan);
  72188. + } else
  72189. + break;
  72190. + }
  72191. + return 1;
  72192. +}
  72193. +
  72194. +#endif
  72195. +
  72196. +/* Znode sibling pointers maintenance. */
  72197. +
  72198. +/* Znode sibling pointers are established between any neighbored nodes which are
  72199. + in cache. There are two znode state bits (JNODE_LEFT_CONNECTED,
  72200. + JNODE_RIGHT_CONNECTED), if left or right sibling pointer contains actual
  72201. + value (even NULL), the corresponding JNODE_*_CONNECTED bit is set.
  72202. +
  72203. + Reiser4 tree operations which may allocate new znodes (CBK, tree balancing)
  72204. + take care about searching (hash table lookup may be required) of znode
  72205. + neighbors, establishing sibling pointers between them and setting
  72206. + JNODE_*_CONNECTED state bits. */
  72207. +
  72208. +/* adjusting of sibling pointers and `connected' states for two
  72209. + neighbors; works if one neighbor is NULL (was not found). */
  72210. +
  72211. +/* FIXME-VS: this is unstatic-ed to use in tree.c in prepare_twig_cut */
  72212. +void link_left_and_right(znode * left, znode * right)
  72213. +{
  72214. + assert("nikita-3275", check_sibling_list(left));
  72215. + assert("nikita-3275", check_sibling_list(right));
  72216. +
  72217. + if (left != NULL) {
  72218. + if (left->right == NULL) {
  72219. + left->right = right;
  72220. + ZF_SET(left, JNODE_RIGHT_CONNECTED);
  72221. +
  72222. + ON_DEBUG(left->right_version =
  72223. + atomic_inc_return(&delim_key_version);
  72224. + );
  72225. +
  72226. + } else if (ZF_ISSET(left->right, JNODE_HEARD_BANSHEE)
  72227. + && left->right != right) {
  72228. +
  72229. + ON_DEBUG(left->right->left_version =
  72230. + atomic_inc_return(&delim_key_version);
  72231. + left->right_version =
  72232. + atomic_inc_return(&delim_key_version););
  72233. +
  72234. + left->right->left = NULL;
  72235. + left->right = right;
  72236. + ZF_SET(left, JNODE_RIGHT_CONNECTED);
  72237. + } else
  72238. + /*
  72239. + * there is a race condition in renew_sibling_link()
  72240. + * and assertions below check that it is only one
  72241. + * there. Thread T1 calls renew_sibling_link() without
  72242. + * GN_NO_ALLOC flag. zlook() doesn't find neighbor
  72243. + * node, but before T1 gets to the
  72244. + * link_left_and_right(), another thread T2 creates
  72245. + * neighbor node and connects it. check for
  72246. + * left->right == NULL above protects T1 from
  72247. + * overwriting correct left->right pointer installed
  72248. + * by T2.
  72249. + */
  72250. + assert("nikita-3302",
  72251. + right == NULL || left->right == right);
  72252. + }
  72253. + if (right != NULL) {
  72254. + if (right->left == NULL) {
  72255. + right->left = left;
  72256. + ZF_SET(right, JNODE_LEFT_CONNECTED);
  72257. +
  72258. + ON_DEBUG(right->left_version =
  72259. + atomic_inc_return(&delim_key_version);
  72260. + );
  72261. +
  72262. + } else if (ZF_ISSET(right->left, JNODE_HEARD_BANSHEE)
  72263. + && right->left != left) {
  72264. +
  72265. + ON_DEBUG(right->left->right_version =
  72266. + atomic_inc_return(&delim_key_version);
  72267. + right->left_version =
  72268. + atomic_inc_return(&delim_key_version););
  72269. +
  72270. + right->left->right = NULL;
  72271. + right->left = left;
  72272. + ZF_SET(right, JNODE_LEFT_CONNECTED);
  72273. +
  72274. + } else
  72275. + assert("nikita-3303",
  72276. + left == NULL || right->left == left);
  72277. + }
  72278. + assert("nikita-3275", check_sibling_list(left));
  72279. + assert("nikita-3275", check_sibling_list(right));
  72280. +}
  72281. +
  72282. +/* Audited by: umka (2002.06.14) */
  72283. +static void link_znodes(znode * first, znode * second, int to_left)
  72284. +{
  72285. + if (to_left)
  72286. + link_left_and_right(second, first);
  72287. + else
  72288. + link_left_and_right(first, second);
  72289. +}
  72290. +
  72291. +/* getting of next (to left or to right, depend on gn_to_left bit in flags)
  72292. + coord's unit position in horizontal direction, even across node
  72293. + boundary. Should be called under tree lock, it protects nonexistence of
  72294. + sibling link on parent level, if lock_side_neighbor() fails with
  72295. + -ENOENT. */
  72296. +static int far_next_coord(coord_t * coord, lock_handle * handle, int flags)
  72297. +{
  72298. + int ret;
  72299. + znode *node;
  72300. + reiser4_tree *tree;
  72301. +
  72302. + assert("umka-243", coord != NULL);
  72303. + assert("umka-244", handle != NULL);
  72304. + assert("zam-1069", handle->node == NULL);
  72305. +
  72306. + ret =
  72307. + (flags & GN_GO_LEFT) ? coord_prev_unit(coord) :
  72308. + coord_next_unit(coord);
  72309. + if (!ret)
  72310. + return 0;
  72311. +
  72312. + ret =
  72313. + lock_side_neighbor(handle, coord->node, ZNODE_READ_LOCK, flags, 0);
  72314. + if (ret)
  72315. + return ret;
  72316. +
  72317. + node = handle->node;
  72318. + tree = znode_get_tree(node);
  72319. + write_unlock_tree(tree);
  72320. +
  72321. + coord_init_zero(coord);
  72322. +
  72323. + /* We avoid synchronous read here if it is specified by flag. */
  72324. + if ((flags & GN_ASYNC) && znode_page(handle->node) == NULL) {
  72325. + ret = jstartio(ZJNODE(handle->node));
  72326. + if (!ret)
  72327. + ret = -E_REPEAT;
  72328. + goto error_locked;
  72329. + }
  72330. +
  72331. + /* corresponding zrelse() should be called by the clients of
  72332. + far_next_coord(), in place when this node gets unlocked. */
  72333. + ret = zload(handle->node);
  72334. + if (ret)
  72335. + goto error_locked;
  72336. +
  72337. + if (flags & GN_GO_LEFT)
  72338. + coord_init_last_unit(coord, node);
  72339. + else
  72340. + coord_init_first_unit(coord, node);
  72341. +
  72342. + if (0) {
  72343. + error_locked:
  72344. + longterm_unlock_znode(handle);
  72345. + }
  72346. + write_lock_tree(tree);
  72347. + return ret;
  72348. +}
  72349. +
  72350. +/* Very significant function which performs a step in horizontal direction
  72351. + when sibling pointer is not available. Actually, it is only function which
  72352. + does it.
  72353. + Note: this function does not restore locking status at exit,
  72354. + caller should take care of proper unlocking and zrelsing */
  72355. +static int
  72356. +renew_sibling_link(coord_t * coord, lock_handle * handle, znode * child,
  72357. + tree_level level, int flags, int *nr_locked)
  72358. +{
  72359. + int ret;
  72360. + int to_left = flags & GN_GO_LEFT;
  72361. + reiser4_block_nr da;
  72362. + /* parent of the neighbor node; we set it to parent until not sharing
  72363. + of one parent between child and neighbor node is detected */
  72364. + znode *side_parent = coord->node;
  72365. + reiser4_tree *tree = znode_get_tree(child);
  72366. + znode *neighbor = NULL;
  72367. +
  72368. + assert("umka-245", coord != NULL);
  72369. + assert("umka-246", handle != NULL);
  72370. + assert("umka-247", child != NULL);
  72371. + assert("umka-303", tree != NULL);
  72372. +
  72373. + init_lh(handle);
  72374. + write_lock_tree(tree);
  72375. + ret = far_next_coord(coord, handle, flags);
  72376. +
  72377. + if (ret) {
  72378. + if (ret != -ENOENT) {
  72379. + write_unlock_tree(tree);
  72380. + return ret;
  72381. + }
  72382. + } else {
  72383. + item_plugin *iplug;
  72384. +
  72385. + if (handle->node != NULL) {
  72386. + (*nr_locked)++;
  72387. + side_parent = handle->node;
  72388. + }
  72389. +
  72390. + /* does the coord object point to an internal item? We do not
  72391. + support sibling pointers between znode for formatted and
  72392. + unformatted nodes and return -E_NO_NEIGHBOR in that case. */
  72393. + iplug = item_plugin_by_coord(coord);
  72394. + if (!item_is_internal(coord)) {
  72395. + link_znodes(child, NULL, to_left);
  72396. + write_unlock_tree(tree);
  72397. + /* we know there can't be formatted neighbor */
  72398. + return RETERR(-E_NO_NEIGHBOR);
  72399. + }
  72400. + write_unlock_tree(tree);
  72401. +
  72402. + iplug->s.internal.down_link(coord, NULL, &da);
  72403. +
  72404. + if (flags & GN_NO_ALLOC) {
  72405. + neighbor = zlook(tree, &da);
  72406. + } else {
  72407. + neighbor =
  72408. + zget(tree, &da, side_parent, level,
  72409. + reiser4_ctx_gfp_mask_get());
  72410. + }
  72411. +
  72412. + if (IS_ERR(neighbor)) {
  72413. + ret = PTR_ERR(neighbor);
  72414. + return ret;
  72415. + }
  72416. +
  72417. + if (neighbor)
  72418. + /* update delimiting keys */
  72419. + set_child_delimiting_keys(coord->node, coord, neighbor);
  72420. +
  72421. + write_lock_tree(tree);
  72422. + }
  72423. +
  72424. + if (likely(neighbor == NULL ||
  72425. + (znode_get_level(child) == znode_get_level(neighbor)
  72426. + && child != neighbor)))
  72427. + link_znodes(child, neighbor, to_left);
  72428. + else {
  72429. + warning("nikita-3532",
  72430. + "Sibling nodes on the different levels: %i != %i\n",
  72431. + znode_get_level(child), znode_get_level(neighbor));
  72432. + ret = RETERR(-EIO);
  72433. + }
  72434. +
  72435. + write_unlock_tree(tree);
  72436. +
  72437. + /* if GN_NO_ALLOC isn't set we keep reference to neighbor znode */
  72438. + if (neighbor != NULL && (flags & GN_NO_ALLOC))
  72439. + /* atomic_dec(&ZJNODE(neighbor)->x_count); */
  72440. + zput(neighbor);
  72441. +
  72442. + return ret;
  72443. +}
  72444. +
  72445. +/* This function is for establishing of one side relation. */
  72446. +/* Audited by: umka (2002.06.14) */
  72447. +static int connect_one_side(coord_t * coord, znode * node, int flags)
  72448. +{
  72449. + coord_t local;
  72450. + lock_handle handle;
  72451. + int nr_locked;
  72452. + int ret;
  72453. +
  72454. + assert("umka-248", coord != NULL);
  72455. + assert("umka-249", node != NULL);
  72456. +
  72457. + coord_dup_nocheck(&local, coord);
  72458. +
  72459. + init_lh(&handle);
  72460. +
  72461. + ret =
  72462. + renew_sibling_link(&local, &handle, node, znode_get_level(node),
  72463. + flags | GN_NO_ALLOC, &nr_locked);
  72464. +
  72465. + if (handle.node != NULL) {
  72466. + /* complementary operations for zload() and lock() in far_next_coord() */
  72467. + zrelse(handle.node);
  72468. + longterm_unlock_znode(&handle);
  72469. + }
  72470. +
  72471. + /* we catch error codes which are not interesting for us because we
  72472. + run renew_sibling_link() only for znode connection. */
  72473. + if (ret == -ENOENT || ret == -E_NO_NEIGHBOR)
  72474. + return 0;
  72475. +
  72476. + return ret;
  72477. +}
  72478. +
  72479. +/* if @child is not in `connected' state, performs hash searches for left and
  72480. + right neighbor nodes and establishes horizontal sibling links */
  72481. +/* Audited by: umka (2002.06.14), umka (2002.06.15) */
  72482. +int connect_znode(coord_t * parent_coord, znode * child)
  72483. +{
  72484. + reiser4_tree *tree = znode_get_tree(child);
  72485. + int ret = 0;
  72486. +
  72487. + assert("zam-330", parent_coord != NULL);
  72488. + assert("zam-331", child != NULL);
  72489. + assert("zam-332", parent_coord->node != NULL);
  72490. + assert("umka-305", tree != NULL);
  72491. +
  72492. + /* it is trivial to `connect' root znode because it can't have
  72493. + neighbors */
  72494. + if (znode_above_root(parent_coord->node)) {
  72495. + child->left = NULL;
  72496. + child->right = NULL;
  72497. + ZF_SET(child, JNODE_LEFT_CONNECTED);
  72498. + ZF_SET(child, JNODE_RIGHT_CONNECTED);
  72499. +
  72500. + ON_DEBUG(child->left_version =
  72501. + atomic_inc_return(&delim_key_version);
  72502. + child->right_version =
  72503. + atomic_inc_return(&delim_key_version););
  72504. +
  72505. + return 0;
  72506. + }
  72507. +
  72508. + /* load parent node */
  72509. + coord_clear_iplug(parent_coord);
  72510. + ret = zload(parent_coord->node);
  72511. +
  72512. + if (ret != 0)
  72513. + return ret;
  72514. +
  72515. + /* protect `connected' state check by tree_lock */
  72516. + read_lock_tree(tree);
  72517. +
  72518. + if (!znode_is_right_connected(child)) {
  72519. + read_unlock_tree(tree);
  72520. + /* connect right (default is right) */
  72521. + ret = connect_one_side(parent_coord, child, GN_NO_ALLOC);
  72522. + if (ret)
  72523. + goto zrelse_and_ret;
  72524. +
  72525. + read_lock_tree(tree);
  72526. + }
  72527. +
  72528. + ret = znode_is_left_connected(child);
  72529. +
  72530. + read_unlock_tree(tree);
  72531. +
  72532. + if (!ret) {
  72533. + ret =
  72534. + connect_one_side(parent_coord, child,
  72535. + GN_NO_ALLOC | GN_GO_LEFT);
  72536. + } else
  72537. + ret = 0;
  72538. +
  72539. + zrelse_and_ret:
  72540. + zrelse(parent_coord->node);
  72541. +
  72542. + return ret;
  72543. +}
  72544. +
  72545. +/* this function is like renew_sibling_link() but allocates neighbor node if
  72546. + it doesn't exist and `connects' it. It may require making two steps in
  72547. + horizontal direction, first one for neighbor node finding/allocation,
  72548. + second one is for finding neighbor of neighbor to connect freshly allocated
  72549. + znode. */
  72550. +/* Audited by: umka (2002.06.14), umka (2002.06.15) */
  72551. +static int
  72552. +renew_neighbor(coord_t * coord, znode * node, tree_level level, int flags)
  72553. +{
  72554. + coord_t local;
  72555. + lock_handle empty[2];
  72556. + reiser4_tree *tree = znode_get_tree(node);
  72557. + znode *neighbor = NULL;
  72558. + int nr_locked = 0;
  72559. + int ret;
  72560. +
  72561. + assert("umka-250", coord != NULL);
  72562. + assert("umka-251", node != NULL);
  72563. + assert("umka-307", tree != NULL);
  72564. + assert("umka-308", level <= tree->height);
  72565. +
  72566. + /* umka (2002.06.14)
  72567. + Here probably should be a check for given "level" validness.
  72568. + Something like assert("xxx-yyy", level < REAL_MAX_ZTREE_HEIGHT);
  72569. + */
  72570. +
  72571. + coord_dup(&local, coord);
  72572. +
  72573. + ret =
  72574. + renew_sibling_link(&local, &empty[0], node, level,
  72575. + flags & ~GN_NO_ALLOC, &nr_locked);
  72576. + if (ret)
  72577. + goto out;
  72578. +
  72579. + /* tree lock is not needed here because we keep parent node(s) locked
  72580. + and reference to neighbor znode incremented */
  72581. + neighbor = (flags & GN_GO_LEFT) ? node->left : node->right;
  72582. +
  72583. + read_lock_tree(tree);
  72584. + ret = znode_is_connected(neighbor);
  72585. + read_unlock_tree(tree);
  72586. + if (ret) {
  72587. + ret = 0;
  72588. + goto out;
  72589. + }
  72590. +
  72591. + ret =
  72592. + renew_sibling_link(&local, &empty[nr_locked], neighbor, level,
  72593. + flags | GN_NO_ALLOC, &nr_locked);
  72594. + /* second renew_sibling_link() call is used for znode connection only,
  72595. + so we can live with these errors */
  72596. + if (-ENOENT == ret || -E_NO_NEIGHBOR == ret)
  72597. + ret = 0;
  72598. +
  72599. + out:
  72600. +
  72601. + for (--nr_locked; nr_locked >= 0; --nr_locked) {
  72602. + zrelse(empty[nr_locked].node);
  72603. + longterm_unlock_znode(&empty[nr_locked]);
  72604. + }
  72605. +
  72606. + if (neighbor != NULL)
  72607. + /* decrement znode reference counter without actually
  72608. + releasing it. */
  72609. + atomic_dec(&ZJNODE(neighbor)->x_count);
  72610. +
  72611. + return ret;
  72612. +}
  72613. +
  72614. +/*
  72615. + reiser4_get_neighbor() -- lock node's neighbor.
  72616. +
  72617. + reiser4_get_neighbor() locks node's neighbor (left or right one, depends on
  72618. + given parameter) using sibling link to it. If sibling link is not available
  72619. + (i.e. neighbor znode is not in cache) and flags allow read blocks, we go one
  72620. + level up for information about neighbor's disk address. We lock node's
  72621. + parent, if it is common parent for both 'node' and its neighbor, neighbor's
  72622. + disk address is in next (to left or to right) down link from link that points
  72623. + to original node. If not, we need to lock parent's neighbor, read its content
  72624. + and take first(last) downlink with neighbor's disk address. That locking
  72625. + could be done by using sibling link and lock_neighbor() function, if sibling
  72626. + link exists. In another case we have to go level up again until we find
  72627. + common parent or valid sibling link. Then go down
  72628. + allocating/connecting/locking/reading nodes until neighbor of first one is
  72629. + locked.
  72630. +
  72631. + @neighbor: result lock handle,
  72632. + @node: a node which we lock neighbor of,
  72633. + @lock_mode: lock mode {LM_READ, LM_WRITE},
  72634. + @flags: logical OR of {GN_*} (see description above) subset.
  72635. +
  72636. + @return: 0 if success, negative value if lock was impossible due to an error
  72637. + or lack of neighbor node.
  72638. +*/
  72639. +
  72640. +/* Audited by: umka (2002.06.14), umka (2002.06.15) */
  72641. +int
  72642. +reiser4_get_neighbor(lock_handle * neighbor, znode * node,
  72643. + znode_lock_mode lock_mode, int flags)
  72644. +{
  72645. + reiser4_tree *tree = znode_get_tree(node);
  72646. + lock_handle path[REAL_MAX_ZTREE_HEIGHT];
  72647. +
  72648. + coord_t coord;
  72649. +
  72650. + tree_level base_level;
  72651. + tree_level h = 0;
  72652. + int ret;
  72653. +
  72654. + assert("umka-252", tree != NULL);
  72655. + assert("umka-253", neighbor != NULL);
  72656. + assert("umka-254", node != NULL);
  72657. +
  72658. + base_level = znode_get_level(node);
  72659. +
  72660. + assert("umka-310", base_level <= tree->height);
  72661. +
  72662. + coord_init_zero(&coord);
  72663. +
  72664. + again:
  72665. + /* first, we try to use simple lock_neighbor() which requires sibling
  72666. + link existence */
  72667. + read_lock_tree(tree);
  72668. + ret = lock_side_neighbor(neighbor, node, lock_mode, flags, 1);
  72669. + read_unlock_tree(tree);
  72670. + if (!ret) {
  72671. + /* load znode content if it was specified */
  72672. + if (flags & GN_LOAD_NEIGHBOR) {
  72673. + ret = zload(node);
  72674. + if (ret)
  72675. + longterm_unlock_znode(neighbor);
  72676. + }
  72677. + return ret;
  72678. + }
  72679. +
  72680. + /* only -ENOENT means we may look upward and try to connect
  72681. + @node with its neighbor (if @flags allow us to do it) */
  72682. + if (ret != -ENOENT || !(flags & GN_CAN_USE_UPPER_LEVELS))
  72683. + return ret;
  72684. +
  72685. + /* before establishing of sibling link we lock parent node; it is
  72686. + required by renew_neighbor() to work. */
  72687. + init_lh(&path[0]);
  72688. + ret = reiser4_get_parent(&path[0], node, ZNODE_READ_LOCK);
  72689. + if (ret)
  72690. + return ret;
  72691. + if (znode_above_root(path[0].node)) {
  72692. + longterm_unlock_znode(&path[0]);
  72693. + return RETERR(-E_NO_NEIGHBOR);
  72694. + }
  72695. +
  72696. + while (1) {
  72697. + znode *child = (h == 0) ? node : path[h - 1].node;
  72698. + znode *parent = path[h].node;
  72699. +
  72700. + ret = zload(parent);
  72701. + if (ret)
  72702. + break;
  72703. +
  72704. + ret = find_child_ptr(parent, child, &coord);
  72705. +
  72706. + if (ret) {
  72707. + zrelse(parent);
  72708. + break;
  72709. + }
  72710. +
  72711. + /* try to establish missing sibling link */
  72712. + ret = renew_neighbor(&coord, child, h + base_level, flags);
  72713. +
  72714. + zrelse(parent);
  72715. +
  72716. + switch (ret) {
  72717. + case 0:
  72718. + /* unlocking of parent znode prevents simple
  72719. + deadlock situation */
  72720. + done_lh(&path[h]);
  72721. +
  72722. + /* depending on the tree level we are on, we repeat the first
  72723. + locking attempt ... */
  72724. + if (h == 0)
  72725. + goto again;
  72726. +
  72727. + /* ... or repeat establishing of sibling link at
  72728. + one level below. */
  72729. + --h;
  72730. + break;
  72731. +
  72732. + case -ENOENT:
  72733. + /* sibling link is not available -- we go
  72734. + upward. */
  72735. + init_lh(&path[h + 1]);
  72736. + ret =
  72737. + reiser4_get_parent(&path[h + 1], parent,
  72738. + ZNODE_READ_LOCK);
  72739. + if (ret)
  72740. + goto fail;
  72741. + ++h;
  72742. + if (znode_above_root(path[h].node)) {
  72743. + ret = RETERR(-E_NO_NEIGHBOR);
  72744. + goto fail;
  72745. + }
  72746. + break;
  72747. +
  72748. + case -E_DEADLOCK:
  72749. + /* there was lock request from hi-pri locker. if
  72750. + it is possible we unlock last parent node and
  72751. + re-lock it again. */
  72752. + for (; reiser4_check_deadlock(); h--) {
  72753. + done_lh(&path[h]);
  72754. + if (h == 0)
  72755. + goto fail;
  72756. + }
  72757. +
  72758. + break;
  72759. +
  72760. + default: /* other errors. */
  72761. + goto fail;
  72762. + }
  72763. + }
  72764. + fail:
  72765. + ON_DEBUG(check_lock_node_data(node));
  72766. + ON_DEBUG(check_lock_data());
  72767. +
  72768. + /* unlock path */
  72769. + do {
  72770. + /* FIXME-Zam: when we get here from case -E_DEADLOCK's goto
  72771. + fail; path[0] is already done_lh-ed, therefore
  72772. + longterm_unlock_znode(&path[h]); is not applicable */
  72773. + done_lh(&path[h]);
  72774. + --h;
  72775. + } while (h + 1 != 0);
  72776. +
  72777. + return ret;
  72778. +}
  72779. +
  72780. +/* remove node from sibling list */
  72781. +/* Audited by: umka (2002.06.14) */
  72782. +void sibling_list_remove(znode * node)
  72783. +{
  72784. + reiser4_tree *tree;
  72785. +
  72786. + tree = znode_get_tree(node);
  72787. + assert("umka-255", node != NULL);
  72788. + assert_rw_write_locked(&(tree->tree_lock));
  72789. + assert("nikita-3275", check_sibling_list(node));
  72790. +
  72791. + write_lock_dk(tree);
  72792. + if (znode_is_right_connected(node) && node->right != NULL &&
  72793. + znode_is_left_connected(node) && node->left != NULL) {
  72794. + assert("zam-32245",
  72795. + keyeq(znode_get_rd_key(node),
  72796. + znode_get_ld_key(node->right)));
  72797. + znode_set_rd_key(node->left, znode_get_ld_key(node->right));
  72798. + }
  72799. + write_unlock_dk(tree);
  72800. +
  72801. + if (znode_is_right_connected(node) && node->right != NULL) {
  72802. + assert("zam-322", znode_is_left_connected(node->right));
  72803. + node->right->left = node->left;
  72804. + ON_DEBUG(node->right->left_version =
  72805. + atomic_inc_return(&delim_key_version);
  72806. + );
  72807. + }
  72808. + if (znode_is_left_connected(node) && node->left != NULL) {
  72809. + assert("zam-323", znode_is_right_connected(node->left));
  72810. + node->left->right = node->right;
  72811. + ON_DEBUG(node->left->right_version =
  72812. + atomic_inc_return(&delim_key_version);
  72813. + );
  72814. + }
  72815. +
  72816. + ZF_CLR(node, JNODE_LEFT_CONNECTED);
  72817. + ZF_CLR(node, JNODE_RIGHT_CONNECTED);
  72818. + ON_DEBUG(node->left = node->right = NULL;
  72819. + node->left_version = atomic_inc_return(&delim_key_version);
  72820. + node->right_version = atomic_inc_return(&delim_key_version););
  72821. + assert("nikita-3276", check_sibling_list(node));
  72822. +}
  72823. +
  72824. +/* disconnect node from sibling list */
  72825. +void sibling_list_drop(znode * node)
  72826. +{
  72827. + znode *right;
  72828. + znode *left;
  72829. +
  72830. + assert("nikita-2464", node != NULL);
  72831. + assert("nikita-3277", check_sibling_list(node));
  72832. +
  72833. + right = node->right;
  72834. + if (right != NULL) {
  72835. + assert("nikita-2465", znode_is_left_connected(right));
  72836. + right->left = NULL;
  72837. + ON_DEBUG(right->left_version =
  72838. + atomic_inc_return(&delim_key_version);
  72839. + );
  72840. + }
  72841. + left = node->left;
  72842. + if (left != NULL) {
  72843. + assert("zam-323", znode_is_right_connected(left));
  72844. + left->right = NULL;
  72845. + ON_DEBUG(left->right_version =
  72846. + atomic_inc_return(&delim_key_version);
  72847. + );
  72848. + }
  72849. + ZF_CLR(node, JNODE_LEFT_CONNECTED);
  72850. + ZF_CLR(node, JNODE_RIGHT_CONNECTED);
  72851. + ON_DEBUG(node->left = node->right = NULL;
  72852. + node->left_version = atomic_inc_return(&delim_key_version);
  72853. + node->right_version = atomic_inc_return(&delim_key_version););
  72854. +}
  72855. +
  72856. +/* Insert new node into sibling list. Regular balancing inserts new node
  72857. + after (at right side) existing and locked node (@before), except one case
  72858. + of adding new tree root node. @before should be NULL in that case. */
  72859. +void sibling_list_insert_nolock(znode * new, znode * before)
  72860. +{
  72861. + assert("zam-334", new != NULL);
  72862. + assert("nikita-3298", !znode_is_left_connected(new));
  72863. + assert("nikita-3299", !znode_is_right_connected(new));
  72864. + assert("nikita-3300", new->left == NULL);
  72865. + assert("nikita-3301", new->right == NULL);
  72866. + assert("nikita-3278", check_sibling_list(new));
  72867. + assert("nikita-3279", check_sibling_list(before));
  72868. +
  72869. + if (before != NULL) {
  72870. + assert("zam-333", znode_is_connected(before));
  72871. + new->right = before->right;
  72872. + new->left = before;
  72873. + ON_DEBUG(new->right_version =
  72874. + atomic_inc_return(&delim_key_version);
  72875. + new->left_version =
  72876. + atomic_inc_return(&delim_key_version););
  72877. + if (before->right != NULL) {
  72878. + before->right->left = new;
  72879. + ON_DEBUG(before->right->left_version =
  72880. + atomic_inc_return(&delim_key_version);
  72881. + );
  72882. + }
  72883. + before->right = new;
  72884. + ON_DEBUG(before->right_version =
  72885. + atomic_inc_return(&delim_key_version);
  72886. + );
  72887. + } else {
  72888. + new->right = NULL;
  72889. + new->left = NULL;
  72890. + ON_DEBUG(new->right_version =
  72891. + atomic_inc_return(&delim_key_version);
  72892. + new->left_version =
  72893. + atomic_inc_return(&delim_key_version););
  72894. + }
  72895. + ZF_SET(new, JNODE_LEFT_CONNECTED);
  72896. + ZF_SET(new, JNODE_RIGHT_CONNECTED);
  72897. + assert("nikita-3280", check_sibling_list(new));
  72898. + assert("nikita-3281", check_sibling_list(before));
  72899. +}
  72900. +
  72901. +/*
  72902. + Local variables:
  72903. + c-indentation-style: "K&R"
  72904. + mode-name: "LC"
  72905. + c-basic-offset: 8
  72906. + tab-width: 8
  72907. + fill-column: 80
  72908. + End:
  72909. +*/
  72910. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/tree_walk.h linux-4.14.2/fs/reiser4/tree_walk.h
  72911. --- linux-4.14.2.orig/fs/reiser4/tree_walk.h 1970-01-01 01:00:00.000000000 +0100
  72912. +++ linux-4.14.2/fs/reiser4/tree_walk.h 2017-11-26 22:13:09.000000000 +0100
  72913. @@ -0,0 +1,125 @@
  72914. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  72915. +
  72916. +/* definitions of reiser4 tree walk functions */
  72917. +
  72918. +#ifndef __FS_REISER4_TREE_WALK_H__
  72919. +#define __FS_REISER4_TREE_WALK_H__
  72920. +
  72921. +#include "debug.h"
  72922. +#include "forward.h"
  72923. +
  72924. +/* establishes horizontal links between cached znodes */
  72925. +int connect_znode(coord_t * coord, znode * node);
  72926. +
  72927. +/* tree traversal functions (reiser4_get_parent(), reiser4_get_neighbor())
  72928. + have the following common arguments:
  72929. +
  72930. + return codes:
  72931. +
  72932. + @return : 0 - OK,
  72933. +
  72934. +ZAM-FIXME-HANS: wrong return code name. Change them all.
  72935. + -ENOENT - neighbor is not in cache, what is detected by sibling
  72936. + link absence.
  72937. +
  72938. + -E_NO_NEIGHBOR - we are sure that neighbor (or parent) node cannot be
  72939. + found (because we are left-/right- most node of the
  72940. + tree, for example). Also, this return code is for
  72941. + reiser4_get_parent() when we see no parent link -- it
  72942. + means that our node is root node.
  72943. +
  72944. + -E_DEADLOCK - deadlock detected (request from high-priority process
  72945. + received), other error codes are conformed to
  72946. + /usr/include/asm/errno.h .
  72947. +*/
  72948. +
  72949. +int
  72950. +reiser4_get_parent_flags(lock_handle * result, znode * node,
  72951. + znode_lock_mode mode, int flags);
  72952. +
  72953. +/* bits definition for reiser4_get_neighbor function `flags' arg. */
  72954. +typedef enum {
  72955. + /* If sibling pointer is NULL, this flag allows get_neighbor() to try to
  72956. + * find not allocated not connected neighbor by going through upper
  72957. + * levels */
  72958. + GN_CAN_USE_UPPER_LEVELS = 0x1,
  72959. + /* locking left neighbor instead of right one */
  72960. + GN_GO_LEFT = 0x2,
  72961. + /* automatically load neighbor node content */
  72962. + GN_LOAD_NEIGHBOR = 0x4,
  72963. + /* return -E_REPEAT if can't lock */
  72964. + GN_TRY_LOCK = 0x8,
  72965. + /* used internally in tree_walk.c, causes renew_sibling to not
  72966. + allocate neighbor znode, but only search for it in znode cache */
  72967. + GN_NO_ALLOC = 0x10,
  72968. + /* do not go across atom boundaries */
  72969. + GN_SAME_ATOM = 0x20,
  72970. + /* allow to lock not connected nodes */
  72971. + GN_ALLOW_NOT_CONNECTED = 0x40,
  72972. + /* Avoid synchronous jload, instead, call jstartio() and return -E_REPEAT. */
  72973. + GN_ASYNC = 0x80
  72974. +} znode_get_neigbor_flags;
  72975. +
  72976. +/* A commonly used wrapper for reiser4_get_parent_flags(). */
  72977. +static inline int reiser4_get_parent(lock_handle * result, znode * node,
  72978. + znode_lock_mode mode)
  72979. +{
  72980. + return reiser4_get_parent_flags(result, node, mode,
  72981. + GN_ALLOW_NOT_CONNECTED);
  72982. +}
  72983. +
  72984. +int reiser4_get_neighbor(lock_handle * neighbor, znode * node,
  72985. + znode_lock_mode lock_mode, int flags);
  72986. +
  72987. +/* there are wrappers for most common usages of reiser4_get_neighbor() */
  72988. +static inline int
  72989. +reiser4_get_left_neighbor(lock_handle * result, znode * node, int lock_mode,
  72990. + int flags)
  72991. +{
  72992. + return reiser4_get_neighbor(result, node, lock_mode,
  72993. + flags | GN_GO_LEFT);
  72994. +}
  72995. +
  72996. +static inline int
  72997. +reiser4_get_right_neighbor(lock_handle * result, znode * node, int lock_mode,
  72998. + int flags)
  72999. +{
  73000. + ON_DEBUG(check_lock_node_data(node));
  73001. + ON_DEBUG(check_lock_data());
  73002. + return reiser4_get_neighbor(result, node, lock_mode,
  73003. + flags & (~GN_GO_LEFT));
  73004. +}
  73005. +
  73006. +extern void sibling_list_remove(znode * node);
  73007. +extern void sibling_list_drop(znode * node);
  73008. +extern void sibling_list_insert_nolock(znode * new, znode * before);
  73009. +extern void link_left_and_right(znode * left, znode * right);
  73010. +
  73011. +/* Functions called by tree_walk() when tree_walk() ... */
  73012. +struct tree_walk_actor {
  73013. + /* ... meets a formatted node, */
  73014. + int (*process_znode) (tap_t *, void *);
  73015. + /* ... meets an extent, */
  73016. + int (*process_extent) (tap_t *, void *);
  73017. + /* ... begins tree traversal or repeats it after -E_REPEAT was returned by
  73018. + * node or extent processing functions. */
  73019. + int (*before) (void *);
  73020. +};
  73021. +
  73022. +#if REISER4_DEBUG
  73023. +int check_sibling_list(znode * node);
  73024. +#else
  73025. +#define check_sibling_list(n) (1)
  73026. +#endif
  73027. +
  73028. +#endif /* __FS_REISER4_TREE_WALK_H__ */
  73029. +
  73030. +/*
  73031. + Local variables:
  73032. + c-indentation-style: "K&R"
  73033. + mode-name: "LC"
  73034. + c-basic-offset: 8
  73035. + tab-width: 8
  73036. + fill-column: 120
  73037. + End:
  73038. +*/
  73039. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/txnmgr.c linux-4.14.2/fs/reiser4/txnmgr.c
  73040. --- linux-4.14.2.orig/fs/reiser4/txnmgr.c 1970-01-01 01:00:00.000000000 +0100
  73041. +++ linux-4.14.2/fs/reiser4/txnmgr.c 2017-11-26 22:13:09.000000000 +0100
  73042. @@ -0,0 +1,3163 @@
  73043. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  73044. + * reiser4/README */
  73045. +
  73046. +/* Joshua MacDonald wrote the first draft of this code. */
  73047. +
  73048. +/* ZAM-LONGTERM-FIXME-HANS: The locking in this file is badly designed, and a
  73049. +filesystem scales only as well as its worst locking design. You need to
  73050. +substantially restructure this code. Josh was not as experienced a programmer
  73051. +as you. Particularly review how the locking style differs from what you did
  73052. +for znodes using hi-lo priority locking, and present to me an opinion on
  73053. +whether the differences are well founded. */
  73054. +
  73055. +/* I cannot help but to disagree with the sentiment above. Locking of
  73056. + * transaction manager is _not_ badly designed, and, at the very least, is not
  73057. + * the scaling bottleneck. Scaling bottleneck is _exactly_ hi-lo priority
  73058. + * locking on znodes, especially on the root node of the tree. --nikita,
  73059. + * 2003.10.13 */
  73060. +
  73061. +/* The txnmgr is a set of interfaces that keep track of atoms and transcrash handles. The
  73062. + txnmgr processes capture_block requests and manages the relationship between jnodes and
  73063. + atoms through the various stages of a transcrash, and it also oversees the fusion and
  73064. + capture-on-copy processes. The main difficulty with this task is maintaining a
  73065. + deadlock-free lock ordering between atoms and jnodes/handles. The reason for the
  73066. + difficulty is that jnodes, handles, and atoms contain pointer circles, and the cycle
  73067. + must be broken. The main requirement is that atom-fusion be deadlock free, so once you
  73068. + hold the atom_lock you may then wait to acquire any jnode or handle lock. This implies
  73069. + that any time you check the atom-pointer of a jnode or handle and then try to lock that
  73070. + atom, you must use trylock() and possibly reverse the order.
  73071. +
  73072. + This code implements the design documented at:
  73073. +
  73074. + http://namesys.com/txn-doc.html
  73075. +
  73076. +ZAM-FIXME-HANS: update v4.html to contain all of the information present in the above (but updated), and then remove the
  73077. +above document and reference the new. Be sure to provide some credit to Josh. I already have some writings on this
  73078. +topic in v4.html, but they are lacking in details present in the above. Cure that. Remember to write for the bright 12
  73079. +year old --- define all technical terms used.
  73080. +
  73081. +*/
  73082. +
  73083. +/* Thoughts on the external transaction interface:
  73084. +
  73085. + In the current code, a TRANSCRASH handle is created implicitly by reiser4_init_context() (which
  73086. + creates state that lasts for the duration of a system call and is called at the start
  73087. + of ReiserFS methods implementing VFS operations), and closed by reiser4_exit_context(),
  73088. + occupying the scope of a single system call. We wish to give certain applications an
  73089. + interface to begin and close (commit) transactions. Since our implementation of
  73090. + transactions does not yet support isolation, allowing an application to open a
  73091. + transaction implies trusting it to later close the transaction. Part of the
  73092. + transaction interface will be aimed at enabling that trust, but the interface for
  73093. + actually using transactions is fairly narrow.
  73094. +
  73095. + BEGIN_TRANSCRASH: Returns a transcrash identifier. It should be possible to translate
  73096. + this identifier into a string that a shell-script could use, allowing you to start a
  73097. + transaction by issuing a command. Once open, the transcrash should be set in the task
  73098. + structure, and there should be options (I suppose) to allow it to be carried across
  73099. + fork/exec. A transcrash has several options:
  73100. +
  73101. + - READ_FUSING or WRITE_FUSING: The default policy is for txn-capture to capture only
  73102. + on writes (WRITE_FUSING) and allow "dirty reads". If the application wishes to
  73103. + capture on reads as well, it should set READ_FUSING.
  73104. +
  73105. + - TIMEOUT: Since a non-isolated transcrash cannot be undone, every transcrash must
  73106. + eventually close (or else the machine must crash). If the application dies an
  73107. + unexpected death with an open transcrash, for example, or if it hangs for a long
  73108. + duration, one solution (to avoid crashing the machine) is to simply close it anyway.
  73109. + This is a dangerous option, but it is one way to solve the problem until isolated
  73110. + transcrashes are available for untrusted applications.
  73111. +
  73112. + It seems to be what databases do, though it is unclear how one avoids a DoS attack
  73113. + creating a vulnerability based on resource starvation. Guaranteeing that some
  73114. + minimum amount of computational resources are made available would seem more correct
  73115. + than guaranteeing some amount of time. When we again have someone to code the work,
  73116. + this issue should be considered carefully. -Hans
  73117. +
  73118. + RESERVE_BLOCKS: A running transcrash should indicate to the transaction manager how
  73119. + many dirty blocks it expects. The reserve_blocks interface should be called at a point
  73120. + where it is safe for the application to fail, because the system may not be able to
  73121. + grant the allocation and the application must be able to back-out. For this reason,
  73122. + the number of reserve-blocks can also be passed as an argument to BEGIN_TRANSCRASH, but
  73123. + the application may also wish to extend the allocation after beginning its transcrash.
  73124. +
  73125. + CLOSE_TRANSCRASH: The application closes the transcrash when it is finished making
  73126. + modifications that require transaction protection. When isolated transactions are
  73127. + supported the CLOSE operation is replaced by either COMMIT or ABORT. For example, if a
  73128. + RESERVE_BLOCKS call fails for the application, it should "abort" by calling
  73129. + CLOSE_TRANSCRASH, even though it really commits any changes that were made (which is
  73130. + why, for safety, the application should call RESERVE_BLOCKS before making any changes).
  73131. +
  73132. + For actually implementing these out-of-system-call-scoped transcrashes, the
  73133. + reiser4_context has a "txn_handle *trans" pointer that may be set to an open
  73134. + transcrash. Currently there are no dynamically-allocated transcrashes, but there is a
  73135. + "struct kmem_cache *_txnh_slab" created for that purpose in this file.
  73136. +*/
  73137. +
  73138. +/* Extending the other system call interfaces for future transaction features:
  73139. +
  73140. + Specialized applications may benefit from passing flags to the ordinary system call
  73141. + interface such as read(), write(), or stat(). For example, the application specifies
  73142. + WRITE_FUSING by default but wishes to add that a certain read() command should be
  73143. + treated as READ_FUSING. But which read? Is it the directory-entry read, the stat-data
  73144. + read, or the file-data read? These issues are straight-forward, but there are a lot of
  73145. + them and adding the necessary flags-passing code will be tedious.
  73146. +
  73147. + When supporting isolated transactions, there is a corresponding READ_MODIFY_WRITE (RMW)
  73148. + flag, which specifies that although it is a read operation being requested, a
  73149. + write-lock should be taken. The reason is that read-locks are shared while write-locks
  73150. + are exclusive, so taking a read-lock when a later-write is known in advance will often
  73151. + lead to deadlock. If a reader knows it will write later, it should issue read
  73152. + requests with the RMW flag set.
  73153. +*/
  73154. +
  73155. +/*
  73156. + The znode/atom deadlock avoidance.
  73157. +
  73158. + FIXME(Zam): writing of this comment is in progress.
  73159. +
  73160. + The atom's special stage ASTAGE_CAPTURE_WAIT introduces a kind of atom's
  73161. + long-term locking, which makes reiser4 locking scheme more complex. It had
  73162. + deadlocks until we implement deadlock avoidance algorithms. That deadlocks
  73163. + looked as the following: one stopped thread waits for a long-term lock on
  73164. + znode, the thread who owns that lock waits when fusion with another atom will
  73165. + be allowed.
  73166. +
  73167. + The source of the deadlocks is an optimization of not capturing index nodes
  73168. + for read. Let's prove it. Suppose we have dumb node capturing scheme which
  73169. + unconditionally captures each block before locking it.
  73170. +
  73171. + That scheme has no deadlocks. Let's begin with the thread which stage is
  73172. + ASTAGE_CAPTURE_WAIT and it waits for a znode lock. The thread can't wait for
  73173. + a capture because its stage allows fusion with any atom except those which are
  73174. + being committed currently. A process of atom commit can't deadlock because
  73175. + atom commit procedure does not acquire locks and does not fuse with other
  73176. + atoms. Reiser4 does capturing right before going to sleep inside the
  73177. + longterm_lock_znode() function, it means the znode which we want to lock is
  73178. + already captured and its atom is in ASTAGE_CAPTURE_WAIT stage. If we
  73179. + continue the analysis we understand that no one process in the sequence may
  73180. + wait for atom fusion. Thereby there are no deadlocks of the described kind.
  73181. +
  73182. + The capturing optimization makes the deadlocks possible. A thread can wait a
  73183. + lock whose owner did not capture that node. The lock owner's current atom
  73184. + is not fused with the first atom and it does not get a ASTAGE_CAPTURE_WAIT
  73185. + state. A deadlock is possible when that atom meets another one which is in
  73186. + ASTAGE_CAPTURE_WAIT already.
  73187. +
  73188. + The deadlock avoidance scheme includes two algorithms:
  73189. +
  73190. + First algorithm is used when a thread captures a node which is locked but not
  73191. + captured by another thread. Those nodes are marked MISSED_IN_CAPTURE at the
  73192. + moment we skip their capturing. If such a node (marked MISSED_IN_CAPTURE) is
  73193. + being captured by a thread whose current atom is in ASTAGE_CAPTURE_WAIT, the
  73194. + routine which forces all lock owners to join with current atom is executed.
  73195. +
  73196. + Second algorithm does not allow to skip capturing of already captured nodes.
  73197. +
  73198. + Both algorithms together prevent waiting a longterm lock without atom fusion
  73199. + with atoms of all lock owners, which is a key thing for avoiding atom/znode
  73200. + locking deadlocks.
  73201. +*/
  73202. +
  73203. +/*
  73204. + * Transactions and mmap(2).
  73205. + *
  73206. + * 1. Transactions are not supported for accesses through mmap(2), because
  73207. + * this would effectively amount to user-level transactions whose duration
  73208. + * is beyond control of the kernel.
  73209. + *
  73210. + * 2. That said, we still want to preserve some decency with regard to
  73211. + * mmap(2). During normal write(2) call, following sequence of events
  73212. + * happens:
  73213. + *
  73214. + * 1. page is created;
  73215. + *
  73216. + * 2. jnode is created, dirtied and captured into current atom.
  73217. + *
  73218. + * 3. extent is inserted and modified.
  73219. + *
  73220. + * Steps (2) and (3) take place under long term lock on the twig node.
  73221. + *
  73222. + * When file is accessed through mmap(2) page is always created during
  73223. + * page fault.
  73224. + * After this (in reiser4_readpage_dispatch()->reiser4_readpage_extent()):
  73225. + *
  73226. + * 1. if access is made to non-hole page new jnode is created, (if
  73227. + * necessary)
  73228. + *
  73229. + * 2. if access is made to the hole page, jnode is not created (XXX
  73230. + * not clear why).
  73231. + *
  73232. + * Also, even if page is created by write page fault it is not marked
  73233. + * dirty immediately by handle_mm_fault(). Probably this is to avoid races
  73234. + * with page write-out.
  73235. + *
  73236. + * Dirty bit installed by hardware is only transferred to the struct page
  73237. + * later, when page is unmapped (in zap_pte_range(), or
  73238. + * try_to_unmap_one()).
  73239. + *
  73240. + * So, with mmap(2) we have to handle following irksome situations:
  73241. + *
  73242. + * 1. there exists modified page (clean or dirty) without jnode
  73243. + *
  73244. + * 2. there exists modified page (clean or dirty) with clean jnode
  73245. + *
  73246. + * 3. clean page which is a part of atom can be transparently modified
  73247. + * at any moment through mapping without becoming dirty.
  73248. + *
  73249. + * (1) and (2) can lead to the out-of-memory situation: ->writepage()
  73250. + * doesn't know what to do with such pages and ->sync_sb()/->writepages()
  73251. + * don't see them, because these methods operate on atoms.
  73252. + *
  73253. + * (3) can lead to the loss of data: suppose we have dirty page with dirty
  73254. + * captured jnode captured by some atom. As part of early flush (for
  73255. + * example) page was written out. Dirty bit was cleared on both page and
  73256. + * jnode. After this page is modified through mapping, but kernel doesn't
  73257. + * notice and just discards page and jnode as part of commit. (XXX
  73258. + * actually it doesn't, because to reclaim page ->releasepage() has to be
  73259. + * called and before this dirty bit will be transferred to the struct
  73260. + * page).
  73261. + *
  73262. + */
  73263. +
  73264. +#include "debug.h"
  73265. +#include "txnmgr.h"
  73266. +#include "jnode.h"
  73267. +#include "znode.h"
  73268. +#include "block_alloc.h"
  73269. +#include "tree.h"
  73270. +#include "wander.h"
  73271. +#include "ktxnmgrd.h"
  73272. +#include "super.h"
  73273. +#include "page_cache.h"
  73274. +#include "reiser4.h"
  73275. +#include "vfs_ops.h"
  73276. +#include "inode.h"
  73277. +#include "flush.h"
  73278. +#include "discard.h"
  73279. +
  73280. +#include <asm/atomic.h>
  73281. +#include <linux/types.h>
  73282. +#include <linux/fs.h>
  73283. +#include <linux/mm.h>
  73284. +#include <linux/slab.h>
  73285. +#include <linux/pagemap.h>
  73286. +#include <linux/writeback.h>
  73287. +#include <linux/swap.h> /* for totalram_pages */
  73288. +
  73289. +static void atom_free(txn_atom * atom);
  73290. +
  73291. +static int commit_txnh(txn_handle * txnh);
  73292. +
  73293. +static void wakeup_atom_waitfor_list(txn_atom * atom);
  73294. +static void wakeup_atom_waiting_list(txn_atom * atom);
  73295. +
  73296. +static void capture_assign_txnh_nolock(txn_atom * atom, txn_handle * txnh);
  73297. +
  73298. +static void capture_assign_block_nolock(txn_atom * atom, jnode * node);
  73299. +
  73300. +static void fuse_not_fused_lock_owners(txn_handle * txnh, znode * node);
  73301. +
  73302. +static int capture_init_fusion(jnode * node, txn_handle * txnh,
  73303. + txn_capture mode);
  73304. +
  73305. +static int capture_fuse_wait(txn_handle *, txn_atom *, txn_atom *, txn_capture);
  73306. +
  73307. +static void capture_fuse_into(txn_atom * small, txn_atom * large);
  73308. +
  73309. +void reiser4_invalidate_list(struct list_head *);
  73310. +
  73311. +/* GENERIC STRUCTURES */
  73312. +
  73313. +typedef struct _txn_wait_links txn_wait_links;
  73314. +
  73315. +struct _txn_wait_links {
  73316. + lock_stack *_lock_stack;
  73317. + struct list_head _fwaitfor_link;
  73318. + struct list_head _fwaiting_link;
  73319. + int (*waitfor_cb) (txn_atom * atom, struct _txn_wait_links * wlinks);
  73320. + int (*waiting_cb) (txn_atom * atom, struct _txn_wait_links * wlinks);
  73321. +};
  73322. +
  73323. +/* FIXME: In theory, we should be using the slab cache init & destructor
  73324. + methods instead of, e.g., jnode_init, etc. */
  73325. +static struct kmem_cache *_atom_slab = NULL;
  73326. +/* this is for user-visible, cross system-call transactions. */
  73327. +static struct kmem_cache *_txnh_slab = NULL;
  73328. +
  73329. +/**
  73330. + * init_txnmgr_static - create transaction manager slab caches
  73331. + *
  73332. + * Initializes caches of txn-atoms and txn_handle. It is part of reiser4 module
  73333. + * initialization.
  73334. + */
  73335. +int init_txnmgr_static(void)
  73336. +{
  73337. + assert("jmacd-600", _atom_slab == NULL);
  73338. + assert("jmacd-601", _txnh_slab == NULL);
  73339. +
  73340. + ON_DEBUG(atomic_set(&flush_cnt, 0));
  73341. +
  73342. + _atom_slab = kmem_cache_create("txn_atom", sizeof(txn_atom), 0,
  73343. + SLAB_HWCACHE_ALIGN |
  73344. + SLAB_RECLAIM_ACCOUNT, NULL);
  73345. + if (_atom_slab == NULL)
  73346. + return RETERR(-ENOMEM);
  73347. +
  73348. + _txnh_slab = kmem_cache_create("txn_handle", sizeof(txn_handle), 0,
  73349. + SLAB_HWCACHE_ALIGN, NULL);
  73350. + if (_txnh_slab == NULL) {
  73351. + kmem_cache_destroy(_atom_slab);
  73352. + _atom_slab = NULL;
  73353. + return RETERR(-ENOMEM);
  73354. + }
  73355. +
  73356. + return 0;
  73357. +}
  73358. +
  73359. +/**
  73360. + * done_txnmgr_static - delete txn_atom and txn_handle caches
  73361. + *
  73362. + * This is called on reiser4 module unloading or system shutdown.
  73363. + */
  73364. +void done_txnmgr_static(void)
  73365. +{
  73366. + destroy_reiser4_cache(&_atom_slab);
  73367. + destroy_reiser4_cache(&_txnh_slab);
  73368. +}
  73369. +
  73370. +/**
  73371. + * init_txnmgr - initialize a new transaction manager
  73372. + * @mgr: pointer to transaction manager embedded in reiser4 super block
  73373. + *
  73374. + * This is called on mount. Makes necessary initializations.
  73375. + */
  73376. +void reiser4_init_txnmgr(txn_mgr *mgr)
  73377. +{
  73378. + assert("umka-169", mgr != NULL);
  73379. +
  73380. + mgr->atom_count = 0;
  73381. + mgr->id_count = 1;
  73382. + INIT_LIST_HEAD(&mgr->atoms_list);
  73383. + spin_lock_init(&mgr->tmgr_lock);
  73384. + mutex_init(&mgr->commit_mutex);
  73385. +}
  73386. +
  73387. +/**
  73388. + * reiser4_done_txnmgr - stop transaction manager
  73389. + * @mgr: pointer to transaction manager embedded in reiser4 super block
  73390. + *
  73391. + * This is called on umount. Does sanity checks.
  73392. + */
  73393. +void reiser4_done_txnmgr(txn_mgr *mgr)
  73394. +{
  73395. + assert("umka-170", mgr != NULL);
  73396. + assert("umka-1701", list_empty_careful(&mgr->atoms_list));
  73397. + assert("umka-1702", mgr->atom_count == 0);
  73398. +}
  73399. +
  73400. +/* Initialize a transaction handle. */
  73401. +/* Audited by: umka (2002.06.13) */
  73402. +static void txnh_init(txn_handle * txnh, txn_mode mode)
  73403. +{
  73404. + assert("umka-171", txnh != NULL);
  73405. +
  73406. + txnh->mode = mode;
  73407. + txnh->atom = NULL;
  73408. + reiser4_ctx_gfp_mask_set();
  73409. + txnh->flags = 0;
  73410. + spin_lock_init(&txnh->hlock);
  73411. + INIT_LIST_HEAD(&txnh->txnh_link);
  73412. +}
  73413. +
  73414. +#if REISER4_DEBUG
  73415. +/* Check if a transaction handle is clean. */
  73416. +static int txnh_isclean(txn_handle * txnh)
  73417. +{
  73418. + assert("umka-172", txnh != NULL);
  73419. + return txnh->atom == NULL &&
  73420. + LOCK_CNT_NIL(spin_locked_txnh);
  73421. +}
  73422. +#endif
  73423. +
  73424. +/* Initialize an atom. */
  73425. +static void atom_init(txn_atom * atom)
  73426. +{
  73427. + int level;
  73428. +
  73429. + assert("umka-173", atom != NULL);
  73430. +
  73431. + memset(atom, 0, sizeof(txn_atom));
  73432. +
  73433. + atom->stage = ASTAGE_FREE;
  73434. + atom->start_time = jiffies;
  73435. +
  73436. + for (level = 0; level < REAL_MAX_ZTREE_HEIGHT + 1; level += 1)
  73437. + INIT_LIST_HEAD(ATOM_DIRTY_LIST(atom, level));
  73438. +
  73439. + INIT_LIST_HEAD(ATOM_CLEAN_LIST(atom));
  73440. + INIT_LIST_HEAD(ATOM_OVRWR_LIST(atom));
  73441. + INIT_LIST_HEAD(ATOM_WB_LIST(atom));
  73442. + INIT_LIST_HEAD(&atom->inodes);
  73443. + spin_lock_init(&(atom->alock));
  73444. + /* list of transaction handles */
  73445. + INIT_LIST_HEAD(&atom->txnh_list);
  73446. + /* link to transaction manager's list of atoms */
  73447. + INIT_LIST_HEAD(&atom->atom_link);
  73448. + INIT_LIST_HEAD(&atom->fwaitfor_list);
  73449. + INIT_LIST_HEAD(&atom->fwaiting_list);
  73450. + blocknr_set_init(&atom->wandered_map);
  73451. +
  73452. + atom_dset_init(atom);
  73453. +
  73454. + init_atom_fq_parts(atom);
  73455. +}
  73456. +
  73457. +#if REISER4_DEBUG
  73458. +/* Check if an atom is clean. */
  73459. +static int atom_isclean(txn_atom * atom)
  73460. +{
  73461. + int level;
  73462. +
  73463. + assert("umka-174", atom != NULL);
  73464. +
  73465. + for (level = 0; level < REAL_MAX_ZTREE_HEIGHT + 1; level += 1) {
  73466. + if (!list_empty_careful(ATOM_DIRTY_LIST(atom, level))) {
  73467. + return 0;
  73468. + }
  73469. + }
  73470. +
  73471. + return atom->stage == ASTAGE_FREE &&
  73472. + atom->txnh_count == 0 &&
  73473. + atom->capture_count == 0 &&
  73474. + atomic_read(&atom->refcount) == 0 &&
  73475. + (&atom->atom_link == atom->atom_link.next &&
  73476. + &atom->atom_link == atom->atom_link.prev) &&
  73477. + list_empty_careful(&atom->txnh_list) &&
  73478. + list_empty_careful(ATOM_CLEAN_LIST(atom)) &&
  73479. + list_empty_careful(ATOM_OVRWR_LIST(atom)) &&
  73480. + list_empty_careful(ATOM_WB_LIST(atom)) &&
  73481. + list_empty_careful(&atom->fwaitfor_list) &&
  73482. + list_empty_careful(&atom->fwaiting_list) &&
  73483. + atom_fq_parts_are_clean(atom);
  73484. +}
  73485. +#endif
  73486. +
  73487. +/* Begin a transaction in this context. Currently this uses the reiser4_context's
  73488. + trans_in_ctx, which means that transaction handles are stack-allocated. Eventually
  73489. + this will be extended to allow transaction handles to span several contexts. */
  73490. +/* Audited by: umka (2002.06.13) */
  73491. +void reiser4_txn_begin(reiser4_context * context)
  73492. +{
  73493. + assert("jmacd-544", context->trans == NULL);
  73494. +
  73495. + context->trans = &context->trans_in_ctx;
  73496. +
  73497. + /* FIXME_LATER_JMACD Currently there's no way to begin a TXN_READ_FUSING
  73498. + transcrash. Default should be TXN_WRITE_FUSING. Also, the _trans variable is
  73499. + stack allocated right now, but we would like to allow for dynamically allocated
  73500. + transcrashes that span multiple system calls.
  73501. + */
  73502. + txnh_init(context->trans, TXN_WRITE_FUSING);
  73503. +}
  73504. +
  73505. +/* Finish a transaction handle context. */
  73506. +int reiser4_txn_end(reiser4_context * context)
  73507. +{
  73508. + long ret = 0;
  73509. + txn_handle *txnh;
  73510. +
  73511. + assert("umka-283", context != NULL);
  73512. + assert("nikita-3012", reiser4_schedulable());
  73513. + assert("vs-24", context == get_current_context());
  73514. + assert("nikita-2967", lock_stack_isclean(get_current_lock_stack()));
  73515. +
  73516. + txnh = context->trans;
  73517. + if (txnh != NULL) {
  73518. + if (txnh->atom != NULL)
  73519. + ret = commit_txnh(txnh);
  73520. + assert("jmacd-633", txnh_isclean(txnh));
  73521. + context->trans = NULL;
  73522. + }
  73523. + return ret;
  73524. +}
  73525. +
  73526. +void reiser4_txn_restart(reiser4_context * context)
  73527. +{
  73528. + reiser4_txn_end(context);
  73529. + reiser4_preempt_point();
  73530. + reiser4_txn_begin(context);
  73531. +}
  73532. +
  73533. +void reiser4_txn_restart_current(void)
  73534. +{
  73535. + reiser4_txn_restart(get_current_context());
  73536. +}
  73537. +
  73538. +/* TXN_ATOM */
  73539. +
  73540. +/* Get the atom belonging to a txnh, which is not locked. Return txnh locked. Locks atom, if atom
  73541. + is not NULL. This performs the necessary spin_trylock to break the lock-ordering cycle. May
  73542. + return NULL. */
  73543. +static txn_atom *txnh_get_atom(txn_handle * txnh)
  73544. +{
  73545. + txn_atom *atom;
  73546. +
  73547. + assert("umka-180", txnh != NULL);
  73548. + assert_spin_not_locked(&(txnh->hlock));
  73549. +
  73550. + while (1) {
  73551. + spin_lock_txnh(txnh);
  73552. + atom = txnh->atom;
  73553. +
  73554. + if (atom == NULL)
  73555. + break;
  73556. +
  73557. + if (spin_trylock_atom(atom))
  73558. + break;
  73559. +
  73560. + atomic_inc(&atom->refcount);
  73561. +
  73562. + spin_unlock_txnh(txnh);
  73563. + spin_lock_atom(atom);
  73564. + spin_lock_txnh(txnh);
  73565. +
  73566. + if (txnh->atom == atom) {
  73567. + atomic_dec(&atom->refcount);
  73568. + break;
  73569. + }
  73570. +
  73571. + spin_unlock_txnh(txnh);
  73572. + atom_dec_and_unlock(atom);
  73573. + }
  73574. +
  73575. + return atom;
  73576. +}
  73577. +
  73578. +/* Get the current atom and spinlock it if current atom present. May return NULL */
  73579. +txn_atom *get_current_atom_locked_nocheck(void)
  73580. +{
  73581. + reiser4_context *cx;
  73582. + txn_atom *atom;
  73583. + txn_handle *txnh;
  73584. +
  73585. + cx = get_current_context();
  73586. + assert("zam-437", cx != NULL);
  73587. +
  73588. + txnh = cx->trans;
  73589. + assert("zam-435", txnh != NULL);
  73590. +
  73591. + atom = txnh_get_atom(txnh);
  73592. +
  73593. + spin_unlock_txnh(txnh);
  73594. + return atom;
  73595. +}
  73596. +
  73597. +/* Get the atom belonging to a jnode, which is initially locked. Return with
  73598. + both jnode and atom locked. This performs the necessary spin_trylock to
  73599. + break the lock-ordering cycle. Assumes the jnode is already locked, and
  73600. + returns NULL if atom is not set. */
  73601. +txn_atom *jnode_get_atom(jnode * node)
  73602. +{
  73603. + txn_atom *atom;
  73604. +
  73605. + assert("umka-181", node != NULL);
  73606. +
  73607. + while (1) {
  73608. + assert_spin_locked(&(node->guard));
  73609. +
  73610. + atom = node->atom;
  73611. + /* node is not in any atom */
  73612. + if (atom == NULL)
  73613. + break;
  73614. +
  73615. + /* If atom is not locked, grab the lock and return */
  73616. + if (spin_trylock_atom(atom))
  73617. + break;
  73618. +
  73619. + /* At least one jnode belongs to this atom it guarantees that
  73620. + * atom->refcount > 0, we can safely increment refcount. */
  73621. + atomic_inc(&atom->refcount);
  73622. + spin_unlock_jnode(node);
  73623. +
  73624. + /* re-acquire spin locks in the right order */
  73625. + spin_lock_atom(atom);
  73626. + spin_lock_jnode(node);
  73627. +
  73628. + /* check if node still points to the same atom. */
  73629. + if (node->atom == atom) {
  73630. + atomic_dec(&atom->refcount);
  73631. + break;
  73632. + }
  73633. +
  73634. + /* releasing of atom lock and reference requires not holding
  73635. + * locks on jnodes. */
  73636. + spin_unlock_jnode(node);
  73637. +
  73638. + /* We are not sure that this atom has extra references except our
  73639. + * one, so we should call proper function which may free atom if
  73640. + * last reference is released. */
  73641. + atom_dec_and_unlock(atom);
  73642. +
  73643. + /* lock jnode again for getting valid node->atom pointer
  73644. + * value. */
  73645. + spin_lock_jnode(node);
  73646. + }
  73647. +
  73648. + return atom;
  73649. +}
  73650. +
  73651. +/* Returns true if @node is dirty and part of the same atom as one of its neighbors. Used
  73652. + by flush code to indicate whether the next node (in some direction) is suitable for
  73653. + flushing. */
  73654. +int
  73655. +same_slum_check(jnode * node, jnode * check, int alloc_check, int alloc_value)
  73656. +{
  73657. + int compat;
  73658. + txn_atom *atom;
  73659. +
  73660. + assert("umka-182", node != NULL);
  73661. + assert("umka-183", check != NULL);
  73662. +
  73663. + /* Not sure what this function is supposed to do if supplied with @check that is
  73664. + neither formatted nor unformatted (bitmap or so). */
  73665. + assert("nikita-2373", jnode_is_znode(check)
  73666. + || jnode_is_unformatted(check));
  73667. +
  73668. + /* Need a lock on CHECK to get its atom and to check various state bits.
  73669. + Don't need a lock on NODE once we get the atom lock. */
  73670. + /* It is not enough to lock two nodes and check (node->atom ==
  73671. + check->atom) because atom could be locked and being fused at that
  73672. + moment, jnodes of the atom of that state (being fused) can point to
  73673. + different objects, but the atom is the same. */
  73674. + spin_lock_jnode(check);
  73675. +
  73676. + atom = jnode_get_atom(check);
  73677. +
  73678. + if (atom == NULL) {
  73679. + compat = 0;
  73680. + } else {
  73681. + compat = (node->atom == atom && JF_ISSET(check, JNODE_DIRTY));
  73682. +
  73683. + if (compat && jnode_is_znode(check)) {
  73684. + compat &= znode_is_connected(JZNODE(check));
  73685. + }
  73686. +
  73687. + if (compat && alloc_check) {
  73688. + compat &= (alloc_value == jnode_is_flushprepped(check));
  73689. + }
  73690. +
  73691. + spin_unlock_atom(atom);
  73692. + }
  73693. +
  73694. + spin_unlock_jnode(check);
  73695. +
  73696. + return compat;
  73697. +}
  73698. +
  73699. +/* Decrement the atom's reference count and if it falls to zero, free it. */
  73700. +void atom_dec_and_unlock(txn_atom * atom)
  73701. +{
  73702. + txn_mgr *mgr = &get_super_private(reiser4_get_current_sb())->tmgr;
  73703. +
  73704. + assert("umka-186", atom != NULL);
  73705. + assert_spin_locked(&(atom->alock));
  73706. + assert("zam-1039", atomic_read(&atom->refcount) > 0);
  73707. +
  73708. + if (atomic_dec_and_test(&atom->refcount)) {
  73709. + /* take txnmgr lock and atom lock in proper order. */
  73710. + if (!spin_trylock_txnmgr(mgr)) {
  73711. + /* This atom should exist after we re-acquire its
  73712. + * spinlock, so we increment its reference counter. */
  73713. + atomic_inc(&atom->refcount);
  73714. + spin_unlock_atom(atom);
  73715. + spin_lock_txnmgr(mgr);
  73716. + spin_lock_atom(atom);
  73717. +
  73718. + if (!atomic_dec_and_test(&atom->refcount)) {
  73719. + spin_unlock_atom(atom);
  73720. + spin_unlock_txnmgr(mgr);
  73721. + return;
  73722. + }
  73723. + }
  73724. + assert_spin_locked(&(mgr->tmgr_lock));
  73725. + atom_free(atom);
  73726. + spin_unlock_txnmgr(mgr);
  73727. + } else
  73728. + spin_unlock_atom(atom);
  73729. +}
  73730. +
  73731. +/* Create new atom and connect it to given transaction handle. This adds the
  73732. + atom to the transaction manager's list and sets its reference count to 1, an
  73733. + artificial reference which is kept until it commits. We play strange games
  73734. + to avoid allocation under jnode & txnh spinlocks.*/
  73735. +
  73736. +static int atom_begin_and_assign_to_txnh(txn_atom ** atom_alloc, txn_handle * txnh)
  73737. +{
  73738. + txn_atom *atom;
  73739. + txn_mgr *mgr;
  73740. +
  73741. + if (REISER4_DEBUG && rofs_tree(current_tree)) {
  73742. + warning("nikita-3366", "Creating atom on rofs");
  73743. + dump_stack();
  73744. + }
  73745. +
  73746. + if (*atom_alloc == NULL) {
  73747. + (*atom_alloc) = kmem_cache_alloc(_atom_slab,
  73748. + reiser4_ctx_gfp_mask_get());
  73749. +
  73750. + if (*atom_alloc == NULL)
  73751. + return RETERR(-ENOMEM);
  73752. + }
  73753. +
  73754. + /* and, also, txnmgr spin lock should be taken before jnode and txnh
  73755. + locks. */
  73756. + mgr = &get_super_private(reiser4_get_current_sb())->tmgr;
  73757. + spin_lock_txnmgr(mgr);
  73758. + spin_lock_txnh(txnh);
  73759. +
  73760. + /* Check whether new atom still needed */
  73761. + if (txnh->atom != NULL) {
  73762. + /* NOTE-NIKITA probably it is rather better to free
  73763. + * atom_alloc here than thread it up to reiser4_try_capture() */
  73764. +
  73765. + spin_unlock_txnh(txnh);
  73766. + spin_unlock_txnmgr(mgr);
  73767. +
  73768. + return -E_REPEAT;
  73769. + }
  73770. +
  73771. + atom = *atom_alloc;
  73772. + *atom_alloc = NULL;
  73773. +
  73774. + atom_init(atom);
  73775. +
  73776. + assert("jmacd-17", atom_isclean(atom));
  73777. +
  73778. + /*
  73779. + * lock ordering is broken here. It is ok, as long as @atom is new
  73780. + * and inaccessible for others. We can't use spin_lock_atom or
  73781. + * spin_lock(&atom->alock) because they care about locking
  73782. + * dependencies. spin_trylock_lock doesn't.
  73783. + */
  73784. + check_me("", spin_trylock_atom(atom));
  73785. +
  73786. + /* add atom to the end of transaction manager's list of atoms */
  73787. + list_add_tail(&atom->atom_link, &mgr->atoms_list);
  73788. + atom->atom_id = mgr->id_count++;
  73789. + mgr->atom_count += 1;
  73790. +
  73791. + /* Release txnmgr lock */
  73792. + spin_unlock_txnmgr(mgr);
  73793. +
  73794. + /* One reference until it commits. */
  73795. + atomic_inc(&atom->refcount);
  73796. + atom->stage = ASTAGE_CAPTURE_FUSE;
  73797. + atom->super = reiser4_get_current_sb();
  73798. + capture_assign_txnh_nolock(atom, txnh);
  73799. +
  73800. + spin_unlock_atom(atom);
  73801. + spin_unlock_txnh(txnh);
  73802. +
  73803. + return -E_REPEAT;
  73804. +}
  73805. +
  73806. +/* Return true if an atom is currently "open". */
  73807. +static int atom_isopen(const txn_atom * atom)
  73808. +{
  73809. + assert("umka-185", atom != NULL);
  73810. +
  73811. + return atom->stage > 0 && atom->stage < ASTAGE_PRE_COMMIT;
  73812. +}
  73813. +
  73814. +/* Return the number of pointers to this atom that must be updated during fusion. This
  73815. + approximates the amount of work to be done. Fusion chooses the atom with fewer
  73816. + pointers to fuse into the atom with more pointers. */
  73817. +static int atom_pointer_count(const txn_atom * atom)
  73818. +{
  73819. + assert("umka-187", atom != NULL);
  73820. +
  73821. + /* This is a measure of the amount of work needed to fuse this atom
  73822. + * into another. */
  73823. + return atom->txnh_count + atom->capture_count;
  73824. +}
  73825. +
  73826. +/* Called holding the atom lock, this removes the atom from the transaction manager list
  73827. + and frees it. */
  73828. +static void atom_free(txn_atom * atom)
  73829. +{
  73830. + txn_mgr *mgr = &get_super_private(reiser4_get_current_sb())->tmgr;
  73831. +
  73832. + assert("umka-188", atom != NULL);
  73833. + assert_spin_locked(&(atom->alock));
  73834. +
  73835. + /* Remove from the txn_mgr's atom list */
  73836. + assert_spin_locked(&(mgr->tmgr_lock));
  73837. + mgr->atom_count -= 1;
  73838. + list_del_init(&atom->atom_link);
  73839. +
  73840. + /* Clean the atom */
  73841. + assert("jmacd-16",
  73842. + (atom->stage == ASTAGE_INVALID || atom->stage == ASTAGE_DONE));
  73843. + atom->stage = ASTAGE_FREE;
  73844. +
  73845. + blocknr_set_destroy(&atom->wandered_map);
  73846. +
  73847. + atom_dset_destroy(atom);
  73848. +
  73849. + assert("jmacd-16", atom_isclean(atom));
  73850. +
  73851. + spin_unlock_atom(atom);
  73852. +
  73853. + kmem_cache_free(_atom_slab, atom);
  73854. +}
  73855. +
  73856. +static int atom_is_dotard(const txn_atom * atom)
  73857. +{
  73858. + return time_after(jiffies, atom->start_time +
  73859. + get_current_super_private()->tmgr.atom_max_age);
  73860. +}
  73861. +
  73862. +static int atom_can_be_committed(txn_atom * atom)
  73863. +{
  73864. + assert_spin_locked(&(atom->alock));
  73865. + assert("zam-885", atom->txnh_count > atom->nr_waiters);
  73866. + return atom->txnh_count == atom->nr_waiters + 1;
  73867. +}
  73868. +
  73869. +/* Return true if an atom should commit now. This is determined by aging, atom
  73870. + size or atom flags. */
  73871. +static int atom_should_commit(const txn_atom * atom)
  73872. +{
  73873. + assert("umka-189", atom != NULL);
  73874. + return
  73875. + (atom->flags & ATOM_FORCE_COMMIT) ||
  73876. + ((unsigned)atom_pointer_count(atom) >
  73877. + get_current_super_private()->tmgr.atom_max_size)
  73878. + || atom_is_dotard(atom);
  73879. +}
  73880. +
  73881. +/* return 1 if current atom exists and requires commit. */
  73882. +int current_atom_should_commit(void)
  73883. +{
  73884. + txn_atom *atom;
  73885. + int result = 0;
  73886. +
  73887. + atom = get_current_atom_locked_nocheck();
  73888. + if (atom) {
  73889. + result = atom_should_commit(atom);
  73890. + spin_unlock_atom(atom);
  73891. + }
  73892. + return result;
  73893. +}
  73894. +
  73895. +static int atom_should_commit_asap(const txn_atom * atom)
  73896. +{
  73897. + unsigned int captured;
  73898. + unsigned int pinnedpages;
  73899. +
  73900. + assert("nikita-3309", atom != NULL);
  73901. +
  73902. + captured = (unsigned)atom->capture_count;
  73903. + pinnedpages = (captured >> PAGE_SHIFT) * sizeof(znode);
  73904. +
  73905. + return (pinnedpages > (totalram_pages >> 3)) || (atom->flushed > 100);
  73906. +}
  73907. +
  73908. +static jnode *find_first_dirty_in_list(struct list_head *head, int flags)
  73909. +{
  73910. + jnode *first_dirty;
  73911. +
  73912. + list_for_each_entry(first_dirty, head, capture_link) {
  73913. + if (!(flags & JNODE_FLUSH_COMMIT)) {
  73914. + /*
  73915. + * skip jnodes which "heard banshee" or having active
  73916. + * I/O
  73917. + */
  73918. + if (JF_ISSET(first_dirty, JNODE_HEARD_BANSHEE) ||
  73919. + JF_ISSET(first_dirty, JNODE_WRITEBACK))
  73920. + continue;
  73921. + }
  73922. + return first_dirty;
  73923. + }
  73924. + return NULL;
  73925. +}
  73926. +
  73927. +/* Get first dirty node from the atom's dirty_nodes[n] lists; return NULL if atom has no dirty
  73928. + nodes on atom's lists */
  73929. +jnode *find_first_dirty_jnode(txn_atom * atom, int flags)
  73930. +{
  73931. + jnode *first_dirty;
  73932. + tree_level level;
  73933. +
  73934. + assert_spin_locked(&(atom->alock));
  73935. +
  73936. + /* The flush starts from LEAF_LEVEL (=1). */
  73937. + for (level = 1; level < REAL_MAX_ZTREE_HEIGHT + 1; level += 1) {
  73938. + if (list_empty_careful(ATOM_DIRTY_LIST(atom, level)))
  73939. + continue;
  73940. +
  73941. + first_dirty =
  73942. + find_first_dirty_in_list(ATOM_DIRTY_LIST(atom, level),
  73943. + flags);
  73944. + if (first_dirty)
  73945. + return first_dirty;
  73946. + }
  73947. +
  73948. + /* znode-above-root is on the list #0. */
  73949. + return find_first_dirty_in_list(ATOM_DIRTY_LIST(atom, 0), flags);
  73950. +}
  73951. +
  73952. +static void dispatch_wb_list(txn_atom * atom, flush_queue_t * fq)
  73953. +{
  73954. + jnode *cur;
  73955. +
  73956. + assert("zam-905", atom_is_protected(atom));
  73957. +
  73958. + cur = list_entry(ATOM_WB_LIST(atom)->next, jnode, capture_link);
  73959. + while (ATOM_WB_LIST(atom) != &cur->capture_link) {
  73960. + jnode *next = list_entry(cur->capture_link.next, jnode, capture_link);
  73961. +
  73962. + spin_lock_jnode(cur);
  73963. + if (!JF_ISSET(cur, JNODE_WRITEBACK)) {
  73964. + if (JF_ISSET(cur, JNODE_DIRTY)) {
  73965. + queue_jnode(fq, cur);
  73966. + } else {
  73967. + /* move jnode to atom's clean list */
  73968. + list_move_tail(&cur->capture_link,
  73969. + ATOM_CLEAN_LIST(atom));
  73970. + }
  73971. + }
  73972. + spin_unlock_jnode(cur);
  73973. +
  73974. + cur = next;
  73975. + }
  73976. +}
  73977. +
  73978. +/* Scan current atom->writeback_nodes list, re-submit dirty and !writeback
  73979. + * jnodes to disk. */
  73980. +static int submit_wb_list(void)
  73981. +{
  73982. + int ret;
  73983. + flush_queue_t *fq;
  73984. +
  73985. + fq = get_fq_for_current_atom();
  73986. + if (IS_ERR(fq))
  73987. + return PTR_ERR(fq);
  73988. +
  73989. + dispatch_wb_list(fq->atom, fq);
  73990. + spin_unlock_atom(fq->atom);
  73991. +
  73992. + ret = reiser4_write_fq(fq, NULL, 1);
  73993. + reiser4_fq_put(fq);
  73994. +
  73995. + return ret;
  73996. +}
  73997. +
  73998. +/* Wait completion of all writes, re-submit atom writeback list if needed. */
  73999. +static int current_atom_complete_writes(void)
  74000. +{
  74001. + int ret;
  74002. +
  74003. + /* Each jnode from that list was modified and dirtied when it had i/o
  74004. + * request running already. After i/o completion we have to resubmit
  74005. + * them to disk again.*/
  74006. + ret = submit_wb_list();
  74007. + if (ret < 0)
  74008. + return ret;
  74009. +
  74010. + /* Wait all i/o completion */
  74011. + ret = current_atom_finish_all_fq();
  74012. + if (ret)
  74013. + return ret;
  74014. +
  74015. + /* Scan wb list again; all i/o should be completed, we re-submit dirty
  74016. + * nodes to disk */
  74017. + ret = submit_wb_list();
  74018. + if (ret < 0)
  74019. + return ret;
  74020. +
  74021. + /* Wait all nodes we just submitted */
  74022. + return current_atom_finish_all_fq();
  74023. +}
  74024. +
  74025. +#if REISER4_DEBUG
  74026. +
  74027. +static void reiser4_info_atom(const char *prefix, const txn_atom * atom)
  74028. +{
  74029. + if (atom == NULL) {
  74030. + printk("%s: no atom\n", prefix);
  74031. + return;
  74032. + }
  74033. +
  74034. + printk("%s: refcount: %i id: %i flags: %x txnh_count: %i"
  74035. + " capture_count: %i stage: %x start: %lu, flushed: %i\n", prefix,
  74036. + atomic_read(&atom->refcount), atom->atom_id, atom->flags,
  74037. + atom->txnh_count, atom->capture_count, atom->stage,
  74038. + atom->start_time, atom->flushed);
  74039. +}
  74040. +
  74041. +#else /* REISER4_DEBUG */
  74042. +
  74043. +static inline void reiser4_info_atom(const char *prefix, const txn_atom * atom) {}
  74044. +
  74045. +#endif /* REISER4_DEBUG */
  74046. +
  74047. +#define TOOMANYFLUSHES (1 << 13)
  74048. +
  74049. +/* Called with the atom locked and no open "active" transaction handlers except
  74050. + ours, this function calls flush_current_atom() until all dirty nodes are
  74051. + processed. Then it initiates commit processing.
  74052. +
  74053. + Called by the single remaining open "active" txnh, which is closing. Other
  74054. + open txnhs belong to processes which wait atom commit in commit_txnh()
  74055. + routine. They are counted as "waiters" in atom->nr_waiters. Therefore as
  74056. + long as we hold the atom lock none of the jnodes can be captured and/or
  74057. + locked.
  74058. +
  74059. + Return value is an error code if commit fails.
  74060. +*/
  74061. +static int commit_current_atom(long *nr_submitted, txn_atom ** atom)
  74062. +{
  74063. + reiser4_super_info_data *sbinfo = get_current_super_private();
  74064. + long ret = 0;
  74065. + /* how many times jnode_flush() was called as a part of attempt to
  74066. + * commit this atom. */
  74067. + int flushiters;
  74068. +
  74069. + assert("zam-888", atom != NULL && *atom != NULL);
  74070. + assert_spin_locked(&((*atom)->alock));
  74071. + assert("zam-887", get_current_context()->trans->atom == *atom);
  74072. + assert("jmacd-151", atom_isopen(*atom));
  74073. +
  74074. + assert("nikita-3184",
  74075. + get_current_super_private()->delete_mutex_owner != current);
  74076. +
  74077. + for (flushiters = 0;; ++flushiters) {
  74078. + ret =
  74079. + flush_current_atom(JNODE_FLUSH_WRITE_BLOCKS |
  74080. + JNODE_FLUSH_COMMIT,
  74081. + LONG_MAX /* nr_to_write */ ,
  74082. + nr_submitted, atom, NULL);
  74083. + if (ret != -E_REPEAT)
  74084. + break;
  74085. +
  74086. + /* if atom's dirty list contains one znode which is
  74087. + HEARD_BANSHEE and is locked we have to allow lock owner to
  74088. + continue and uncapture that znode */
  74089. + reiser4_preempt_point();
  74090. +
  74091. + *atom = get_current_atom_locked();
  74092. + if (flushiters > TOOMANYFLUSHES && IS_POW(flushiters)) {
  74093. + warning("nikita-3176",
  74094. + "Flushing like mad: %i", flushiters);
  74095. + reiser4_info_atom("atom", *atom);
  74096. + DEBUGON(flushiters > (1 << 20));
  74097. + }
  74098. + }
  74099. +
  74100. + if (ret)
  74101. + return ret;
  74102. +
  74103. + assert_spin_locked(&((*atom)->alock));
  74104. +
  74105. + if (!atom_can_be_committed(*atom)) {
  74106. + spin_unlock_atom(*atom);
  74107. + return RETERR(-E_REPEAT);
  74108. + }
  74109. +
  74110. + if ((*atom)->capture_count == 0)
  74111. + goto done;
  74112. +
  74113. + /* Up to this point we have been flushing and after flush is called we
  74114. + return -E_REPEAT. Now we can commit. We cannot return -E_REPEAT
  74115. + at this point, commit should be successful. */
  74116. + reiser4_atom_set_stage(*atom, ASTAGE_PRE_COMMIT);
  74117. + ON_DEBUG(((*atom)->committer = current));
  74118. + spin_unlock_atom(*atom);
  74119. +
  74120. + ret = current_atom_complete_writes();
  74121. + if (ret)
  74122. + return ret;
  74123. +
  74124. + assert("zam-906", list_empty(ATOM_WB_LIST(*atom)));
  74125. +
  74126. + /* isolate critical code path which should be executed by only one
  74127. + * thread using tmgr mutex */
  74128. + mutex_lock(&sbinfo->tmgr.commit_mutex);
  74129. +
  74130. + ret = reiser4_write_logs(nr_submitted);
  74131. + if (ret < 0)
  74132. + reiser4_panic("zam-597", "write log failed (%ld)\n", ret);
  74133. +
  74134. + /* The atom->ovrwr_nodes list is processed under commit mutex held
  74135. + because of bitmap nodes which are captured by special way in
  74136. + reiser4_pre_commit_hook_bitmap(), that way does not include
  74137. + capture_fuse_wait() as a capturing of other nodes does -- the commit
  74138. + mutex is used for transaction isolation instead. */
  74139. + reiser4_invalidate_list(ATOM_OVRWR_LIST(*atom));
  74140. + mutex_unlock(&sbinfo->tmgr.commit_mutex);
  74141. +
  74142. + reiser4_invalidate_list(ATOM_CLEAN_LIST(*atom));
  74143. + reiser4_invalidate_list(ATOM_WB_LIST(*atom));
  74144. + assert("zam-927", list_empty(&(*atom)->inodes));
  74145. +
  74146. + spin_lock_atom(*atom);
  74147. + done:
  74148. + reiser4_atom_set_stage(*atom, ASTAGE_DONE);
  74149. + ON_DEBUG((*atom)->committer = NULL);
  74150. +
  74151. + /* Atom's state changes, so wake up everybody waiting for this
  74152. + event. */
  74153. + wakeup_atom_waiting_list(*atom);
  74154. +
  74155. + /* Decrement the "until commit" reference, at least one txnh (the caller) is
  74156. + still open. */
  74157. + atomic_dec(&(*atom)->refcount);
  74158. +
  74159. + assert("jmacd-1070", atomic_read(&(*atom)->refcount) > 0);
  74160. + assert("jmacd-1062", (*atom)->capture_count == 0);
  74161. + BUG_ON((*atom)->capture_count != 0);
  74162. + assert_spin_locked(&((*atom)->alock));
  74163. +
  74164. + return ret;
  74165. +}
  74166. +
  74167. +/* TXN_TXNH */
  74168. +
  74169. +/**
  74170. + * force_commit_atom - commit current atom and wait commit completion
  74171. + * @txnh:
  74172. + *
  74173. + * Commits current atom and wait commit completion; current atom and @txnh have
  74174. + * to be spinlocked before call, this function unlocks them on exit.
  74175. + */
  74176. +int force_commit_atom(txn_handle *txnh)
  74177. +{
  74178. + txn_atom *atom;
  74179. +
  74180. + assert("zam-837", txnh != NULL);
  74181. + assert_spin_locked(&(txnh->hlock));
  74182. + assert("nikita-2966", lock_stack_isclean(get_current_lock_stack()));
  74183. +
  74184. + atom = txnh->atom;
  74185. +
  74186. + assert("zam-834", atom != NULL);
  74187. + assert_spin_locked(&(atom->alock));
  74188. +
  74189. + /*
  74190. + * Set flags for atom and txnh: forcing atom commit and waiting for
  74191. + * commit completion
  74192. + */
  74193. + txnh->flags |= TXNH_WAIT_COMMIT;
  74194. + atom->flags |= ATOM_FORCE_COMMIT;
  74195. +
  74196. + spin_unlock_txnh(txnh);
  74197. + spin_unlock_atom(atom);
  74198. +
  74199. + /* commit is here */
  74200. + reiser4_txn_restart_current();
  74201. + return 0;
  74202. +}
  74203. +
  74204. +/* Called to force commit of any outstanding atoms. @commit_all_atoms controls
  74205. + * should we commit all atoms including new ones which are created after this
  74206. + * functions is called. */
  74207. +int txnmgr_force_commit_all(struct super_block *super, int commit_all_atoms)
  74208. +{
  74209. + int ret;
  74210. + txn_atom *atom;
  74211. + txn_mgr *mgr;
  74212. + txn_handle *txnh;
  74213. + unsigned long start_time = jiffies;
  74214. + reiser4_context *ctx = get_current_context();
  74215. +
  74216. + assert("nikita-2965", lock_stack_isclean(get_current_lock_stack()));
  74217. + assert("nikita-3058", reiser4_commit_check_locks());
  74218. +
  74219. + reiser4_txn_restart_current();
  74220. +
  74221. + mgr = &get_super_private(super)->tmgr;
  74222. +
  74223. + txnh = ctx->trans;
  74224. +
  74225. + again:
  74226. +
  74227. + spin_lock_txnmgr(mgr);
  74228. +
  74229. + list_for_each_entry(atom, &mgr->atoms_list, atom_link) {
  74230. + spin_lock_atom(atom);
  74231. +
  74232. + /* Commit any atom which can be committed. If @commit_new_atoms
  74233. + * is not set we commit only atoms which were created before
  74234. + * this call is started. */
  74235. + if (commit_all_atoms
  74236. + || time_before_eq(atom->start_time, start_time)) {
  74237. + if (atom->stage <= ASTAGE_POST_COMMIT) {
  74238. + spin_unlock_txnmgr(mgr);
  74239. +
  74240. + if (atom->stage < ASTAGE_PRE_COMMIT) {
  74241. + spin_lock_txnh(txnh);
  74242. + /* Add force-context txnh */
  74243. + capture_assign_txnh_nolock(atom, txnh);
  74244. + ret = force_commit_atom(txnh);
  74245. + if (ret)
  74246. + return ret;
  74247. + } else
  74248. + /* wait atom commit */
  74249. + reiser4_atom_wait_event(atom);
  74250. +
  74251. + goto again;
  74252. + }
  74253. + }
  74254. +
  74255. + spin_unlock_atom(atom);
  74256. + }
  74257. +
  74258. +#if REISER4_DEBUG
  74259. + if (commit_all_atoms) {
  74260. + reiser4_super_info_data *sbinfo = get_super_private(super);
  74261. + spin_lock_reiser4_super(sbinfo);
  74262. + assert("zam-813",
  74263. + sbinfo->blocks_fake_allocated_unformatted == 0);
  74264. + assert("zam-812", sbinfo->blocks_fake_allocated == 0);
  74265. + spin_unlock_reiser4_super(sbinfo);
  74266. + }
  74267. +#endif
  74268. +
  74269. + spin_unlock_txnmgr(mgr);
  74270. +
  74271. + return 0;
  74272. +}
  74273. +
  74274. +/* check whether commit_some_atoms() can commit @atom. Locking is up to the
  74275. + * caller */
  74276. +static int atom_is_committable(txn_atom * atom)
  74277. +{
  74278. + return
  74279. + atom->stage < ASTAGE_PRE_COMMIT &&
  74280. + atom->txnh_count == atom->nr_waiters && atom_should_commit(atom);
  74281. +}
  74282. +
  74283. +/* called periodically from ktxnmgrd to commit old atoms. Releases ktxnmgrd spin
  74284. + * lock at exit */
  74285. +int commit_some_atoms(txn_mgr * mgr)
  74286. +{
  74287. + int ret = 0;
  74288. + txn_atom *atom;
  74289. + txn_handle *txnh;
  74290. + reiser4_context *ctx;
  74291. + struct list_head *pos, *tmp;
  74292. +
  74293. + ctx = get_current_context();
  74294. + assert("nikita-2444", ctx != NULL);
  74295. +
  74296. + txnh = ctx->trans;
  74297. + spin_lock_txnmgr(mgr);
  74298. +
  74299. + /*
  74300. + * this is to avoid gcc complain that atom might be used
  74301. + * uninitialized
  74302. + */
  74303. + atom = NULL;
  74304. +
  74305. + /* look for atom to commit */
  74306. + list_for_each_safe(pos, tmp, &mgr->atoms_list) {
  74307. + atom = list_entry(pos, txn_atom, atom_link);
  74308. + /*
  74309. + * first test without taking atom spin lock, whether it is
  74310. + * eligible for committing at all
  74311. + */
  74312. + if (atom_is_committable(atom)) {
  74313. + /* now, take spin lock and re-check */
  74314. + spin_lock_atom(atom);
  74315. + if (atom_is_committable(atom))
  74316. + break;
  74317. + spin_unlock_atom(atom);
  74318. + }
  74319. + }
  74320. +
  74321. + ret = (&mgr->atoms_list == pos);
  74322. + spin_unlock_txnmgr(mgr);
  74323. +
  74324. + if (ret) {
  74325. + /* nothing found */
  74326. + spin_unlock(&mgr->daemon->guard);
  74327. + return 0;
  74328. + }
  74329. +
  74330. + spin_lock_txnh(txnh);
  74331. +
  74332. + BUG_ON(atom == NULL);
  74333. + /* Set the atom to force committing */
  74334. + atom->flags |= ATOM_FORCE_COMMIT;
  74335. +
  74336. + /* Add force-context txnh */
  74337. + capture_assign_txnh_nolock(atom, txnh);
  74338. +
  74339. + spin_unlock_txnh(txnh);
  74340. + spin_unlock_atom(atom);
  74341. +
  74342. + /* we are about to release daemon spin lock, notify daemon it
  74343. + has to rescan atoms */
  74344. + mgr->daemon->rescan = 1;
  74345. + spin_unlock(&mgr->daemon->guard);
  74346. + reiser4_txn_restart_current();
  74347. + return 0;
  74348. +}
  74349. +
  74350. +static int txn_try_to_fuse_small_atom(txn_mgr * tmgr, txn_atom * atom)
  74351. +{
  74352. + int atom_stage;
  74353. + txn_atom *atom_2;
  74354. + int repeat;
  74355. +
  74356. + assert("zam-1051", atom->stage < ASTAGE_PRE_COMMIT);
  74357. +
  74358. + atom_stage = atom->stage;
  74359. + repeat = 0;
  74360. +
  74361. + if (!spin_trylock_txnmgr(tmgr)) {
  74362. + atomic_inc(&atom->refcount);
  74363. + spin_unlock_atom(atom);
  74364. + spin_lock_txnmgr(tmgr);
  74365. + spin_lock_atom(atom);
  74366. + repeat = 1;
  74367. + if (atom->stage != atom_stage) {
  74368. + spin_unlock_txnmgr(tmgr);
  74369. + atom_dec_and_unlock(atom);
  74370. + return -E_REPEAT;
  74371. + }
  74372. + atomic_dec(&atom->refcount);
  74373. + }
  74374. +
  74375. + list_for_each_entry(atom_2, &tmgr->atoms_list, atom_link) {
  74376. + if (atom == atom_2)
  74377. + continue;
  74378. + /*
  74379. + * if trylock does not succeed we just do not fuse with that
  74380. + * atom.
  74381. + */
  74382. + if (spin_trylock_atom(atom_2)) {
  74383. + if (atom_2->stage < ASTAGE_PRE_COMMIT) {
  74384. + spin_unlock_txnmgr(tmgr);
  74385. + capture_fuse_into(atom_2, atom);
  74386. + /* all locks are lost we can only repeat here */
  74387. + return -E_REPEAT;
  74388. + }
  74389. + spin_unlock_atom(atom_2);
  74390. + }
  74391. + }
  74392. + atom->flags |= ATOM_CANCEL_FUSION;
  74393. + spin_unlock_txnmgr(tmgr);
  74394. + if (repeat) {
  74395. + spin_unlock_atom(atom);
  74396. + return -E_REPEAT;
  74397. + }
  74398. + return 0;
  74399. +}
  74400. +
  74401. +/* Calls jnode_flush for current atom if it exists; if not, just take another
  74402. + atom and call jnode_flush() for him. If current transaction handle has
  74403. + already assigned atom (current atom) we have to close current transaction
  74404. + prior to switch to another atom or do something with current atom. This
  74405. + code tries to flush current atom.
  74406. +
  74407. + flush_some_atom() is called as part of memory clearing process. It is
  74408. + invoked from balance_dirty_pages(), pdflushd, and entd.
  74409. +
  74410. + If we can flush no nodes, atom is committed, because this frees memory.
  74411. +
  74412. + If atom is too large or too old it is committed also.
  74413. +*/
  74414. +int
  74415. +flush_some_atom(jnode * start, long *nr_submitted, const struct writeback_control *wbc,
  74416. + int flags)
  74417. +{
  74418. + reiser4_context *ctx = get_current_context();
  74419. + txn_mgr *tmgr = &get_super_private(ctx->super)->tmgr;
  74420. + txn_handle *txnh = ctx->trans;
  74421. + txn_atom *atom;
  74422. + int ret;
  74423. +
  74424. + BUG_ON(wbc->nr_to_write == 0);
  74425. + BUG_ON(*nr_submitted != 0);
  74426. + assert("zam-1042", txnh != NULL);
  74427. +repeat:
  74428. + if (txnh->atom == NULL) {
  74429. + /* current atom is not available, take first from txnmgr */
  74430. + spin_lock_txnmgr(tmgr);
  74431. +
  74432. + /* traverse the list of all atoms */
  74433. + list_for_each_entry(atom, &tmgr->atoms_list, atom_link) {
  74434. + /* lock atom before checking its state */
  74435. + spin_lock_atom(atom);
  74436. +
  74437. + /*
  74438. + * we need an atom which is not being committed and
  74439. + * which has no flushers (jnode_flush() add one flusher
  74440. + * at the beginning and subtract one at the end).
  74441. + */
  74442. + if (atom->stage < ASTAGE_PRE_COMMIT &&
  74443. + atom->nr_flushers == 0) {
  74444. + spin_lock_txnh(txnh);
  74445. + capture_assign_txnh_nolock(atom, txnh);
  74446. + spin_unlock_txnh(txnh);
  74447. +
  74448. + goto found;
  74449. + }
  74450. +
  74451. + spin_unlock_atom(atom);
  74452. + }
  74453. +
  74454. + /*
  74455. + * Write throttling is case of no one atom can be
  74456. + * flushed/committed.
  74457. + */
  74458. + if (!current_is_flush_bd_task()) {
  74459. + list_for_each_entry(atom, &tmgr->atoms_list, atom_link) {
  74460. + spin_lock_atom(atom);
  74461. + /* Repeat the check from the above. */
  74462. + if (atom->stage < ASTAGE_PRE_COMMIT
  74463. + && atom->nr_flushers == 0) {
  74464. + spin_lock_txnh(txnh);
  74465. + capture_assign_txnh_nolock(atom, txnh);
  74466. + spin_unlock_txnh(txnh);
  74467. +
  74468. + goto found;
  74469. + }
  74470. + if (atom->stage <= ASTAGE_POST_COMMIT) {
  74471. + spin_unlock_txnmgr(tmgr);
  74472. + /*
  74473. + * we just wait until atom's flusher
  74474. + * makes a progress in flushing or
  74475. + * committing the atom
  74476. + */
  74477. + reiser4_atom_wait_event(atom);
  74478. + goto repeat;
  74479. + }
  74480. + spin_unlock_atom(atom);
  74481. + }
  74482. + }
  74483. + spin_unlock_txnmgr(tmgr);
  74484. + return 0;
  74485. + found:
  74486. + spin_unlock_txnmgr(tmgr);
  74487. + } else
  74488. + atom = get_current_atom_locked();
  74489. +
  74490. + BUG_ON(atom->super != ctx->super);
  74491. + assert("vs-35", atom->super == ctx->super);
  74492. + if (start) {
  74493. + spin_lock_jnode(start);
  74494. + ret = (atom == start->atom) ? 1 : 0;
  74495. + spin_unlock_jnode(start);
  74496. + if (ret == 0)
  74497. + start = NULL;
  74498. + }
  74499. + ret = flush_current_atom(flags, wbc->nr_to_write, nr_submitted, &atom, start);
  74500. + if (ret == 0) {
  74501. + /* flush_current_atom returns 0 only if it submitted for write
  74502. + nothing */
  74503. + BUG_ON(*nr_submitted != 0);
  74504. + if (*nr_submitted == 0 || atom_should_commit_asap(atom)) {
  74505. + if (atom->capture_count < tmgr->atom_min_size &&
  74506. + !(atom->flags & ATOM_CANCEL_FUSION)) {
  74507. + ret = txn_try_to_fuse_small_atom(tmgr, atom);
  74508. + if (ret == -E_REPEAT) {
  74509. + reiser4_preempt_point();
  74510. + goto repeat;
  74511. + }
  74512. + }
  74513. + /* if early flushing could not make more nodes clean,
  74514. + * or atom is too old/large,
  74515. + * we force current atom to commit */
  74516. + /* wait for commit completion but only if this
  74517. + * wouldn't stall pdflushd and ent thread. */
  74518. + if (!ctx->entd)
  74519. + txnh->flags |= TXNH_WAIT_COMMIT;
  74520. + atom->flags |= ATOM_FORCE_COMMIT;
  74521. + }
  74522. + spin_unlock_atom(atom);
  74523. + } else if (ret == -E_REPEAT) {
  74524. + if (*nr_submitted == 0) {
  74525. + /* let others who hampers flushing (hold longterm locks,
  74526. + for instance) to free the way for flush */
  74527. + reiser4_preempt_point();
  74528. + goto repeat;
  74529. + }
  74530. + ret = 0;
  74531. + }
  74532. +/*
  74533. + if (*nr_submitted > wbc->nr_to_write)
  74534. + warning("", "asked for %ld, written %ld\n", wbc->nr_to_write, *nr_submitted);
  74535. +*/
  74536. + reiser4_txn_restart(ctx);
  74537. +
  74538. + return ret;
  74539. +}
  74540. +
  74541. +/* Remove processed nodes from atom's clean list (thereby remove them from transaction). */
  74542. +void reiser4_invalidate_list(struct list_head *head)
  74543. +{
  74544. + while (!list_empty(head)) {
  74545. + jnode *node;
  74546. +
  74547. + node = list_entry(head->next, jnode, capture_link);
  74548. + spin_lock_jnode(node);
  74549. + reiser4_uncapture_block(node);
  74550. + jput(node);
  74551. + }
  74552. +}
  74553. +
  74554. +static void init_wlinks(txn_wait_links * wlinks)
  74555. +{
  74556. + wlinks->_lock_stack = get_current_lock_stack();
  74557. + INIT_LIST_HEAD(&wlinks->_fwaitfor_link);
  74558. + INIT_LIST_HEAD(&wlinks->_fwaiting_link);
  74559. + wlinks->waitfor_cb = NULL;
  74560. + wlinks->waiting_cb = NULL;
  74561. +}
  74562. +
  74563. +/* Add atom to the atom's waitfor list and wait for somebody to wake us up; */
  74564. +void reiser4_atom_wait_event(txn_atom * atom)
  74565. +{
  74566. + txn_wait_links _wlinks;
  74567. +
  74568. + assert_spin_locked(&(atom->alock));
  74569. + assert("nikita-3156",
  74570. + lock_stack_isclean(get_current_lock_stack()) ||
  74571. + atom->nr_running_queues > 0);
  74572. +
  74573. + init_wlinks(&_wlinks);
  74574. + list_add_tail(&_wlinks._fwaitfor_link, &atom->fwaitfor_list);
  74575. + atomic_inc(&atom->refcount);
  74576. + spin_unlock_atom(atom);
  74577. +
  74578. + reiser4_prepare_to_sleep(_wlinks._lock_stack);
  74579. + reiser4_go_to_sleep(_wlinks._lock_stack);
  74580. +
  74581. + spin_lock_atom(atom);
  74582. + list_del(&_wlinks._fwaitfor_link);
  74583. + atom_dec_and_unlock(atom);
  74584. +}
  74585. +
  74586. +void reiser4_atom_set_stage(txn_atom * atom, txn_stage stage)
  74587. +{
  74588. + assert("nikita-3535", atom != NULL);
  74589. + assert_spin_locked(&(atom->alock));
  74590. + assert("nikita-3536", stage <= ASTAGE_INVALID);
  74591. + /* Excelsior! */
  74592. + assert("nikita-3537", stage >= atom->stage);
  74593. + if (atom->stage != stage) {
  74594. + atom->stage = stage;
  74595. + reiser4_atom_send_event(atom);
  74596. + }
  74597. +}
  74598. +
  74599. +/* wake all threads which wait for an event */
  74600. +void reiser4_atom_send_event(txn_atom * atom)
  74601. +{
  74602. + assert_spin_locked(&(atom->alock));
  74603. + wakeup_atom_waitfor_list(atom);
  74604. +}
  74605. +
  74606. +/* Informs txn manager code that the owner of this txn_handle should wait for atom commit completion (for
  74607. + example, because it does fsync(2)) */
          +/* Non-zero when the handle owner asked to block until the atom commits
          + * (TXNH_WAIT_COMMIT set), e.g. for fsync(2)/sync(2) semantics. */
  74608. +static int should_wait_commit(txn_handle * h)
  74609. +{
  74610. + return h->flags & TXNH_WAIT_COMMIT;
  74611. +}
  74612. +
          +/* State carried across repeated try_commit_txnh() attempts. */
  74613. +typedef struct commit_data {
          + /* atom the committing handle belongs to; refreshed each attempt */
  74614. + txn_atom *atom;
          + /* transaction handle being committed */
  74615. + txn_handle *txnh;
          + /* accumulator for blocks written by flush/commit calls */
  74616. + long nr_written;
  74617. + /* as an optimization we start committing atom by first trying to
  74618. + * flush it few times without switching into ASTAGE_CAPTURE_WAIT. This
  74619. + * allows to reduce stalls due to other threads waiting for atom in
  74620. + * ASTAGE_CAPTURE_WAIT stage. ->preflush is counter of these
  74621. + * preliminary flushes. */
  74622. + int preflush;
  74623. + /* have we waited on atom (i.e. are we counted in atom->nr_waiters). */
  74624. + int wait;
          + /* set once commit failed with a hard (non -E_REPEAT) error */
  74625. + int failed;
          + /* set when ktxnmgrd should be kicked to commit this atom for us */
  74626. + int wake_ktxnmgrd_up;
  74627. +} commit_data;
  74628. +
  74629. +/*
  74630. + * Called from commit_txnh() repeatedly, until either error happens, or atom
  74631. + * commits successfully.
  74632. + */
  74633. +static int try_commit_txnh(commit_data * cd)
  74634. +{
  74635. + int result;
  74636. +
  74637. + assert("nikita-2968", lock_stack_isclean(get_current_lock_stack()));
  74638. +
  74639. + /* Get the atom and txnh locked. */
  74640. + cd->atom = txnh_get_atom(cd->txnh);
  74641. + assert("jmacd-309", cd->atom != NULL);
  74642. + spin_unlock_txnh(cd->txnh);
  74643. +
          + /* if the previous iteration registered us as a waiter, undo that
          +  * before re-evaluating the atom's state */
  74644. + if (cd->wait) {
  74645. + cd->atom->nr_waiters--;
  74646. + cd->wait = 0;
  74647. + }
  74648. +
  74649. + if (cd->atom->stage == ASTAGE_DONE)
  74650. + return 0;
  74651. +
  74652. + if (cd->failed)
  74653. + return 0;
  74654. +
  74655. + if (atom_should_commit(cd->atom)) {
  74656. + /* if atom is _very_ large schedule it for commit as soon as
  74657. + * possible. */
  74658. + if (atom_should_commit_asap(cd->atom)) {
  74659. + /*
  74660. + * When atom is in PRE_COMMIT or later stage following
  74661. + * invariant (encoded in atom_can_be_committed())
  74662. + * holds: there is exactly one non-waiter transaction
  74663. + * handle opened on this atom. When thread wants to
  74664. + * wait until atom commits (for example sync()) it
  74665. + * waits on atom event after increasing
  74666. + * atom->nr_waiters (see below in this function). It
  74667. + * cannot be guaranteed that atom is already committed
  74668. + * after receiving event, so loop has to be
  74669. + * re-started. But if atom switched into PRE_COMMIT
  74670. + * stage and became too large, we cannot change its
  74671. + * state back to CAPTURE_WAIT (atom stage can only
  74672. + * increase monotonically), hence this check.
  74673. + */
  74674. + if (cd->atom->stage < ASTAGE_CAPTURE_WAIT)
  74675. + reiser4_atom_set_stage(cd->atom,
  74676. + ASTAGE_CAPTURE_WAIT);
  74677. + cd->atom->flags |= ATOM_FORCE_COMMIT;
  74678. + }
  74679. + if (cd->txnh->flags & TXNH_DONT_COMMIT) {
  74680. + /*
  74681. + * this thread (transaction handle that is) doesn't
  74682. + * want to commit atom. Notify waiters that handle is
  74683. + * closed. This can happen, for example, when we are
  74684. + * under VFS directory lock and don't want to commit
  74685. + * atom right now to avoid stalling other threads
  74686. + * working in the same directory.
  74687. + */
  74688. +
  74689. + /* Wake the ktxnmgrd up if the ktxnmgrd is needed to
  74690. + * commit this atom: no atom waiters and only one
  74691. + * (our) open transaction handle. */
  74692. + cd->wake_ktxnmgrd_up =
  74693. + cd->atom->txnh_count == 1 &&
  74694. + cd->atom->nr_waiters == 0;
  74695. + reiser4_atom_send_event(cd->atom);
  74696. + result = 0;
  74697. + } else if (!atom_can_be_committed(cd->atom)) {
  74698. + if (should_wait_commit(cd->txnh)) {
  74699. + /* sync(): wait for commit */
  74700. + cd->atom->nr_waiters++;
  74701. + cd->wait = 1;
  74702. + reiser4_atom_wait_event(cd->atom);
  74703. + result = RETERR(-E_REPEAT);
  74704. + } else {
  74705. + result = 0;
  74706. + }
  74707. + } else if (cd->preflush > 0 && !is_current_ktxnmgrd()) {
  74708. + /*
  74709. + * optimization: flush atom without switching it into
  74710. + * ASTAGE_CAPTURE_WAIT.
  74711. + *
  74712. + * But don't do this for ktxnmgrd, because ktxnmgrd
  74713. + * should never block on atom fusion.
  74714. + */
  74715. + result = flush_current_atom(JNODE_FLUSH_WRITE_BLOCKS,
  74716. + LONG_MAX, &cd->nr_written,
  74717. + &cd->atom, NULL);
  74718. + if (result == 0) {
  74719. + spin_unlock_atom(cd->atom);
  74720. + cd->preflush = 0;
  74721. + result = RETERR(-E_REPEAT);
  74722. + } else /* Atom wasn't flushed
  74723. + * completely. Rinse. Repeat. */
  74724. + --cd->preflush;
  74725. + } else {
  74726. + /* We change atom state to ASTAGE_CAPTURE_WAIT to
  74727. + prevent atom fusion and count ourself as an active
  74728. + flusher */
  74729. + reiser4_atom_set_stage(cd->atom, ASTAGE_CAPTURE_WAIT);
  74730. + cd->atom->flags |= ATOM_FORCE_COMMIT;
  74731. +
  74732. + result =
  74733. + commit_current_atom(&cd->nr_written, &cd->atom);
  74734. + if (result != 0 && result != -E_REPEAT)
  74735. + cd->failed = 1;
  74736. + }
  74737. + } else
  74738. + result = 0;
  74739. +
  74740. +#if REISER4_DEBUG
  74741. + if (result == 0)
  74742. + assert_spin_locked(&(cd->atom->alock));
  74743. +#endif
  74744. +
  74745. + /* perfectly valid assertion, except that when atom/txnh is not locked
  74746. + * fusion can take place, and cd->atom points nowhere. */
  74747. + /*
  74748. + assert("jmacd-1028", ergo(result != 0, spin_atom_is_not_locked(cd->atom)));
  74749. + */
  74750. + return result;
  74751. +}
  74752. +
  74753. +/* Called to commit a transaction handle. This decrements the atom's number of open
  74754. + handles and if it is the last handle to commit and the atom should commit, initiates
  74755. + atom commit. Always returns 0; the count of written blocks is tracked in commit_data. */
  74756. +static int commit_txnh(txn_handle * txnh)
  74757. +{
  74758. + commit_data cd;
  74759. + assert("umka-192", txnh != NULL);
  74760. +
  74761. + memset(&cd, 0, sizeof cd);
  74762. + cd.txnh = txnh;
          + /* allow up to 10 preliminary flushes before forcing CAPTURE_WAIT */
  74763. + cd.preflush = 10;
  74764. +
  74765. + /* calls try_commit_txnh() until either atom commits, or error
  74766. + * happens */
  74767. + while (try_commit_txnh(&cd) != 0)
  74768. + reiser4_preempt_point();
  74769. +
          + /* atom is still locked here (try_commit_txnh() returned 0);
          +  * detach the handle from it */
  74770. + spin_lock_txnh(txnh);
  74771. +
  74772. + cd.atom->txnh_count -= 1;
  74773. + txnh->atom = NULL;
  74774. + /* remove transaction handle from atom's list of transaction handles */
  74775. + list_del_init(&txnh->txnh_link);
  74776. +
  74777. + spin_unlock_txnh(txnh);
  74778. + atom_dec_and_unlock(cd.atom);
  74779. + /* if we don't want to do a commit (TXNH_DONT_COMMIT is set, probably
  74780. + * because it takes time) by current thread, we do that work
  74781. + * asynchronously by ktxnmgrd daemon. */
  74782. + if (cd.wake_ktxnmgrd_up)
  74783. + ktxnmgrd_kick(&get_current_super_private()->tmgr);
  74784. +
  74785. + return 0;
  74786. +}
  74787. +
  74788. +/* TRY_CAPTURE */
  74789. +
  74790. +/* This routine attempts a single block-capture request. It may return -E_REPEAT if some
  74791. + condition indicates that the request should be retried, and it may block if the
  74792. + txn_capture mode does not include the TXN_CAPTURE_NONBLOCKING request flag.
  74793. +
  74794. + This routine encodes the basic logic of block capturing described by:
  74795. +
  74796. + http://namesys.com/v4/v4.html
  74797. +
  74798. + Our goal here is to ensure that any two blocks that contain dependent modifications
  74799. + should commit at the same time. This function enforces this discipline by initiating
  74800. + fusion whenever a transaction handle belonging to one atom requests to read or write a
  74801. + block belonging to another atom (TXN_CAPTURE_WRITE or TXN_CAPTURE_READ_ATOMIC).
  74802. +
  74803. + In addition, this routine handles the initial assignment of atoms to blocks and
  74804. + transaction handles. These are possible outcomes of this function:
  74805. +
  74806. + 1. The block and handle are already part of the same atom: return immediate success
  74807. +
  74808. + 2. The block is assigned but the handle is not: call capture_assign_txnh to assign
  74809. + the handle to the block's atom.
  74810. +
  74811. + 3. The handle is assigned but the block is not: call capture_assign_block to assign
  74812. + the block to the handle's atom.
  74813. +
  74814. + 4. Both handle and block are assigned, but to different atoms: call capture_init_fusion
  74815. + to fuse atoms.
  74816. +
  74817. + 5. Neither block nor handle are assigned: create a new atom and assign them both.
  74818. +
  74819. + 6. A read request for a non-captured block: return immediate success.
  74820. +
  74821. + This function acquires and releases the handle's spinlock. This function is called
  74822. + under the jnode lock and if the return value is 0, it returns with the jnode lock still
  74823. + held. If the return is -E_REPEAT or some other error condition, the jnode lock is
  74824. + released. The external interface (reiser4_try_capture) manages re-acquiring the jnode
  74825. + lock in the failure case.
  74826. +*/
  74827. +static int try_capture_block(
  74828. + txn_handle * txnh, jnode * node, txn_capture mode,
  74829. + txn_atom ** atom_alloc)
  74830. +{
  74831. + txn_atom *block_atom;
  74832. + txn_atom *txnh_atom;
  74833. +
  74834. + /* Should not call capture for READ_NONCOM requests, handled in reiser4_try_capture. */
  74835. + assert("jmacd-567", CAPTURE_TYPE(mode) != TXN_CAPTURE_READ_NONCOM);
  74836. +
  74837. + /* FIXME-ZAM-HANS: FIXME_LATER_JMACD Should assert that atom->tree ==
  74838. + * node->tree somewhere. */
  74839. + assert("umka-194", txnh != NULL);
  74840. + assert("umka-195", node != NULL);
  74841. +
  74842. + /* The jnode is already locked! Being called from reiser4_try_capture(). */
  74843. + assert_spin_locked(&(node->guard));
  74844. + block_atom = node->atom;
  74845. +
  74846. + /* Get txnh spinlock, this allows us to compare txn_atom pointers but it doesn't
  74847. + let us touch the atoms themselves. */
  74848. + spin_lock_txnh(txnh);
  74849. + txnh_atom = txnh->atom;
  74850. + /* Process of capturing continues into one of four branches depends on
  74851. + which atoms from (block atom (node->atom), current atom (txnh->atom))
  74852. + exist. */
  74853. + if (txnh_atom == NULL) {
  74854. + if (block_atom == NULL) {
  74855. + spin_unlock_txnh(txnh);
  74856. + spin_unlock_jnode(node);
  74857. + /* assign empty atom to the txnh and repeat */
  74858. + return atom_begin_and_assign_to_txnh(atom_alloc, txnh);
  74859. + } else {
          + /* pin the block's atom before dropping the jnode lock */
  74860. + atomic_inc(&block_atom->refcount);
  74861. + /* node spin-lock isn't needed anymore */
  74862. + spin_unlock_jnode(node);
  74863. + if (!spin_trylock_atom(block_atom)) {
  74864. + spin_unlock_txnh(txnh);
  74865. + spin_lock_atom(block_atom);
  74866. + spin_lock_txnh(txnh);
  74867. + }
  74868. + /* re-check state after getting txnh and the node
  74869. + * atom spin-locked */
  74870. + if (node->atom != block_atom || txnh->atom != NULL) {
  74871. + spin_unlock_txnh(txnh);
  74872. + atom_dec_and_unlock(block_atom);
  74873. + return RETERR(-E_REPEAT);
  74874. + }
  74875. + atomic_dec(&block_atom->refcount);
  74876. + if (block_atom->stage > ASTAGE_CAPTURE_WAIT ||
  74877. + (block_atom->stage == ASTAGE_CAPTURE_WAIT &&
  74878. + block_atom->txnh_count != 0))
  74879. + return capture_fuse_wait(txnh, block_atom, NULL, mode);
  74880. + capture_assign_txnh_nolock(block_atom, txnh);
  74881. + spin_unlock_txnh(txnh);
  74882. + spin_unlock_atom(block_atom);
  74883. + return RETERR(-E_REPEAT);
  74884. + }
  74885. + } else {
  74886. + /* It is time to perform deadlock prevention check over the
  74887. + node we want to capture. It is possible this node was locked
  74888. + for read without capturing it. The optimization which allows
  74889. + to do it helps us in keeping atoms independent as long as
  74890. + possible but it may cause lock/fuse deadlock problems.
  74891. +
  74892. + A number of similar deadlock situations with locked but not
  74893. + captured nodes were found. In each situation there are two
  74894. + or more threads: one of them does flushing while another one
  74895. + does routine balancing or tree lookup. The flushing thread
  74896. + (F) sleeps in long term locking request for node (N), another
  74897. + thread (A) sleeps in trying to capture some node already
  74898. + belonging the atom F, F has a state which prevents
  74899. + immediate fusion.
  74900. +
  74901. + Deadlocks of this kind cannot happen if node N was properly
  74902. + captured by thread A. The F thread fuses atoms before locking
  74903. + therefore current atom of thread F and current atom of thread
  74904. + A became the same atom and thread A may proceed. This does
  74905. + not work if node N was not captured because the fusion of
  74906. + atom does not happen.
  74907. +
  74908. + The following scheme solves the deadlock: If
  74909. + longterm_lock_znode locks and does not capture a znode, that
  74910. + znode is marked as MISSED_IN_CAPTURE. A node marked this way
  74911. + is processed by the code below which restores the missed
  74912. + capture and fuses current atoms of all the node lock owners
  74913. + by calling the fuse_not_fused_lock_owners() function. */
  74914. + if (JF_ISSET(node, JNODE_MISSED_IN_CAPTURE)) {
  74915. + JF_CLR(node, JNODE_MISSED_IN_CAPTURE);
  74916. + if (jnode_is_znode(node) && znode_is_locked(JZNODE(node))) {
  74917. + spin_unlock_txnh(txnh);
  74918. + spin_unlock_jnode(node);
  74919. + fuse_not_fused_lock_owners(txnh, JZNODE(node));
  74920. + return RETERR(-E_REPEAT);
  74921. + }
  74922. + }
  74923. + if (block_atom == NULL) {
          + /* handle has an atom, block does not: capture the block
          +  * into the handle's atom */
  74924. + atomic_inc(&txnh_atom->refcount);
  74925. + spin_unlock_txnh(txnh);
  74926. + if (!spin_trylock_atom(txnh_atom)) {
  74927. + spin_unlock_jnode(node);
  74928. + spin_lock_atom(txnh_atom);
  74929. + spin_lock_jnode(node);
  74930. + }
  74931. + if (txnh->atom != txnh_atom || node->atom != NULL
  74932. + || JF_ISSET(node, JNODE_IS_DYING)) {
  74933. + spin_unlock_jnode(node);
  74934. + atom_dec_and_unlock(txnh_atom);
  74935. + return RETERR(-E_REPEAT);
  74936. + }
  74937. + atomic_dec(&txnh_atom->refcount);
  74938. + capture_assign_block_nolock(txnh_atom, node);
  74939. + spin_unlock_atom(txnh_atom);
  74940. + } else {
  74941. + if (txnh_atom != block_atom) {
  74942. + if (mode & TXN_CAPTURE_DONT_FUSE) {
  74943. + spin_unlock_txnh(txnh);
  74944. + spin_unlock_jnode(node);
  74945. + /* we are in a "no-fusion" mode and @node is
  74946. + * already part of transaction. */
  74947. + return RETERR(-E_NO_NEIGHBOR);
  74948. + }
  74949. + return capture_init_fusion(node, txnh, mode);
  74950. + }
          + /* both belong to the same atom already: nothing to do */
  74951. + spin_unlock_txnh(txnh);
  74952. + }
  74953. + }
  74954. + return 0;
  74955. +}
  74956. +
          +/* Compute the txn_capture mode for @node from the requested znode lock
          + * mode and caller-supplied flags; returns 0 when no capture is needed
          + * (read lock on an uncaptured node). Caller holds node->guard. */
  74957. +static txn_capture
  74958. +build_capture_mode(jnode * node, znode_lock_mode lock_mode, txn_capture flags)
  74959. +{
  74960. + txn_capture cap_mode;
  74961. +
  74962. + assert_spin_locked(&(node->guard));
  74963. +
  74964. + /* FIXME_JMACD No way to set TXN_CAPTURE_READ_MODIFY yet. */
  74965. +
  74966. + if (lock_mode == ZNODE_WRITE_LOCK) {
  74967. + cap_mode = TXN_CAPTURE_WRITE;
  74968. + } else if (node->atom != NULL) {
  74969. + cap_mode = TXN_CAPTURE_WRITE;
  74970. + } else if (0 && /* txnh->mode == TXN_READ_FUSING && */
  74971. + jnode_get_level(node) == LEAF_LEVEL) {
  74972. + /* NOTE-NIKITA TXN_READ_FUSING is not currently used */
  74973. + /* We only need a READ_FUSING capture at the leaf level. This
  74974. + is because the internal levels of the tree (twigs included)
  74975. + are redundant from the point of the user that asked for a
  74976. + read-fusing transcrash. The user only wants to read-fuse
  74977. + atoms due to reading uncommitted data that another user has
  74978. + written. It is the file system that reads/writes the
  74979. + internal tree levels, the user only reads/writes leaves. */
  74980. + cap_mode = TXN_CAPTURE_READ_ATOMIC;
  74981. + } else {
  74982. + /* In this case (read lock at a non-leaf) there's no reason to
  74983. + * capture. */
  74984. + /* cap_mode = TXN_CAPTURE_READ_NONCOM; */
  74985. + return 0;
  74986. + }
  74987. +
          + /* propagate only the caller flags capture code understands */
  74988. + cap_mode |= (flags & (TXN_CAPTURE_NONBLOCKING | TXN_CAPTURE_DONT_FUSE));
  74989. + assert("nikita-3186", cap_mode != 0);
  74990. + return cap_mode;
  74991. +}
  74992. +
  74993. +/* This is an external interface to try_capture_block(), it calls
  74994. + try_capture_block() repeatedly as long as -E_REPEAT is returned.
  74995. +
  74996. + @node: node to capture,
  74997. + @lock_mode: read or write lock is used in capture mode calculation,
  74998. + @flags: see txn_capture flags enumeration,
  74999. + @can_coc : can copy-on-capture
  75000. +
  75001. + @return: 0 - node was successfully captured, -E_REPEAT - capture request
  75002. + cannot be processed immediately as it was requested in flags,
  75003. + < 0 - other errors.
  75004. +*/
  75005. +int reiser4_try_capture(jnode *node, znode_lock_mode lock_mode,
  75006. + txn_capture flags)
  75007. +{
  75008. + txn_atom *atom_alloc = NULL;
  75009. + txn_capture cap_mode;
  75010. + txn_handle *txnh = get_current_context()->trans;
  75011. + int ret;
  75012. +
  75013. + assert_spin_locked(&(node->guard));
  75014. +
  75015. + repeat:
  75016. + if (JF_ISSET(node, JNODE_IS_DYING))
  75017. + return RETERR(-EINVAL);
          + /* fast path: node already captured by our own atom */
  75018. + if (node->atom != NULL && txnh->atom == node->atom)
  75019. + return 0;
  75020. + cap_mode = build_capture_mode(node, lock_mode, flags);
  75021. + if (cap_mode == 0 ||
  75022. + (!(cap_mode & TXN_CAPTURE_WTYPES) && node->atom == NULL)) {
  75023. + /* Mark this node as "MISSED". It helps in further deadlock
  75024. + * analysis */
  75025. + if (jnode_is_znode(node))
  75026. + JF_SET(node, JNODE_MISSED_IN_CAPTURE);
  75027. + return 0;
  75028. + }
  75029. + /* Repeat try_capture as long as -E_REPEAT is returned. */
  75030. + ret = try_capture_block(txnh, node, cap_mode, &atom_alloc);
  75031. + /* Regardless of non_blocking:
  75032. +
  75033. + If ret == 0 then jnode is still locked.
  75034. + If ret != 0 then jnode is unlocked.
  75035. + */
  75036. +#if REISER4_DEBUG
  75037. + if (ret == 0)
  75038. + assert_spin_locked(&(node->guard));
  75039. + else
  75040. + assert_spin_not_locked(&(node->guard));
  75041. +#endif
  75042. + assert_spin_not_locked(&(txnh->guard));
  75043. +
  75044. + if (ret == -E_REPEAT) {
  75045. + /* E_REPEAT implies all locks were released, therefore we need
  75046. + to take the jnode's lock again. */
  75047. + spin_lock_jnode(node);
  75048. +
  75049. + /* Although this may appear to be a busy loop, it is not.
  75050. + There are several conditions that cause E_REPEAT to be
  75051. + returned by the call to try_capture_block, all cases
  75052. + indicating some kind of state change that means you should
  75053. + retry the request and will get a different result. In some
  75054. + cases this could be avoided with some extra code, but
  75055. + generally it is done because the necessary locks were
  75056. + released as a result of the operation and repeating is the
  75057. + simplest thing to do (less bug potential). The cases are:
  75058. + atom fusion returns E_REPEAT after it completes (jnode and
  75059. + txnh were unlocked); race conditions in assign_block,
  75060. + assign_txnh, and init_fusion return E_REPEAT (trylock
  75061. + failure); after going to sleep in capture_fuse_wait
  75062. + (request was blocked but may now succeed). I'm not quite
  75063. + sure how capture_copy works yet, but it may also return
  75064. + E_REPEAT. When the request is legitimately blocked, the
  75065. + requestor goes to sleep in fuse_wait, so this is not a busy
  75066. + loop. */
  75067. + /* NOTE-NIKITA: still don't understand:
  75068. +
  75069. + try_capture_block->capture_assign_txnh->spin_trylock_atom->E_REPEAT
  75070. +
  75071. + looks like busy loop?
  75072. + */
  75073. + goto repeat;
  75074. + }
  75075. +
  75076. + /* free extra atom object that was possibly allocated by
  75077. + try_capture_block().
  75078. +
  75079. + Do this before acquiring jnode spin lock to
  75080. + minimize time spent under lock. --nikita */
  75081. + if (atom_alloc != NULL) {
  75082. + kmem_cache_free(_atom_slab, atom_alloc);
  75083. + }
  75084. +
  75085. + if (ret != 0) {
  75086. + if (ret == -E_BLOCK) {
          + /* -E_BLOCK only happens for non-blocking requests;
          +  * report it to the caller as -E_REPEAT */
  75087. + assert("nikita-3360",
  75088. + cap_mode & TXN_CAPTURE_NONBLOCKING);
  75089. + ret = -E_REPEAT;
  75090. + }
  75091. +
  75092. + /* Failure means jnode is not locked. FIXME_LATER_JMACD May
  75093. + want to fix the above code to avoid releasing the lock and
  75094. + re-acquiring it, but there are cases where failure occurs
  75095. + when the lock is not held, and those cases would need to be
  75096. + modified to re-take the lock. */
  75097. + spin_lock_jnode(node);
  75098. + }
  75099. +
  75100. + /* Jnode is still locked. */
  75101. + assert_spin_locked(&(node->guard));
  75102. + return ret;
  75103. +}
  75104. +
          +/* Drop one reference on each of two locked atoms. @two is released
          + * first so that @one can be safely re-locked for its own
          + * atom_dec_and_unlock() without holding two atom locks at once. */
  75105. +static void release_two_atoms(txn_atom *one, txn_atom *two)
  75106. +{
  75107. + spin_unlock_atom(one);
  75108. + atom_dec_and_unlock(two);
  75109. + spin_lock_atom(one);
  75110. + atom_dec_and_unlock(one);
  75111. +}
  75112. +
  75113. +/* This function sets up a call to try_capture_block and repeats as long as -E_REPEAT is
  75114. + returned by that routine. The txn_capture request mode is computed here depending on
  75115. + the transaction handle's type and the lock request. This is called from the depths of
  75116. + the lock manager with the jnode lock held and it always returns with the jnode lock
  75117. + held.
  75118. +*/
  75119. +
  75120. +/* fuse all 'active' atoms of lock owners of given node. */
  75121. +static void fuse_not_fused_lock_owners(txn_handle * txnh, znode * node)
  75122. +{
  75123. + lock_handle *lh;
  75124. + int repeat;
  75125. + txn_atom *atomh, *atomf;
  75126. + reiser4_context *me = get_current_context();
  75127. + reiser4_context *ctx = NULL;
  75128. +
  75129. + assert_spin_not_locked(&(ZJNODE(node)->guard));
  75130. + assert_spin_not_locked(&(txnh->hlock));
  75131. +
  75132. + repeat:
  75133. + repeat = 0;
  75134. + atomh = txnh_get_atom(txnh);
  75135. + spin_unlock_txnh(txnh);
  75136. + assert("zam-692", atomh != NULL);
  75137. +
  75138. + spin_lock_zlock(&node->lock);
  75139. + /* inspect list of lock owners */
  75140. + list_for_each_entry(lh, &node->lock.owners, owners_link) {
  75141. + ctx = get_context_by_lock_stack(lh->owner);
  75142. + if (ctx == me)
  75143. + continue;
  75144. + /* below we use two assumptions to avoid additional spin-locks
  75145. + for checking the condition :
  75146. +
  75147. + 1) if the lock stack has lock, the transaction should be
  75148. + opened, i.e. ctx->trans != NULL;
  75149. +
  75150. + 2) reading of well-aligned ctx->trans->atom is atomic, if it
  75151. + equals to the address of spin-locked atomh, we take that
  75152. + the atoms are the same, nothing has to be captured. */
  75153. + if (atomh != ctx->trans->atom) {
  75154. + reiser4_wake_up(lh->owner);
  75155. + repeat = 1;
  75156. + break;
  75157. + }
  75158. + }
  75159. + if (repeat) {
  75160. + if (!spin_trylock_txnh(ctx->trans)) {
  75161. + spin_unlock_zlock(&node->lock);
  75162. + spin_unlock_atom(atomh);
  75163. + goto repeat;
  75164. + }
  75165. + atomf = ctx->trans->atom;
  75166. + if (atomf == NULL) {
  75167. + capture_assign_txnh_nolock(atomh, ctx->trans);
  75168. + /* release zlock lock _after_ assigning the atom to the
  75169. + * transaction handle, otherwise the lock owner thread
  75170. + * may unlock all znodes, exit kernel context and here
  75171. + * we would access an invalid transaction handle. */
  75172. + spin_unlock_zlock(&node->lock);
  75173. + spin_unlock_atom(atomh);
  75174. + spin_unlock_txnh(ctx->trans);
  75175. + goto repeat;
  75176. + }
  75177. + assert("zam-1059", atomf != atomh);
  75178. + spin_unlock_zlock(&node->lock);
          + /* pin both atoms before dropping the txnh lock so neither
          +  * can be freed while we reorder their spin-locks below */
  75179. + atomic_inc(&atomh->refcount);
  75180. + atomic_inc(&atomf->refcount);
  75181. + spin_unlock_txnh(ctx->trans);
          + /* lock the two atoms in address order to avoid AB-BA
          +  * deadlock with other fusers */
  75182. + if (atomf > atomh) {
  75183. + spin_lock_atom_nested(atomf);
  75184. + } else {
  75185. + spin_unlock_atom(atomh);
  75186. + spin_lock_atom(atomf);
  75187. + spin_lock_atom_nested(atomh);
  75188. + }
  75189. + if (atomh == atomf || !atom_isopen(atomh) || !atom_isopen(atomf)) {
  75190. + release_two_atoms(atomf, atomh);
  75191. + goto repeat;
  75192. + }
  75193. + atomic_dec(&atomh->refcount);
  75194. + atomic_dec(&atomf->refcount);
  75195. + capture_fuse_into(atomf, atomh);
  75196. + goto repeat;
  75197. + }
  75198. + spin_unlock_zlock(&node->lock);
  75199. + spin_unlock_atom(atomh);
  75200. +}
  75201. +
  75202. +/* This is the interface to capture unformatted nodes via their struct page
  75203. + reference. Currently it is only used in reiser4_invalidatepage */
  75204. +int try_capture_page_to_invalidate(struct page *pg)
  75205. +{
  75206. + int ret;
  75207. + jnode *node;
  75208. +
  75209. + assert("umka-292", pg != NULL);
  75210. + assert("nikita-2597", PageLocked(pg));
  75211. +
  75212. + if (IS_ERR(node = jnode_of_page(pg))) {
  75213. + return PTR_ERR(node);
  75214. + }
  75215. +
          + /* page lock is dropped while capturing; caller gets it back
          +  * before we return (see lock_page() below) */
  75216. + spin_lock_jnode(node);
  75217. + unlock_page(pg);
  75218. +
  75219. + ret = reiser4_try_capture(node, ZNODE_WRITE_LOCK, 0);
  75220. + spin_unlock_jnode(node);
          + /* drop the reference acquired via jnode_of_page() */
  75221. + jput(node);
  75222. + lock_page(pg);
  75223. + return ret;
  75224. +}
  75225. +
  75226. +/* This informs the transaction manager when a node is deleted. Add the block to the
  75227. + atom's delete set and uncapture the block.
  75228. +
  75229. +VS-FIXME-HANS: this E_REPEAT paradigm clutters the code and creates a need for
  75230. +explanations. find all the functions that use it, and unless there is some very
  75231. +good reason to use it (I have not noticed one so far and I doubt it exists, but maybe somewhere somehow....),
  75232. +move the loop to inside the function.
  75233. +
  75234. +VS-FIXME-HANS: can this code be at all streamlined? In particular, can you lock and unlock the jnode fewer times?
  75235. + */
  75236. +void reiser4_uncapture_page(struct page *pg)
  75237. +{
  75238. + jnode *node;
  75239. + txn_atom *atom;
  75240. +
  75241. + assert("umka-199", pg != NULL);
  75242. + assert("nikita-3155", PageLocked(pg));
  75243. +
  75244. + clear_page_dirty_for_io(pg);
  75245. +
  75246. + reiser4_wait_page_writeback(pg);
  75247. +
  75248. + node = jprivate(pg);
  75249. + BUG_ON(node == NULL);
  75250. +
  75251. + spin_lock_jnode(node);
  75252. +
          + /* jnode_get_atom() returns with the atom spin-locked, or NULL
          +  * when the node is not captured */
  75253. + atom = jnode_get_atom(node);
  75254. + if (atom == NULL) {
  75255. + assert("jmacd-7111", !JF_ISSET(node, JNODE_DIRTY));
  75256. + spin_unlock_jnode(node);
  75257. + return;
  75258. + }
  75259. +
  75260. + /* We can remove jnode from transaction even if it is on flush queue
  75261. + * prepped list, we only need to be sure that flush queue is not being
  75262. + * written by reiser4_write_fq(). reiser4_write_fq() does not use atom
  75263. + * spin lock for protection of the prepped nodes list, instead
  75264. + * write_fq() increments atom's nr_running_queues counters for the time
  75265. + * when prepped list is not protected by spin lock. Here we check this
  75266. + * counter if we want to remove jnode from flush queue and, if the
  75267. + * counter is not zero, wait all reiser4_write_fq() for this atom to
  75268. + * complete. This is not significant overhead. */
  75269. + while (JF_ISSET(node, JNODE_FLUSH_QUEUED) && atom->nr_running_queues) {
  75270. + spin_unlock_jnode(node);
  75271. + /*
  75272. + * at this moment we want to wait for "atom event", viz. wait
  75273. + * until @node can be removed from flush queue. But
  75274. + * reiser4_atom_wait_event() cannot be called with page locked,
  75275. + * because it deadlocks with jnode_extent_write(). Unlock page,
  75276. + * after making sure (through get_page()) that it cannot
  75277. + * be released from memory.
  75278. + */
  75279. + get_page(pg);
  75280. + unlock_page(pg);
  75281. + reiser4_atom_wait_event(atom);
  75282. + lock_page(pg);
  75283. + /*
  75284. + * page may have been detached by ->writepage()->releasepage().
  75285. + */
  75286. + reiser4_wait_page_writeback(pg);
  75287. + spin_lock_jnode(node);
  75288. + put_page(pg);
          + /* re-fetch the atom: it may have committed or fused while
          +  * we slept */
  75289. + atom = jnode_get_atom(node);
  75290. +/* VS-FIXME-HANS: improve the commenting in this function */
  75291. + if (atom == NULL) {
  75292. + spin_unlock_jnode(node);
  75293. + return;
  75294. + }
  75295. + }
  75296. + reiser4_uncapture_block(node);
  75297. + spin_unlock_atom(atom);
  75298. + jput(node);
  75299. +}
  75300. +
  75301. +/* this is used in extent's kill hook to uncapture and unhash jnodes attached to
  75302. + * inode's tree of jnodes */
  75303. +void reiser4_uncapture_jnode(jnode * node)
  75304. +{
  75305. + txn_atom *atom;
  75306. +
  75307. + assert_spin_locked(&(node->guard));
  75308. + assert("", node->pg == 0);
  75309. +
          + /* jnode_get_atom() returns with the atom locked, or NULL for an
          +  * uncaptured node (which then must be clean) */
  75310. + atom = jnode_get_atom(node);
  75311. + if (atom == NULL) {
  75312. + assert("jmacd-7111", !JF_ISSET(node, JNODE_DIRTY));
  75313. + spin_unlock_jnode(node);
  75314. + return;
  75315. + }
  75316. +
  75317. + reiser4_uncapture_block(node);
  75318. + spin_unlock_atom(atom);
          + /* drop the reference the atom held on this jnode */
  75319. + jput(node);
  75320. +}
  75321. +
  75322. +/* No-locking version of assign_txnh. Sets the transaction handle's atom pointer,
  75323. + increases atom refcount and txnh_count, adds to txnh_list. */
  75324. +static void capture_assign_txnh_nolock(txn_atom *atom, txn_handle *txnh)
  75325. +{
  75326. + assert("umka-200", atom != NULL);
  75327. + assert("umka-201", txnh != NULL);
  75328. +
  75329. + assert_spin_locked(&(txnh->hlock));
  75330. + assert_spin_locked(&(atom->alock));
  75331. + assert("jmacd-824", txnh->atom == NULL);
  75332. + assert("nikita-3540", atom_isopen(atom));
  75333. + BUG_ON(txnh->atom != NULL);
  75334. +
          + /* the txnh -> atom pointer IS counted in atom->refcount
          +  * (unlike the jnode -> atom pointer, see
          +  * capture_assign_block_nolock()) */
  75335. + atomic_inc(&atom->refcount);
  75336. + txnh->atom = atom;
          + /* NOTE(review): presumably refreshes the per-context GFP mask
          +  * now that an atom is attached -- confirm against its definition */
  75337. + reiser4_ctx_gfp_mask_set();
  75338. + list_add_tail(&txnh->txnh_link, &atom->txnh_list);
  75339. + atom->txnh_count += 1;
  75340. +}
  75341. +
  75342. +/* No-locking version of assign_block. Sets the block's atom pointer, references the
  75343. + block, adds it to the clean or dirty capture_jnode list, increments capture_count. */
  75344. +static void capture_assign_block_nolock(txn_atom *atom, jnode *node)
  75345. +{
  75346. + assert("umka-202", atom != NULL);
  75347. + assert("umka-203", node != NULL);
  75348. + assert_spin_locked(&(node->guard));
  75349. + assert_spin_locked(&(atom->alock));
  75350. + assert("jmacd-323", node->atom == NULL);
  75351. + BUG_ON(!list_empty_careful(&node->capture_link));
  75352. + assert("nikita-3470", !JF_ISSET(node, JNODE_DIRTY));
  75353. +
  75354. + /* Pointer from jnode to atom is not counted in atom->refcount. */
  75355. + node->atom = atom;
  75356. +
          + /* node is clean here (asserted above), so it goes on the clean
          +  * list; do_jnode_make_dirty() moves it later if needed */
  75357. + list_add_tail(&node->capture_link, ATOM_CLEAN_LIST(atom));
  75358. + atom->capture_count += 1;
  75359. + /* reference to jnode is acquired by atom. */
  75360. + jref(node);
  75361. +
  75362. + ON_DEBUG(count_jnode(atom, node, NOT_CAPTURED, CLEAN_LIST, 1));
  75363. +
  75364. + LOCK_CNT_INC(t_refs);
  75365. +}
  75366. +
  75367. +/* common code for dirtying both unformatted jnodes and formatted znodes. */
  75368. +static void do_jnode_make_dirty(jnode * node, txn_atom * atom)
  75369. +{
  75370. + assert_spin_locked(&(node->guard));
  75371. + assert_spin_locked(&(atom->alock));
  75372. + assert("jmacd-3981", !JF_ISSET(node, JNODE_DIRTY));
  75373. +
  75374. + JF_SET(node, JNODE_DIRTY);
  75375. +
  75376. + if (!JF_ISSET(node, JNODE_CLUSTER_PAGE))
  75377. + get_current_context()->nr_marked_dirty++;
  75378. +
  75379. + /* We grab2flush_reserve one additional block only if node was
  75380. + not CREATED and jnode_flush did not sort it into neither
  75381. + relocate set nor overwrite one. If node is in overwrite or
  75382. + relocate set we assume that atom's flush reserved counter was
  75383. + already adjusted. */
  75384. + if (!JF_ISSET(node, JNODE_CREATED) && !JF_ISSET(node, JNODE_RELOC)
  75385. + && !JF_ISSET(node, JNODE_OVRWR) && jnode_is_leaf(node)
  75386. + && !jnode_is_cluster_page(node)) {
  75387. + assert("vs-1093", !reiser4_blocknr_is_fake(&node->blocknr));
  75388. + assert("vs-1506", *jnode_get_block(node) != 0);
  75389. + grabbed2flush_reserved_nolock(atom, (__u64) 1);
  75390. + JF_SET(node, JNODE_FLUSH_RESERVED);
  75391. + }
  75392. +
  75393. + if (!JF_ISSET(node, JNODE_FLUSH_QUEUED)) {
  75394. + /* If the atom is not set yet, it will be added to the appropriate list in
  75395. + capture_assign_block_nolock. */
  75396. + /* Sometimes a node is set dirty before being captured -- the case for new
  75397. + jnodes. In that case the jnode will be added to the appropriate list
  75398. + in capture_assign_block_nolock. Another reason not to re-link jnode is
  75399. + that jnode is on a flush queue (see flush.c for details) */
  75400. +
  75401. + int level = jnode_get_level(node);
  75402. +
  75403. + assert("nikita-3152", !JF_ISSET(node, JNODE_OVRWR));
  75404. + assert("zam-654", atom->stage < ASTAGE_PRE_COMMIT);
  75405. + assert("nikita-2607", 0 <= level);
  75406. + assert("nikita-2606", level <= REAL_MAX_ZTREE_HEIGHT);
  75407. +
  75408. + /* move node to atom's dirty list */
  75409. + list_move_tail(&node->capture_link, ATOM_DIRTY_LIST(atom, level));
  75410. + ON_DEBUG(count_jnode
  75411. + (atom, node, NODE_LIST(node), DIRTY_LIST, 1));
  75412. + }
  75413. +}
  75414. +
  75415. +/* Set the dirty status for this (spin locked) jnode. */
  75416. +void jnode_make_dirty_locked(jnode * node)
  75417. +{
  75418. + assert("umka-204", node != NULL);
  75419. + assert_spin_locked(&(node->guard));
  75420. +
  75421. + if (REISER4_DEBUG && rofs_jnode(node)) {
  75422. + warning("nikita-3365", "Dirtying jnode on rofs");
  75423. + dump_stack();
  75424. + }
  75425. +
  75426. + /* Fast check for already dirty node */
  75427. + if (!JF_ISSET(node, JNODE_DIRTY)) {
  75428. + txn_atom *atom;
  75429. +
  75430. + atom = jnode_get_atom(node);
  75431. + assert("vs-1094", atom);
  75432. + /* Check jnode dirty status again because node spin lock might
  75433. + * be released inside jnode_get_atom(). */
  75434. + if (likely(!JF_ISSET(node, JNODE_DIRTY)))
  75435. + do_jnode_make_dirty(node, atom);
  75436. + spin_unlock_atom(atom);
  75437. + }
  75438. +}
  75439. +
  75440. +/* Set the dirty status for this znode. */
  75441. +void znode_make_dirty(znode * z)
  75442. +{
  75443. + jnode *node;
  75444. + struct page *page;
  75445. +
  75446. + assert("umka-204", z != NULL);
  75447. + assert("nikita-3290", znode_above_root(z) || znode_is_loaded(z));
  75448. + assert("nikita-3560", znode_is_write_locked(z));
  75449. +
  75450. + node = ZJNODE(z);
  75451. + /* znode is longterm locked, we can check dirty bit without spinlock */
  75452. + if (JF_ISSET(node, JNODE_DIRTY)) {
  75453. + /* znode is dirty already. All we have to do is to change znode version */
  75454. + z->version = znode_build_version(jnode_get_tree(node));
  75455. + return;
  75456. + }
  75457. +
  75458. + spin_lock_jnode(node);
  75459. + jnode_make_dirty_locked(node);
  75460. + page = jnode_page(node);
  75461. + if (page != NULL) {
  75462. + /* this is useful assertion (allows one to check that no
  75463. + * modifications are lost due to update of in-flight page),
  75464. + * but it requires locking on page to check PG_writeback
  75465. + * bit. */
  75466. + /* assert("nikita-3292",
  75467. + !PageWriteback(page) || ZF_ISSET(z, JNODE_WRITEBACK)); */
  75468. + get_page(page);
  75469. +
  75470. + /* jnode lock is not needed for the rest of
  75471. + * znode_set_dirty(). */
  75472. + spin_unlock_jnode(node);
  75473. + /* reiser4 file write code calls set_page_dirty for
  75474. + * unformatted nodes, for formatted nodes we do it here. */
  75475. + set_page_dirty_notag(page);
  75476. + put_page(page);
  75477. + /* bump version counter in znode */
  75478. + z->version = znode_build_version(jnode_get_tree(node));
  75479. + } else {
  75480. + assert("zam-596", znode_above_root(JZNODE(node)));
  75481. + spin_unlock_jnode(node);
  75482. + }
  75483. +
  75484. + assert("nikita-1900", znode_is_write_locked(z));
  75485. + assert("jmacd-9777", node->atom != NULL);
  75486. +}
  75487. +
  75488. +int reiser4_sync_atom(txn_atom * atom)
  75489. +{
  75490. + int result;
  75491. + txn_handle *txnh;
  75492. +
  75493. + txnh = get_current_context()->trans;
  75494. +
  75495. + result = 0;
  75496. + if (atom != NULL) {
  75497. + if (atom->stage < ASTAGE_PRE_COMMIT) {
  75498. + spin_lock_txnh(txnh);
  75499. + capture_assign_txnh_nolock(atom, txnh);
  75500. + result = force_commit_atom(txnh);
  75501. + } else if (atom->stage < ASTAGE_POST_COMMIT) {
  75502. + /* wait atom commit */
  75503. + reiser4_atom_wait_event(atom);
  75504. + /* try once more */
  75505. + result = RETERR(-E_REPEAT);
  75506. + } else
  75507. + spin_unlock_atom(atom);
  75508. + }
  75509. + return result;
  75510. +}
  75511. +
  75512. +#if REISER4_DEBUG
  75513. +
  75514. +/* move jnode form one list to another
  75515. + call this after atom->capture_count is updated */
  75516. +void
  75517. +count_jnode(txn_atom * atom, jnode * node, atom_list old_list,
  75518. + atom_list new_list, int check_lists)
  75519. +{
  75520. + struct list_head *pos;
  75521. +
  75522. + assert("zam-1018", atom_is_protected(atom));
  75523. + assert_spin_locked(&(node->guard));
  75524. + assert("", NODE_LIST(node) == old_list);
  75525. +
  75526. + switch (NODE_LIST(node)) {
  75527. + case NOT_CAPTURED:
  75528. + break;
  75529. + case DIRTY_LIST:
  75530. + assert("", atom->dirty > 0);
  75531. + atom->dirty--;
  75532. + break;
  75533. + case CLEAN_LIST:
  75534. + assert("", atom->clean > 0);
  75535. + atom->clean--;
  75536. + break;
  75537. + case FQ_LIST:
  75538. + assert("", atom->fq > 0);
  75539. + atom->fq--;
  75540. + break;
  75541. + case WB_LIST:
  75542. + assert("", atom->wb > 0);
  75543. + atom->wb--;
  75544. + break;
  75545. + case OVRWR_LIST:
  75546. + assert("", atom->ovrwr > 0);
  75547. + atom->ovrwr--;
  75548. + break;
  75549. + default:
  75550. + impossible("", "");
  75551. + }
  75552. +
  75553. + switch (new_list) {
  75554. + case NOT_CAPTURED:
  75555. + break;
  75556. + case DIRTY_LIST:
  75557. + atom->dirty++;
  75558. + break;
  75559. + case CLEAN_LIST:
  75560. + atom->clean++;
  75561. + break;
  75562. + case FQ_LIST:
  75563. + atom->fq++;
  75564. + break;
  75565. + case WB_LIST:
  75566. + atom->wb++;
  75567. + break;
  75568. + case OVRWR_LIST:
  75569. + atom->ovrwr++;
  75570. + break;
  75571. + default:
  75572. + impossible("", "");
  75573. + }
  75574. + ASSIGN_NODE_LIST(node, new_list);
  75575. + if (0 && check_lists) {
  75576. + int count;
  75577. + tree_level level;
  75578. +
  75579. + count = 0;
  75580. +
  75581. + /* flush queue list */
  75582. + /* reiser4_check_fq(atom); */
  75583. +
  75584. + /* dirty list */
  75585. + count = 0;
  75586. + for (level = 0; level < REAL_MAX_ZTREE_HEIGHT + 1; level += 1) {
  75587. + list_for_each(pos, ATOM_DIRTY_LIST(atom, level))
  75588. + count++;
  75589. + }
  75590. + if (count != atom->dirty)
  75591. + warning("", "dirty counter %d, real %d\n", atom->dirty,
  75592. + count);
  75593. +
  75594. + /* clean list */
  75595. + count = 0;
  75596. + list_for_each(pos, ATOM_CLEAN_LIST(atom))
  75597. + count++;
  75598. + if (count != atom->clean)
  75599. + warning("", "clean counter %d, real %d\n", atom->clean,
  75600. + count);
  75601. +
  75602. + /* wb list */
  75603. + count = 0;
  75604. + list_for_each(pos, ATOM_WB_LIST(atom))
  75605. + count++;
  75606. + if (count != atom->wb)
  75607. + warning("", "wb counter %d, real %d\n", atom->wb,
  75608. + count);
  75609. +
  75610. + /* overwrite list */
  75611. + count = 0;
  75612. + list_for_each(pos, ATOM_OVRWR_LIST(atom))
  75613. + count++;
  75614. +
  75615. + if (count != atom->ovrwr)
  75616. + warning("", "ovrwr counter %d, real %d\n", atom->ovrwr,
  75617. + count);
  75618. + }
  75619. + assert("vs-1624", atom->num_queued == atom->fq);
  75620. + if (atom->capture_count !=
  75621. + atom->dirty + atom->clean + atom->ovrwr + atom->wb + atom->fq) {
  75622. + printk
  75623. + ("count %d, dirty %d clean %d ovrwr %d wb %d fq %d\n",
  75624. + atom->capture_count, atom->dirty, atom->clean, atom->ovrwr,
  75625. + atom->wb, atom->fq);
  75626. + assert("vs-1622",
  75627. + atom->capture_count ==
  75628. + atom->dirty + atom->clean + atom->ovrwr + atom->wb +
  75629. + atom->fq);
  75630. + }
  75631. +}
  75632. +
  75633. +#endif
  75634. +
  75635. +int reiser4_capture_super_block(struct super_block *s)
  75636. +{
  75637. + int result;
  75638. + znode *uber;
  75639. + lock_handle lh;
  75640. +
  75641. + init_lh(&lh);
  75642. + result = get_uber_znode(reiser4_get_tree(s),
  75643. + ZNODE_WRITE_LOCK, ZNODE_LOCK_LOPRI, &lh);
  75644. + if (result)
  75645. + return result;
  75646. +
  75647. + uber = lh.node;
  75648. + /* Grabbing one block for superblock */
  75649. + result = reiser4_grab_space_force((__u64) 1, BA_RESERVED);
  75650. + if (result != 0)
  75651. + return result;
  75652. +
  75653. + znode_make_dirty(uber);
  75654. +
  75655. + done_lh(&lh);
  75656. + return 0;
  75657. +}
  75658. +
  75659. +/* Wakeup every handle on the atom's WAITFOR list */
  75660. +static void wakeup_atom_waitfor_list(txn_atom * atom)
  75661. +{
  75662. + txn_wait_links *wlinks;
  75663. +
  75664. + assert("umka-210", atom != NULL);
  75665. +
  75666. + /* atom is locked */
  75667. + list_for_each_entry(wlinks, &atom->fwaitfor_list, _fwaitfor_link) {
  75668. + if (wlinks->waitfor_cb == NULL ||
  75669. + wlinks->waitfor_cb(atom, wlinks))
  75670. + /* Wake up. */
  75671. + reiser4_wake_up(wlinks->_lock_stack);
  75672. + }
  75673. +}
  75674. +
  75675. +/* Wakeup every handle on the atom's WAITING list */
  75676. +static void wakeup_atom_waiting_list(txn_atom * atom)
  75677. +{
  75678. + txn_wait_links *wlinks;
  75679. +
  75680. + assert("umka-211", atom != NULL);
  75681. +
  75682. + /* atom is locked */
  75683. + list_for_each_entry(wlinks, &atom->fwaiting_list, _fwaiting_link) {
  75684. + if (wlinks->waiting_cb == NULL ||
  75685. + wlinks->waiting_cb(atom, wlinks))
  75686. + /* Wake up. */
  75687. + reiser4_wake_up(wlinks->_lock_stack);
  75688. + }
  75689. +}
  75690. +
  75691. +/* helper function used by capture_fuse_wait() to avoid "spurious wake-ups" */
  75692. +static int wait_for_fusion(txn_atom * atom, txn_wait_links * wlinks)
  75693. +{
  75694. + assert("nikita-3330", atom != NULL);
  75695. + assert_spin_locked(&(atom->alock));
  75696. +
  75697. + /* atom->txnh_count == 1 is for waking waiters up if we are releasing
  75698. + * last transaction handle. */
  75699. + return atom->stage != ASTAGE_CAPTURE_WAIT || atom->txnh_count == 1;
  75700. +}
  75701. +
  75702. +/* The general purpose of this function is to wait on the first of two possible events.
  75703. + The situation is that a handle (and its atom atomh) is blocked trying to capture a
  75704. + block (i.e., node) but the node's atom (atomf) is in the CAPTURE_WAIT state. The
  75705. + handle's atom (atomh) is not in the CAPTURE_WAIT state. However, atomh could fuse with
  75706. + another atom or, due to age, enter the CAPTURE_WAIT state itself, at which point it
  75707. + needs to unblock the handle to avoid deadlock. When the txnh is unblocked it will
  75708. + proceed and fuse the two atoms in the CAPTURE_WAIT state.
  75709. +
  75710. + In other words, if either atomh or atomf change state, the handle will be awakened,
  75711. + thus there are two lists per atom: WAITING and WAITFOR.
  75712. +
  75713. + This is also called by capture_assign_txnh with (atomh == NULL) to wait for atomf to
  75714. + close but it is not assigned to an atom of its own.
  75715. +
  75716. + Lock ordering in this method: all four locks are held: JNODE_LOCK, TXNH_LOCK,
  75717. + BOTH_ATOM_LOCKS. Result: all four locks are released.
  75718. +*/
  75719. +static int capture_fuse_wait(txn_handle * txnh, txn_atom * atomf,
  75720. + txn_atom * atomh, txn_capture mode)
  75721. +{
  75722. + int ret;
  75723. + txn_wait_links wlinks;
  75724. +
  75725. + assert("umka-213", txnh != NULL);
  75726. + assert("umka-214", atomf != NULL);
  75727. +
  75728. + if ((mode & TXN_CAPTURE_NONBLOCKING) != 0) {
  75729. + spin_unlock_txnh(txnh);
  75730. + spin_unlock_atom(atomf);
  75731. +
  75732. + if (atomh) {
  75733. + spin_unlock_atom(atomh);
  75734. + }
  75735. +
  75736. + return RETERR(-E_BLOCK);
  75737. + }
  75738. +
  75739. + /* Initialize the waiting list links. */
  75740. + init_wlinks(&wlinks);
  75741. +
  75742. + /* Add txnh to atomf's waitfor list, unlock atomf. */
  75743. + list_add_tail(&wlinks._fwaitfor_link, &atomf->fwaitfor_list);
  75744. + wlinks.waitfor_cb = wait_for_fusion;
  75745. + atomic_inc(&atomf->refcount);
  75746. + spin_unlock_atom(atomf);
  75747. +
  75748. + if (atomh) {
  75749. + /* Add txnh to atomh's waiting list, unlock atomh. */
  75750. + list_add_tail(&wlinks._fwaiting_link, &atomh->fwaiting_list);
  75751. + atomic_inc(&atomh->refcount);
  75752. + spin_unlock_atom(atomh);
  75753. + }
  75754. +
  75755. + /* Go to sleep. */
  75756. + spin_unlock_txnh(txnh);
  75757. +
  75758. + ret = reiser4_prepare_to_sleep(wlinks._lock_stack);
  75759. + if (ret == 0) {
  75760. + reiser4_go_to_sleep(wlinks._lock_stack);
  75761. + ret = RETERR(-E_REPEAT);
  75762. + }
  75763. +
  75764. + /* Remove from the waitfor list. */
  75765. + spin_lock_atom(atomf);
  75766. +
  75767. + list_del(&wlinks._fwaitfor_link);
  75768. + atom_dec_and_unlock(atomf);
  75769. +
  75770. + if (atomh) {
  75771. + /* Remove from the waiting list. */
  75772. + spin_lock_atom(atomh);
  75773. + list_del(&wlinks._fwaiting_link);
  75774. + atom_dec_and_unlock(atomh);
  75775. + }
  75776. + return ret;
  75777. +}
  75778. +
  75779. +static void lock_two_atoms(txn_atom * one, txn_atom * two)
  75780. +{
  75781. + assert("zam-1067", one != two);
  75782. +
  75783. + /* lock the atom with lesser address first */
  75784. + if (one < two) {
  75785. + spin_lock_atom(one);
  75786. + spin_lock_atom_nested(two);
  75787. + } else {
  75788. + spin_lock_atom(two);
  75789. + spin_lock_atom_nested(one);
  75790. + }
  75791. +}
  75792. +
  75793. +/* Perform the necessary work to prepare for fusing two atoms, which involves
  75794. + * acquiring two atom locks in the proper order. If one of the node's atom is
  75795. + * blocking fusion (i.e., it is in the CAPTURE_WAIT stage) and the handle's
  75796. + * atom is not then the handle's request is put to sleep. If the node's atom
  75797. + * is committing, then the node can be copy-on-captured. Otherwise, pick the
  75798. + * atom with fewer pointers to be fused into the atom with more pointer and
  75799. + * call capture_fuse_into.
  75800. + */
  75801. +static int capture_init_fusion(jnode *node, txn_handle *txnh, txn_capture mode)
  75802. +{
  75803. + txn_atom * txnh_atom = txnh->atom;
  75804. + txn_atom * block_atom = node->atom;
  75805. +
  75806. + atomic_inc(&txnh_atom->refcount);
  75807. + atomic_inc(&block_atom->refcount);
  75808. +
  75809. + spin_unlock_txnh(txnh);
  75810. + spin_unlock_jnode(node);
  75811. +
  75812. + lock_two_atoms(txnh_atom, block_atom);
  75813. +
  75814. + if (txnh->atom != txnh_atom || node->atom != block_atom ) {
  75815. + release_two_atoms(txnh_atom, block_atom);
  75816. + return RETERR(-E_REPEAT);
  75817. + }
  75818. +
  75819. + atomic_dec(&txnh_atom->refcount);
  75820. + atomic_dec(&block_atom->refcount);
  75821. +
  75822. + assert ("zam-1066", atom_isopen(txnh_atom));
  75823. +
  75824. + if (txnh_atom->stage >= block_atom->stage ||
  75825. + (block_atom->stage == ASTAGE_CAPTURE_WAIT && block_atom->txnh_count == 0)) {
  75826. + capture_fuse_into(txnh_atom, block_atom);
  75827. + return RETERR(-E_REPEAT);
  75828. + }
  75829. + spin_lock_txnh(txnh);
  75830. + return capture_fuse_wait(txnh, block_atom, txnh_atom, mode);
  75831. +}
  75832. +
  75833. +/* This function splices together two jnode lists (small and large) and sets all jnodes in
  75834. + the small list to point to the large atom. Returns the length of the list. */
  75835. +static int
  75836. +capture_fuse_jnode_lists(txn_atom *large, struct list_head *large_head,
  75837. + struct list_head *small_head)
  75838. +{
  75839. + int count = 0;
  75840. + jnode *node;
  75841. +
  75842. + assert("umka-218", large != NULL);
  75843. + assert("umka-219", large_head != NULL);
  75844. + assert("umka-220", small_head != NULL);
  75845. + /* small atom should be locked also. */
  75846. + assert_spin_locked(&(large->alock));
  75847. +
  75848. + /* For every jnode on small's capture list... */
  75849. + list_for_each_entry(node, small_head, capture_link) {
  75850. + count += 1;
  75851. +
  75852. + /* With the jnode lock held, update atom pointer. */
  75853. + spin_lock_jnode(node);
  75854. + node->atom = large;
  75855. + spin_unlock_jnode(node);
  75856. + }
  75857. +
  75858. + /* Splice the lists. */
  75859. + list_splice_init(small_head, large_head->prev);
  75860. +
  75861. + return count;
  75862. +}
  75863. +
  75864. +/* This function splices together two txnh lists (small and large) and sets all txn handles in
  75865. + the small list to point to the large atom. Returns the length of the list. */
  75866. +static int
  75867. +capture_fuse_txnh_lists(txn_atom *large, struct list_head *large_head,
  75868. + struct list_head *small_head)
  75869. +{
  75870. + int count = 0;
  75871. + txn_handle *txnh;
  75872. +
  75873. + assert("umka-221", large != NULL);
  75874. + assert("umka-222", large_head != NULL);
  75875. + assert("umka-223", small_head != NULL);
  75876. +
  75877. + /* Adjust every txnh to the new atom. */
  75878. + list_for_each_entry(txnh, small_head, txnh_link) {
  75879. + count += 1;
  75880. +
  75881. + /* With the txnh lock held, update atom pointer. */
  75882. + spin_lock_txnh(txnh);
  75883. + txnh->atom = large;
  75884. + spin_unlock_txnh(txnh);
  75885. + }
  75886. +
  75887. + /* Splice the txn_handle list. */
  75888. + list_splice_init(small_head, large_head->prev);
  75889. +
  75890. + return count;
  75891. +}
  75892. +
  75893. +/* This function fuses two atoms. The captured nodes and handles belonging to SMALL are
  75894. + added to LARGE and their ->atom pointers are all updated. The associated counts are
  75895. + updated as well, and any waiting handles belonging to either are awakened. Finally the
  75896. + smaller atom's refcount is decremented.
  75897. +*/
  75898. +static void capture_fuse_into(txn_atom * small, txn_atom * large)
  75899. +{
  75900. + int level;
  75901. + unsigned zcount = 0;
  75902. + unsigned tcount = 0;
  75903. +
  75904. + assert("umka-224", small != NULL);
  75905. + assert("umka-225", small != NULL);
  75906. +
  75907. + assert_spin_locked(&(large->alock));
  75908. + assert_spin_locked(&(small->alock));
  75909. +
  75910. + assert("jmacd-201", atom_isopen(small));
  75911. + assert("jmacd-202", atom_isopen(large));
  75912. +
  75913. + /* Splice and update the per-level dirty jnode lists */
  75914. + for (level = 0; level < REAL_MAX_ZTREE_HEIGHT + 1; level += 1) {
  75915. + zcount +=
  75916. + capture_fuse_jnode_lists(large,
  75917. + ATOM_DIRTY_LIST(large, level),
  75918. + ATOM_DIRTY_LIST(small, level));
  75919. + }
  75920. +
  75921. + /* Splice and update the [clean,dirty] jnode and txnh lists */
  75922. + zcount +=
  75923. + capture_fuse_jnode_lists(large, ATOM_CLEAN_LIST(large),
  75924. + ATOM_CLEAN_LIST(small));
  75925. + zcount +=
  75926. + capture_fuse_jnode_lists(large, ATOM_OVRWR_LIST(large),
  75927. + ATOM_OVRWR_LIST(small));
  75928. + zcount +=
  75929. + capture_fuse_jnode_lists(large, ATOM_WB_LIST(large),
  75930. + ATOM_WB_LIST(small));
  75931. + zcount +=
  75932. + capture_fuse_jnode_lists(large, &large->inodes, &small->inodes);
  75933. + tcount +=
  75934. + capture_fuse_txnh_lists(large, &large->txnh_list,
  75935. + &small->txnh_list);
  75936. +
  75937. + /* Check our accounting. */
  75938. + assert("jmacd-1063",
  75939. + zcount + small->num_queued == small->capture_count);
  75940. + assert("jmacd-1065", tcount == small->txnh_count);
  75941. +
  75942. + /* sum numbers of waiters threads */
  75943. + large->nr_waiters += small->nr_waiters;
  75944. + small->nr_waiters = 0;
  75945. +
  75946. + /* splice flush queues */
  75947. + reiser4_fuse_fq(large, small);
  75948. +
  75949. + /* update counter of jnode on every atom' list */
  75950. + ON_DEBUG(large->dirty += small->dirty;
  75951. + small->dirty = 0;
  75952. + large->clean += small->clean;
  75953. + small->clean = 0;
  75954. + large->ovrwr += small->ovrwr;
  75955. + small->ovrwr = 0;
  75956. + large->wb += small->wb;
  75957. + small->wb = 0;
  75958. + large->fq += small->fq;
  75959. + small->fq = 0;);
  75960. +
  75961. + /* count flushers in result atom */
  75962. + large->nr_flushers += small->nr_flushers;
  75963. + small->nr_flushers = 0;
  75964. +
  75965. + /* update counts of flushed nodes */
  75966. + large->flushed += small->flushed;
  75967. + small->flushed = 0;
  75968. +
  75969. + /* Transfer list counts to large. */
  75970. + large->txnh_count += small->txnh_count;
  75971. + large->capture_count += small->capture_count;
  75972. +
  75973. + /* Add all txnh references to large. */
  75974. + atomic_add(small->txnh_count, &large->refcount);
  75975. + atomic_sub(small->txnh_count, &small->refcount);
  75976. +
  75977. + /* Reset small counts */
  75978. + small->txnh_count = 0;
  75979. + small->capture_count = 0;
  75980. +
  75981. + /* Assign the oldest start_time, merge flags. */
  75982. + large->start_time = min(large->start_time, small->start_time);
  75983. + large->flags |= small->flags;
  75984. +
  75985. + /* Merge blocknr sets. */
  75986. + blocknr_set_merge(&small->wandered_map, &large->wandered_map);
  75987. +
  75988. + /* Merge delete sets. */
  75989. + atom_dset_merge(small, large);
  75990. +
  75991. + /* Merge allocated/deleted file counts */
  75992. + large->nr_objects_deleted += small->nr_objects_deleted;
  75993. + large->nr_objects_created += small->nr_objects_created;
  75994. +
  75995. + small->nr_objects_deleted = 0;
  75996. + small->nr_objects_created = 0;
  75997. +
  75998. + /* Merge allocated blocks counts */
  75999. + large->nr_blocks_allocated += small->nr_blocks_allocated;
  76000. +
  76001. + large->nr_running_queues += small->nr_running_queues;
  76002. + small->nr_running_queues = 0;
  76003. +
  76004. + /* Merge blocks reserved for overwrite set. */
  76005. + large->flush_reserved += small->flush_reserved;
  76006. + small->flush_reserved = 0;
  76007. +
  76008. + if (large->stage < small->stage) {
  76009. + /* Large only needs to notify if it has changed state. */
  76010. + reiser4_atom_set_stage(large, small->stage);
  76011. + wakeup_atom_waiting_list(large);
  76012. + }
  76013. +
  76014. + reiser4_atom_set_stage(small, ASTAGE_INVALID);
  76015. +
  76016. + /* Notify any waiters--small needs to unload its wait lists. Waiters
  76017. + actually remove themselves from the list before returning from the
  76018. + fuse_wait function. */
  76019. + wakeup_atom_waiting_list(small);
  76020. +
  76021. + /* Unlock atoms */
  76022. + spin_unlock_atom(large);
  76023. + atom_dec_and_unlock(small);
  76024. +}
  76025. +
  76026. +/* TXNMGR STUFF */
  76027. +
  76028. +/* Release a block from the atom, reversing the effects of being captured,
  76029. + do not release atom's reference to jnode due to holding spin-locks.
  76030. + Currently this is only called when the atom commits.
  76031. +
  76032. + NOTE: this function does not release a (journal) reference to jnode
  76033. + due to locking optimizations, you should call jput() somewhere after
  76034. + calling reiser4_uncapture_block(). */
  76035. +void reiser4_uncapture_block(jnode * node)
  76036. +{
  76037. + txn_atom *atom;
  76038. +
  76039. + assert("umka-226", node != NULL);
  76040. + atom = node->atom;
  76041. + assert("umka-228", atom != NULL);
  76042. +
  76043. + assert("jmacd-1021", node->atom == atom);
  76044. + assert_spin_locked(&(node->guard));
  76045. + assert("jmacd-1023", atom_is_protected(atom));
  76046. +
  76047. + JF_CLR(node, JNODE_DIRTY);
  76048. + JF_CLR(node, JNODE_RELOC);
  76049. + JF_CLR(node, JNODE_OVRWR);
  76050. + JF_CLR(node, JNODE_CREATED);
  76051. + JF_CLR(node, JNODE_WRITEBACK);
  76052. + JF_CLR(node, JNODE_REPACK);
  76053. +
  76054. + list_del_init(&node->capture_link);
  76055. + if (JF_ISSET(node, JNODE_FLUSH_QUEUED)) {
  76056. + assert("zam-925", atom_isopen(atom));
  76057. + assert("vs-1623", NODE_LIST(node) == FQ_LIST);
  76058. + ON_DEBUG(atom->num_queued--);
  76059. + JF_CLR(node, JNODE_FLUSH_QUEUED);
  76060. + }
  76061. + atom->capture_count -= 1;
  76062. + ON_DEBUG(count_jnode(atom, node, NODE_LIST(node), NOT_CAPTURED, 1));
  76063. + node->atom = NULL;
  76064. +
  76065. + spin_unlock_jnode(node);
  76066. + LOCK_CNT_DEC(t_refs);
  76067. +}
  76068. +
  76069. +/* Unconditional insert of jnode into atom's overwrite list. Currently used in
  76070. + bitmap-based allocator code for adding modified bitmap blocks the
  76071. + transaction. @atom and @node are spin locked */
  76072. +void insert_into_atom_ovrwr_list(txn_atom * atom, jnode * node)
  76073. +{
  76074. + assert("zam-538", atom_is_protected(atom));
  76075. + assert_spin_locked(&(node->guard));
  76076. + assert("zam-899", JF_ISSET(node, JNODE_OVRWR));
  76077. + assert("zam-543", node->atom == NULL);
  76078. + assert("vs-1433", !jnode_is_unformatted(node) && !jnode_is_znode(node));
  76079. +
  76080. + list_add(&node->capture_link, ATOM_OVRWR_LIST(atom));
  76081. + jref(node);
  76082. + node->atom = atom;
  76083. + atom->capture_count++;
  76084. + ON_DEBUG(count_jnode(atom, node, NODE_LIST(node), OVRWR_LIST, 1));
  76085. +}
  76086. +
  76087. +static int count_deleted_blocks_actor(txn_atom * atom,
  76088. + const reiser4_block_nr * a,
  76089. + const reiser4_block_nr * b, void *data)
  76090. +{
  76091. + reiser4_block_nr *counter = data;
  76092. +
  76093. + assert("zam-995", data != NULL);
  76094. + assert("zam-996", a != NULL);
  76095. + if (b == NULL)
  76096. + *counter += 1;
  76097. + else
  76098. + *counter += *b;
  76099. + return 0;
  76100. +}
  76101. +
  76102. +reiser4_block_nr txnmgr_count_deleted_blocks(void)
  76103. +{
  76104. + reiser4_block_nr result;
  76105. + txn_mgr *tmgr = &get_super_private(reiser4_get_current_sb())->tmgr;
  76106. + txn_atom *atom;
  76107. +
  76108. + result = 0;
  76109. +
  76110. + spin_lock_txnmgr(tmgr);
  76111. + list_for_each_entry(atom, &tmgr->atoms_list, atom_link) {
  76112. + spin_lock_atom(atom);
  76113. + if (atom_isopen(atom))
  76114. + atom_dset_deferred_apply(atom, count_deleted_blocks_actor, &result, 0);
  76115. + spin_unlock_atom(atom);
  76116. + }
  76117. + spin_unlock_txnmgr(tmgr);
  76118. +
  76119. + return result;
  76120. +}
  76121. +
  76122. +void atom_dset_init(txn_atom *atom)
  76123. +{
  76124. + if (reiser4_is_set(reiser4_get_current_sb(), REISER4_DISCARD)) {
  76125. + blocknr_list_init(&atom->discard.delete_set);
  76126. + } else {
  76127. + blocknr_set_init(&atom->nodiscard.delete_set);
  76128. + }
  76129. +}
  76130. +
  76131. +void atom_dset_destroy(txn_atom *atom)
  76132. +{
  76133. + if (reiser4_is_set(reiser4_get_current_sb(), REISER4_DISCARD)) {
  76134. + blocknr_list_destroy(&atom->discard.delete_set);
  76135. + } else {
  76136. + blocknr_set_destroy(&atom->nodiscard.delete_set);
  76137. + }
  76138. +}
  76139. +
  76140. +void atom_dset_merge(txn_atom *from, txn_atom *to)
  76141. +{
  76142. + if (reiser4_is_set(reiser4_get_current_sb(), REISER4_DISCARD)) {
  76143. + blocknr_list_merge(&from->discard.delete_set, &to->discard.delete_set);
  76144. + } else {
  76145. + blocknr_set_merge(&from->nodiscard.delete_set, &to->nodiscard.delete_set);
  76146. + }
  76147. +}
  76148. +
  76149. +int atom_dset_deferred_apply(txn_atom* atom,
  76150. + blocknr_set_actor_f actor,
  76151. + void *data,
  76152. + int delete)
  76153. +{
  76154. + int ret;
  76155. +
  76156. + if (reiser4_is_set(reiser4_get_current_sb(), REISER4_DISCARD)) {
  76157. + ret = blocknr_list_iterator(atom,
  76158. + &atom->discard.delete_set,
  76159. + actor,
  76160. + data,
  76161. + delete);
  76162. + } else {
  76163. + ret = blocknr_set_iterator(atom,
  76164. + &atom->nodiscard.delete_set,
  76165. + actor,
  76166. + data,
  76167. + delete);
  76168. + }
  76169. +
  76170. + return ret;
  76171. +}
  76172. +
  76173. +extern int atom_dset_deferred_add_extent(txn_atom *atom,
  76174. + void **new_entry,
  76175. + const reiser4_block_nr *start,
  76176. + const reiser4_block_nr *len)
  76177. +{
  76178. + int ret;
  76179. +
  76180. + if (reiser4_is_set(reiser4_get_current_sb(), REISER4_DISCARD)) {
  76181. + ret = blocknr_list_add_extent(atom,
  76182. + &atom->discard.delete_set,
  76183. + (blocknr_list_entry**)new_entry,
  76184. + start,
  76185. + len);
  76186. + } else {
  76187. + ret = blocknr_set_add_extent(atom,
  76188. + &atom->nodiscard.delete_set,
  76189. + (blocknr_set_entry**)new_entry,
  76190. + start,
  76191. + len);
  76192. + }
  76193. +
  76194. + return ret;
  76195. +}
  76196. +
  76197. +/*
  76198. + * Local variables:
  76199. + * c-indentation-style: "K&R"
  76200. + * mode-name: "LC"
  76201. + * c-basic-offset: 8
  76202. + * tab-width: 8
  76203. + * fill-column: 79
  76204. + * End:
  76205. + */
  76206. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/txnmgr.h linux-4.14.2/fs/reiser4/txnmgr.h
  76207. --- linux-4.14.2.orig/fs/reiser4/txnmgr.h 1970-01-01 01:00:00.000000000 +0100
  76208. +++ linux-4.14.2/fs/reiser4/txnmgr.h 2017-11-26 22:13:09.000000000 +0100
  76209. @@ -0,0 +1,755 @@
  76210. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  76211. + * reiser4/README */
  76212. +
  76213. +/* data-types and function declarations for transaction manager. See txnmgr.c
  76214. + * for details. */
  76215. +
  76216. +#ifndef __REISER4_TXNMGR_H__
  76217. +#define __REISER4_TXNMGR_H__
  76218. +
  76219. +#include "forward.h"
  76220. +#include "dformat.h"
  76221. +
  76222. +#include <linux/fs.h>
  76223. +#include <linux/mm.h>
  76224. +#include <linux/types.h>
  76225. +#include <linux/spinlock.h>
  76226. +#include <asm/atomic.h>
  76227. +#include <linux/wait.h>
  76228. +
  76229. +/* TYPE DECLARATIONS */
  76230. +
  76231. +/* This enumeration describes the possible types of a capture request (reiser4_try_capture).
  76232. + A capture request dynamically assigns a block to the calling thread's transaction
  76233. + handle. */
  76234. +typedef enum {
  76235. + /* A READ_ATOMIC request indicates that a block will be read and that the caller's
  76236. + atom should fuse in order to ensure that the block commits atomically with the
  76237. + caller. */
  76238. + TXN_CAPTURE_READ_ATOMIC = (1 << 0),
  76239. +
  76240. + /* A READ_NONCOM request indicates that a block will be read and that the caller is
  76241. + willing to read a non-committed block without causing atoms to fuse. */
  76242. + TXN_CAPTURE_READ_NONCOM = (1 << 1),
  76243. +
  76244. + /* A READ_MODIFY request indicates that a block will be read but that the caller
  76245. + wishes for the block to be captured as it will be written. This capture request
  76246. + mode is not currently used, but eventually it will be useful for preventing
  76247. + deadlock in read-modify-write cycles. */
  76248. + TXN_CAPTURE_READ_MODIFY = (1 << 2),
  76249. +
  76250. + /* A WRITE capture request indicates that a block will be modified and that atoms
  76251. + should fuse to make the commit atomic. */
  76252. + TXN_CAPTURE_WRITE = (1 << 3),
  76253. +
  76254. + /* CAPTURE_TYPES is a mask of the four above capture types, used to separate the
  76255. + exclusive type designation from extra bits that may be supplied -- see
  76256. + below. */
  76257. + TXN_CAPTURE_TYPES = (TXN_CAPTURE_READ_ATOMIC |
  76258. + TXN_CAPTURE_READ_NONCOM | TXN_CAPTURE_READ_MODIFY |
  76259. + TXN_CAPTURE_WRITE),
  76260. +
  76261. + /* A subset of CAPTURE_TYPES, CAPTURE_WTYPES is a mask of request types that
  76262. + indicate modification will occur. */
  76263. + TXN_CAPTURE_WTYPES = (TXN_CAPTURE_READ_MODIFY | TXN_CAPTURE_WRITE),
  76264. +
  76265. + /* An option to reiser4_try_capture, NONBLOCKING indicates that the caller would
  76266. + prefer not to sleep waiting for an aging atom to commit. */
  76267. + TXN_CAPTURE_NONBLOCKING = (1 << 4),
  76268. +
  76269. + /* An option to reiser4_try_capture to prevent atom fusion, just simple
  76270. + capturing is allowed */
  76271. + TXN_CAPTURE_DONT_FUSE = (1 << 5)
  76272. +
  76273. + /* This macro selects only the exclusive capture request types, stripping out any
  76274. + options that were supplied (i.e., NONBLOCKING). */
  76275. +#define CAPTURE_TYPE(x) ((x) & TXN_CAPTURE_TYPES)
  76276. +} txn_capture;
  76277. +
  76278. +/* There are two kinds of transaction handle: WRITE_FUSING and READ_FUSING, the only
  76279. + difference is in the handling of read requests. A WRITE_FUSING transaction handle
  76280. + defaults read capture requests to TXN_CAPTURE_READ_NONCOM whereas a READ_FUSING
  76281. + transaction handle defaults to TXN_CAPTURE_READ_ATOMIC. */
  76282. +typedef enum {
  76283. + TXN_WRITE_FUSING = (1 << 0),
  76284. + TXN_READ_FUSING = (1 << 1) | TXN_WRITE_FUSING, /* READ implies WRITE */
  76285. +} txn_mode;
  76286. +
  76287. +/* Every atom has a stage, which is one of these exclusive values: */
  76288. +typedef enum {
  76289. + /* Initially an atom is free. */
  76290. + ASTAGE_FREE = 0,
  76291. +
  76292. + /* An atom begins by entering the CAPTURE_FUSE stage, where it proceeds to capture
  76293. + blocks and fuse with other atoms. */
  76294. + ASTAGE_CAPTURE_FUSE = 1,
  76295. +
  76296. + /* We need to have a ASTAGE_CAPTURE_SLOW in which an atom fuses with one node for every X nodes it flushes to disk where X > 1. */
  76297. +
  76298. + /* When an atom reaches a certain age it must do all it can to commit. An atom in
  76299. + the CAPTURE_WAIT stage refuses new transaction handles and prevents fusion from
  76300. + atoms in the CAPTURE_FUSE stage. */
  76301. + ASTAGE_CAPTURE_WAIT = 2,
  76302. +
  76303. + /* Waiting for I/O before commit. Copy-on-capture (see
  76304. + http://namesys.com/v4/v4.html). */
  76305. + ASTAGE_PRE_COMMIT = 3,
  76306. +
  76307. + /* Post-commit overwrite I/O. Steal-on-capture. */
  76308. + ASTAGE_POST_COMMIT = 4,
  76309. +
  76310. + /* Atom which waits for the removal of the last reference to it in
  76311. + * order to be deleted from memory */
  76312. + ASTAGE_DONE = 5,
  76313. +
  76314. + /* invalid atom. */
  76315. + ASTAGE_INVALID = 6,
  76316. +
  76317. +} txn_stage;
  76318. +
  76319. +/* Certain flags may be set in the txn_atom->flags field. */
  76320. +typedef enum {
  76321. + /* Indicates that the atom should commit as soon as possible. */
  76322. + ATOM_FORCE_COMMIT = (1 << 0),
  76323. + /* to avoid endless loop, mark the atom (which was considered as too
  76324. + * small) after failed attempt to fuse it. */
  76325. + ATOM_CANCEL_FUSION = (1 << 1)
  76326. +} txn_flags;
  76327. +
  76328. +/* Flags for controlling commit_txnh */
  76329. +typedef enum {
  76330. + /* Wait commit atom completion in commit_txnh */
  76331. + TXNH_WAIT_COMMIT = 0x2,
  76332. + /* Don't commit atom when this handle is closed */
  76333. + TXNH_DONT_COMMIT = 0x4
  76334. +} txn_handle_flags_t;
  76335. +
  76336. +/* TYPE DEFINITIONS */
  76337. +
  76338. +/* A note on lock ordering: the handle & jnode spinlock protects reading of their ->atom
  76339. + fields, so typically an operation on the atom through either of these objects must (1)
  76340. + lock the object, (2) read the atom pointer, (3) lock the atom.
  76341. +
  76342. + During atom fusion, the process holds locks on both atoms at once. Then, it iterates
  76343. + through the list of handles and pages held by the smaller of the two atoms. For each
  76344. + handle and page referencing the smaller atom, the fusing process must: (1) lock the
  76345. + object, and (2) update the atom pointer.
  76346. +
  76347. + You can see that there is a conflict of lock ordering here, so the more-complex
  76348. + procedure should have priority, i.e., the fusing process has priority so that it is
  76349. + guaranteed to make progress and to avoid restarts.
  76350. +
  76351. + This decision, however, means additional complexity for acquiring the atom lock in the
  76352. + first place.
  76353. +
  76354. + The general original procedure followed in the code was:
  76355. +
  76356. + TXN_OBJECT *obj = ...;
  76357. + TXN_ATOM *atom;
  76358. +
  76359. + spin_lock (& obj->_lock);
  76360. +
  76361. + atom = obj->_atom;
  76362. +
  76363. + if (! spin_trylock_atom (atom))
  76364. + {
  76365. + spin_unlock (& obj->_lock);
  76366. + RESTART OPERATION, THERE WAS A RACE;
  76367. + }
  76368. +
  76369. + ELSE YOU HAVE BOTH ATOM AND OBJ LOCKED
  76370. +
  76371. + It has however been found that this wastes CPU a lot in a manner that is
  76372. + hard to profile. So, proper refcounting was added to atoms, and new
  76373. + standard locking sequence is like following:
  76374. +
  76375. + TXN_OBJECT *obj = ...;
  76376. + TXN_ATOM *atom;
  76377. +
  76378. + spin_lock (& obj->_lock);
  76379. +
  76380. + atom = obj->_atom;
  76381. +
  76382. + if (! spin_trylock_atom (atom))
  76383. + {
  76384. + atomic_inc (& atom->refcount);
  76385. + spin_unlock (& obj->_lock);
  76386. + spin_lock (&atom->_lock);
  76387. + atomic_dec (& atom->refcount);
  76388. + // HERE atom is locked
  76389. + spin_unlock (&atom->_lock);
  76390. + RESTART OPERATION, THERE WAS A RACE;
  76391. + }
  76392. +
  76393. + ELSE YOU HAVE BOTH ATOM AND OBJ LOCKED
  76394. +
  76395. + (core of this is implemented in trylock_throttle() function)
  76396. +
  76397. + See the jnode_get_atom() function for a common case.
  76398. +
  76399. + As an additional (and important) optimization allowing to avoid restarts,
  76400. + it is possible to re-check required pre-conditions at the HERE point in
  76401. + code above and proceed without restarting if they are still satisfied.
  76402. +*/
  76403. +
  76404. +/* An atomic transaction: this is the underlying system representation
  76405. + of a transaction, not the one seen by clients.
  76406. +
  76407. + Invariants involving this data-type:
  76408. +
  76409. + [sb-fake-allocated]
  76410. +*/
  76411. +struct txn_atom {
  76412. + /* The spinlock protecting the atom, held during fusion and various other state
  76413. + changes. */
  76414. + spinlock_t alock;
  76415. +
  76416. + /* The atom's reference counter, increasing (in case of a duplication
  76417. + of an existing reference or when we are sure that some other
  76418. + reference exists) may be done without taking spinlock, decrementing
  76419. + of the ref. counter requires a spinlock to be held.
  76420. +
  76421. + Each transaction handle counts in ->refcount. All jnodes count as
  76422. + one reference acquired in atom_begin_andlock(), released in
  76423. + commit_current_atom().
  76424. + */
  76425. + atomic_t refcount;
  76426. +
  76427. + /* The atom_id identifies the atom in persistent records such as the log. */
  76428. + __u32 atom_id;
  76429. +
  76430. + /* Flags holding any of the txn_flags enumerated values (e.g.,
  76431. + ATOM_FORCE_COMMIT). */
  76432. + __u32 flags;
  76433. +
  76434. + /* Number of open handles. */
  76435. + __u32 txnh_count;
  76436. +
  76437. + /* The number of znodes captured by this atom. Equal to the sum of lengths of the
  76438. + dirty_nodes[level] and clean_nodes lists. */
  76439. + __u32 capture_count;
  76440. +
  76441. +#if REISER4_DEBUG
  76442. + int clean;
  76443. + int dirty;
  76444. + int ovrwr;
  76445. + int wb;
  76446. + int fq;
  76447. +#endif
  76448. +
  76449. + __u32 flushed;
  76450. +
  76451. + /* Current transaction stage. */
  76452. + txn_stage stage;
  76453. +
  76454. + /* Start time. */
  76455. + unsigned long start_time;
  76456. +
  76457. + /* The atom's delete sets.
  76458. + "simple" are blocknr_set instances and are used when discard is disabled.
  76459. + "discard" are blocknr_list instances and are used when discard is enabled. */
  76460. + union {
  76461. + struct {
  76462. + /* The atom's delete set. It collects block numbers of the nodes
  76463. + which were deleted during the transaction. */
  76464. + struct list_head delete_set;
  76465. + } nodiscard;
  76466. +
  76467. + struct {
  76468. + /* The atom's delete set. It collects all blocks that have been
  76469. + deallocated (both immediate and deferred) during the transaction.
  76470. + These blocks are considered for discarding at commit time.
  76471. + For details see discard.c */
  76472. + struct list_head delete_set;
  76473. + } discard;
  76474. + };
  76475. +
  76476. + /* The atom's wandered_block mapping. */
  76477. + struct list_head wandered_map;
  76478. +
  76479. + /* The transaction's list of dirty captured nodes--per level. Index
  76480. + by (level). dirty_nodes[0] is for znode-above-root */
  76481. + struct list_head dirty_nodes[REAL_MAX_ZTREE_HEIGHT + 1];
  76482. +
  76483. + /* The transaction's list of clean captured nodes. */
  76484. + struct list_head clean_nodes;
  76485. +
  76486. + /* The atom's overwrite set */
  76487. + struct list_head ovrwr_nodes;
  76488. +
  76489. + /* nodes which are being written to disk */
  76490. + struct list_head writeback_nodes;
  76491. +
  76492. + /* list of inodes */
  76493. + struct list_head inodes;
  76494. +
  76495. + /* List of handles associated with this atom. */
  76496. + struct list_head txnh_list;
  76497. +
  76498. + /* Transaction list link: list of atoms in the transaction manager. */
  76499. + struct list_head atom_link;
  76500. +
  76501. + /* List of handles waiting FOR this atom: see 'capture_fuse_wait' comment. */
  76502. + struct list_head fwaitfor_list;
  76503. +
  76504. + /* List of this atom's handles that are waiting: see 'capture_fuse_wait' comment. */
  76505. + struct list_head fwaiting_list;
  76506. +
  76507. + /* Numbers of objects which were deleted/created in this transaction
  76508. + thereby numbers of objects IDs which were released/deallocated. */
  76509. + int nr_objects_deleted;
  76510. + int nr_objects_created;
  76511. + /* number of blocks allocated during the transaction */
  76512. + __u64 nr_blocks_allocated;
  76513. + /* All atom's flush queue objects are on this list */
  76514. + struct list_head flush_queues;
  76515. +#if REISER4_DEBUG
  76516. + /* number of flush queues for this atom. */
  76517. + int nr_flush_queues;
  76518. + /* Number of jnodes which were removed from atom's lists and put
  76519. + on flush_queue */
  76520. + int num_queued;
  76521. +#endif
  76522. + /* number of threads who wait for this atom to complete commit */
  76523. + int nr_waiters;
  76524. + /* number of threads which do jnode_flush() over this atom */
  76525. + int nr_flushers;
  76526. + /* number of flush queues which are IN_USE and jnodes from fq->prepped
  76527. + are submitted to disk by the reiser4_write_fq() routine. */
  76528. + int nr_running_queues;
  76529. + /* A counter of grabbed unformatted nodes, see a description of the
  76530. + * reiser4 space reservation scheme at block_alloc.c */
  76531. + reiser4_block_nr flush_reserved;
  76532. +#if REISER4_DEBUG
  76533. + void *committer;
  76534. +#endif
  76535. + struct super_block *super;
  76536. +};
  76537. +
  76538. +#define ATOM_DIRTY_LIST(atom, level) (&(atom)->dirty_nodes[level])
  76539. +#define ATOM_CLEAN_LIST(atom) (&(atom)->clean_nodes)
  76540. +#define ATOM_OVRWR_LIST(atom) (&(atom)->ovrwr_nodes)
  76541. +#define ATOM_WB_LIST(atom) (&(atom)->writeback_nodes)
  76542. +#define ATOM_FQ_LIST(fq) (&(fq)->prepped)
  76543. +
  76544. +#define NODE_LIST(node) (node)->list
  76545. +#define ASSIGN_NODE_LIST(node, list) ON_DEBUG(NODE_LIST(node) = list)
  76546. +ON_DEBUG(void
  76547. + count_jnode(txn_atom *, jnode *, atom_list old_list,
  76548. + atom_list new_list, int check_lists));
  76549. +
  76550. +/* A transaction handle: the client obtains and commits this handle which is assigned by
  76551. + the system to a txn_atom. */
  76552. +struct txn_handle {
  76553. + /* Spinlock protecting ->atom pointer */
  76554. + spinlock_t hlock;
  76555. +
  76556. + /* Flags for controlling commit_txnh() behavior */
  76557. + /* from txn_handle_flags_t */
  76558. + txn_handle_flags_t flags;
  76559. +
  76560. + /* Whether it is READ_FUSING or WRITE_FUSING. */
  76561. + txn_mode mode;
  76562. +
  76563. + /* If assigned, the atom it is part of. */
  76564. + txn_atom *atom;
  76565. +
  76566. + /* Transaction list link. Head is in txn_atom. */
  76567. + struct list_head txnh_link;
  76568. +};
  76569. +
  76570. +/* The transaction manager: one is contained in the reiser4_super_info_data */
  76571. +struct txn_mgr {
  76572. + /* A spinlock protecting the atom list, id_count, flush_control */
  76573. + spinlock_t tmgr_lock;
  76574. +
  76575. + /* List of atoms. */
  76576. + struct list_head atoms_list;
  76577. +
  76578. + /* Number of atoms. */
  76579. + int atom_count;
  76580. +
  76581. + /* A counter used to assign atom->atom_id values. */
  76582. + __u32 id_count;
  76583. +
  76584. + /* a mutex object for commit serialization */
  76585. + struct mutex commit_mutex;
  76586. +
  76587. + /* a list of all txnmgrs served by particular daemon. */
  76588. + struct list_head linkage;
  76589. +
  76590. + /* description of daemon for this txnmgr */
  76591. + ktxnmgrd_context *daemon;
  76592. +
  76593. + /* parameters. Adjustable through mount options. */
  76594. + unsigned int atom_max_size;
  76595. + unsigned int atom_max_age;
  76596. + unsigned int atom_min_size;
  76597. + /* max number of concurrent flushers for one atom, 0 - unlimited. */
  76598. + unsigned int atom_max_flushers;
  76599. + struct dentry *debugfs_atom_count;
  76600. + struct dentry *debugfs_id_count;
  76601. +};
  76602. +
  76603. +/* FUNCTION DECLARATIONS */
  76604. +
  76605. +/* These are the externally (within Reiser4) visible transaction functions, therefore they
  76606. + are prefixed with "txn_". For comments, see txnmgr.c. */
  76607. +
  76608. +extern int init_txnmgr_static(void);
  76609. +extern void done_txnmgr_static(void);
  76610. +
  76611. +extern void reiser4_init_txnmgr(txn_mgr *);
  76612. +extern void reiser4_done_txnmgr(txn_mgr *);
  76613. +
  76614. +extern int reiser4_txn_reserve(int reserved);
  76615. +
  76616. +extern void reiser4_txn_begin(reiser4_context * context);
  76617. +extern int reiser4_txn_end(reiser4_context * context);
  76618. +
  76619. +extern void reiser4_txn_restart(reiser4_context * context);
  76620. +extern void reiser4_txn_restart_current(void);
  76621. +
  76622. +extern int txnmgr_force_commit_all(struct super_block *, int);
  76623. +extern int current_atom_should_commit(void);
  76624. +
  76625. +extern jnode *find_first_dirty_jnode(txn_atom *, int);
  76626. +
  76627. +extern int commit_some_atoms(txn_mgr *);
  76628. +extern int force_commit_atom(txn_handle *);
  76629. +extern int flush_current_atom(int, long, long *, txn_atom **, jnode *);
  76630. +
  76631. +extern int flush_some_atom(jnode *, long *, const struct writeback_control *, int);
  76632. +
  76633. +extern void reiser4_atom_set_stage(txn_atom * atom, txn_stage stage);
  76634. +
  76635. +extern int same_slum_check(jnode * base, jnode * check, int alloc_check,
  76636. + int alloc_value);
  76637. +extern void atom_dec_and_unlock(txn_atom * atom);
  76638. +
  76639. +extern int reiser4_try_capture(jnode * node, znode_lock_mode mode, txn_capture flags);
  76640. +extern int try_capture_page_to_invalidate(struct page *pg);
  76641. +
  76642. +extern void reiser4_uncapture_page(struct page *pg);
  76643. +extern void reiser4_uncapture_block(jnode *);
  76644. +extern void reiser4_uncapture_jnode(jnode *);
  76645. +
  76646. +extern int reiser4_capture_inode(struct inode *);
  76647. +extern int reiser4_uncapture_inode(struct inode *);
  76648. +
  76649. +extern txn_atom *get_current_atom_locked_nocheck(void);
  76650. +
  76651. +#if REISER4_DEBUG
  76652. +
  76653. +/**
  76654. + * atom_is_protected - make sure that nobody but us can do anything with atom
  76655. + * @atom: atom to be checked
  76656. + *
  76657. + * This is used to assert that atom either entered commit stages or is spin
  76658. + * locked.
  76659. + */
  76660. +static inline int atom_is_protected(txn_atom *atom)
  76661. +{
  76662. + if (atom->stage >= ASTAGE_PRE_COMMIT)
  76663. + return 1;
  76664. + assert_spin_locked(&(atom->alock));
  76665. + return 1;
  76666. +}
  76667. +
  76668. +#endif
  76669. +
  76670. +/* Get the current atom and spinlock it if current atom present. May not return NULL */
  76671. +static inline txn_atom *get_current_atom_locked(void)
  76672. +{
  76673. + txn_atom *atom;
  76674. +
  76675. + atom = get_current_atom_locked_nocheck();
  76676. + assert("zam-761", atom != NULL);
  76677. +
  76678. + return atom;
  76679. +}
  76680. +
  76681. +extern txn_atom *jnode_get_atom(jnode *);
  76682. +
  76683. +extern void reiser4_atom_wait_event(txn_atom *);
  76684. +extern void reiser4_atom_send_event(txn_atom *);
  76685. +
  76686. +extern void insert_into_atom_ovrwr_list(txn_atom * atom, jnode * node);
  76687. +extern int reiser4_capture_super_block(struct super_block *s);
  76688. +int capture_bulk(jnode **, int count);
  76689. +
  76690. +/* See the comment on the function blocknrset.c:blocknr_set_add for the
  76691. + calling convention of these three routines. */
  76692. +extern int blocknr_set_init_static(void);
  76693. +extern void blocknr_set_done_static(void);
  76694. +extern void blocknr_set_init(struct list_head * bset);
  76695. +extern void blocknr_set_destroy(struct list_head * bset);
  76696. +extern void blocknr_set_merge(struct list_head * from, struct list_head * into);
  76697. +extern int blocknr_set_add_extent(txn_atom * atom,
  76698. + struct list_head * bset,
  76699. + blocknr_set_entry ** new_bsep,
  76700. + const reiser4_block_nr * start,
  76701. + const reiser4_block_nr * len);
  76702. +extern int blocknr_set_add_pair(txn_atom * atom, struct list_head * bset,
  76703. + blocknr_set_entry ** new_bsep,
  76704. + const reiser4_block_nr * a,
  76705. + const reiser4_block_nr * b);
  76706. +
  76707. +typedef int (*blocknr_set_actor_f) (txn_atom *, const reiser4_block_nr *,
  76708. + const reiser4_block_nr *, void *);
  76709. +
  76710. +extern int blocknr_set_iterator(txn_atom * atom, struct list_head * bset,
  76711. + blocknr_set_actor_f actor, void *data,
  76712. + int delete);
  76713. +
  76714. +/* This is the block list interface (see blocknrlist.c) */
  76715. +extern int blocknr_list_init_static(void);
  76716. +extern void blocknr_list_done_static(void);
  76717. +extern void blocknr_list_init(struct list_head *blist);
  76718. +extern void blocknr_list_destroy(struct list_head *blist);
  76719. +extern void blocknr_list_merge(struct list_head *from, struct list_head *to);
  76720. +extern void blocknr_list_sort_and_join(struct list_head *blist);
  76721. +/**
  76722. + * The @atom should be locked.
  76723. + */
  76724. +extern int blocknr_list_add_extent(txn_atom *atom,
  76725. + struct list_head *blist,
  76726. + blocknr_list_entry **new_entry,
  76727. + const reiser4_block_nr *start,
  76728. + const reiser4_block_nr *len);
  76729. +extern int blocknr_list_iterator(txn_atom *atom,
  76730. + struct list_head *blist,
  76731. + blocknr_set_actor_f actor,
  76732. + void *data,
  76733. + int delete);
  76734. +
  76735. +/* These are wrappers for accessing and modifying atom's delete lists,
  76736. + depending on whether discard is enabled or not.
  76737. + If it is enabled, (less memory efficient) blocknr_list is used for delete
  76738. + list storage. Otherwise, blocknr_set is used for this purpose. */
  76739. +extern void atom_dset_init(txn_atom *atom);
  76740. +extern void atom_dset_destroy(txn_atom *atom);
  76741. +extern void atom_dset_merge(txn_atom *from, txn_atom *to);
  76742. +extern int atom_dset_deferred_apply(txn_atom* atom,
  76743. + blocknr_set_actor_f actor,
  76744. + void *data,
  76745. + int delete);
  76746. +extern int atom_dset_deferred_add_extent(txn_atom *atom,
  76747. + void **new_entry,
  76748. + const reiser4_block_nr *start,
  76749. + const reiser4_block_nr *len);
  76750. +
  76751. +/* flush code takes care about how to fuse flush queues */
  76752. +extern void flush_init_atom(txn_atom * atom);
  76753. +extern void flush_fuse_queues(txn_atom * large, txn_atom * small);
  76754. +
  76755. +static inline void spin_lock_atom(txn_atom *atom)
  76756. +{
  76757. + /* check that spinlocks of lower priorities are not held */
  76758. + assert("", (LOCK_CNT_NIL(spin_locked_txnh) &&
  76759. + LOCK_CNT_NIL(spin_locked_atom) &&
  76760. + LOCK_CNT_NIL(spin_locked_jnode) &&
  76761. + LOCK_CNT_NIL(spin_locked_zlock) &&
  76762. + LOCK_CNT_NIL(rw_locked_dk) &&
  76763. + LOCK_CNT_NIL(rw_locked_tree)));
  76764. +
  76765. + spin_lock(&(atom->alock));
  76766. +
  76767. + LOCK_CNT_INC(spin_locked_atom);
  76768. + LOCK_CNT_INC(spin_locked);
  76769. +}
  76770. +
  76771. +static inline void spin_lock_atom_nested(txn_atom *atom)
  76772. +{
  76773. + assert("", (LOCK_CNT_NIL(spin_locked_txnh) &&
  76774. + LOCK_CNT_NIL(spin_locked_jnode) &&
  76775. + LOCK_CNT_NIL(spin_locked_zlock) &&
  76776. + LOCK_CNT_NIL(rw_locked_dk) &&
  76777. + LOCK_CNT_NIL(rw_locked_tree)));
  76778. +
  76779. + spin_lock_nested(&(atom->alock), SINGLE_DEPTH_NESTING);
  76780. +
  76781. + LOCK_CNT_INC(spin_locked_atom);
  76782. + LOCK_CNT_INC(spin_locked);
  76783. +}
  76784. +
  76785. +static inline int spin_trylock_atom(txn_atom *atom)
  76786. +{
  76787. + if (spin_trylock(&(atom->alock))) {
  76788. + LOCK_CNT_INC(spin_locked_atom);
  76789. + LOCK_CNT_INC(spin_locked);
  76790. + return 1;
  76791. + }
  76792. + return 0;
  76793. +}
  76794. +
  76795. +static inline void spin_unlock_atom(txn_atom *atom)
  76796. +{
  76797. + assert_spin_locked(&(atom->alock));
  76798. + assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_atom));
  76799. + assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
  76800. +
  76801. + LOCK_CNT_DEC(spin_locked_atom);
  76802. + LOCK_CNT_DEC(spin_locked);
  76803. +
  76804. + spin_unlock(&(atom->alock));
  76805. +}
  76806. +
  76807. +static inline void spin_lock_txnh(txn_handle *txnh)
  76808. +{
  76809. + /* check that spinlocks of lower priorities are not held */
  76810. + assert("", (LOCK_CNT_NIL(rw_locked_dk) &&
  76811. + LOCK_CNT_NIL(spin_locked_zlock) &&
  76812. + LOCK_CNT_NIL(rw_locked_tree)));
  76813. +
  76814. + spin_lock(&(txnh->hlock));
  76815. +
  76816. + LOCK_CNT_INC(spin_locked_txnh);
  76817. + LOCK_CNT_INC(spin_locked);
  76818. +}
  76819. +
  76820. +static inline int spin_trylock_txnh(txn_handle *txnh)
  76821. +{
  76822. + if (spin_trylock(&(txnh->hlock))) {
  76823. + LOCK_CNT_INC(spin_locked_txnh);
  76824. + LOCK_CNT_INC(spin_locked);
  76825. + return 1;
  76826. + }
  76827. + return 0;
  76828. +}
  76829. +
  76830. +static inline void spin_unlock_txnh(txn_handle *txnh)
  76831. +{
  76832. + assert_spin_locked(&(txnh->hlock));
  76833. + assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_txnh));
  76834. + assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
  76835. +
  76836. + LOCK_CNT_DEC(spin_locked_txnh);
  76837. + LOCK_CNT_DEC(spin_locked);
  76838. +
  76839. + spin_unlock(&(txnh->hlock));
  76840. +}
  76841. +
  76842. +#define spin_ordering_pred_txnmgr(tmgr) \
  76843. + ( LOCK_CNT_NIL(spin_locked_atom) && \
  76844. + LOCK_CNT_NIL(spin_locked_txnh) && \
  76845. + LOCK_CNT_NIL(spin_locked_jnode) && \
  76846. + LOCK_CNT_NIL(rw_locked_zlock) && \
  76847. + LOCK_CNT_NIL(rw_locked_dk) && \
  76848. + LOCK_CNT_NIL(rw_locked_tree) )
  76849. +
  76850. +static inline void spin_lock_txnmgr(txn_mgr *mgr)
  76851. +{
  76852. + /* check that spinlocks of lower priorities are not held */
  76853. + assert("", (LOCK_CNT_NIL(spin_locked_atom) &&
  76854. + LOCK_CNT_NIL(spin_locked_txnh) &&
  76855. + LOCK_CNT_NIL(spin_locked_jnode) &&
  76856. + LOCK_CNT_NIL(spin_locked_zlock) &&
  76857. + LOCK_CNT_NIL(rw_locked_dk) &&
  76858. + LOCK_CNT_NIL(rw_locked_tree)));
  76859. +
  76860. + spin_lock(&(mgr->tmgr_lock));
  76861. +
  76862. + LOCK_CNT_INC(spin_locked_txnmgr);
  76863. + LOCK_CNT_INC(spin_locked);
  76864. +}
  76865. +
  76866. +static inline int spin_trylock_txnmgr(txn_mgr *mgr)
  76867. +{
  76868. + if (spin_trylock(&(mgr->tmgr_lock))) {
  76869. + LOCK_CNT_INC(spin_locked_txnmgr);
  76870. + LOCK_CNT_INC(spin_locked);
  76871. + return 1;
  76872. + }
  76873. + return 0;
  76874. +}
  76875. +
  76876. +static inline void spin_unlock_txnmgr(txn_mgr *mgr)
  76877. +{
  76878. + assert_spin_locked(&(mgr->tmgr_lock));
  76879. + assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_txnmgr));
  76880. + assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
  76881. +
  76882. + LOCK_CNT_DEC(spin_locked_txnmgr);
  76883. + LOCK_CNT_DEC(spin_locked);
  76884. +
  76885. + spin_unlock(&(mgr->tmgr_lock));
  76886. +}
  76887. +
  76888. +typedef enum {
  76889. + FQ_IN_USE = 0x1
  76890. +} flush_queue_state_t;
  76891. +
  76892. +typedef struct flush_queue flush_queue_t;
  76893. +
  76894. +/* This is an accumulator for jnodes prepared for writing to disk. A flush queue
  76895. + is filled by the jnode_flush() routine, and written to disk under memory
  76896. + pressure or at atom commit time. */
  76897. +/* LOCKING: fq state and fq->atom are protected by guard spinlock, fq->nr_queued
  76898. + field and fq->prepped list can be modified if atom is spin-locked and fq
  76899. + object is "in-use" state. For read-only traversal of the fq->prepped list
  76900. + and reading of the fq->nr_queued field it is enough to keep fq "in-use" or
  76901. + only have atom spin-locked. */
  76902. +struct flush_queue {
  76903. + /* linkage element is the first in this structure to make debugging
  76904. + easier. See field in atom struct for description of list. */
  76905. + struct list_head alink;
  76906. + /* A spinlock to protect changes of fq state and fq->atom pointer */
  76907. + spinlock_t guard;
  76908. + /* flush_queue state: [in_use | ready] */
  76909. + flush_queue_state_t state;
  76910. + /* A list which contains queued nodes, queued nodes are removed from any
  76911. + * atom's list and put on this ->prepped one. */
  76912. + struct list_head prepped;
  76913. + /* number of submitted i/o requests */
  76914. + atomic_t nr_submitted;
  76915. + /* number of i/o errors */
  76916. + atomic_t nr_errors;
  76917. + /* An atom this flush queue is attached to */
  76918. + txn_atom *atom;
  76919. + /* A wait queue head to wait on i/o completion */
  76920. + wait_queue_head_t wait;
  76921. +#if REISER4_DEBUG
  76922. + /* A thread which took this fq in exclusive use, NULL if fq is free,
  76923. + * used for debugging. */
  76924. + struct task_struct *owner;
  76925. +#endif
  76926. +};
  76927. +
  76928. +extern int reiser4_fq_by_atom(txn_atom *, flush_queue_t **);
  76929. +extern void reiser4_fq_put_nolock(flush_queue_t *);
  76930. +extern void reiser4_fq_put(flush_queue_t *);
  76931. +extern void reiser4_fuse_fq(txn_atom * to, txn_atom * from);
  76932. +extern void queue_jnode(flush_queue_t *, jnode *);
  76933. +
  76934. +extern int reiser4_write_fq(flush_queue_t *, long *, int);
  76935. +extern int current_atom_finish_all_fq(void);
  76936. +extern void init_atom_fq_parts(txn_atom *);
  76937. +
  76938. +extern reiser4_block_nr txnmgr_count_deleted_blocks(void);
  76939. +
  76940. +extern void znode_make_dirty(znode * node);
  76941. +extern void jnode_make_dirty_locked(jnode * node);
  76942. +
  76943. +extern int reiser4_sync_atom(txn_atom * atom);
  76944. +
  76945. +#if REISER4_DEBUG
  76946. +extern int atom_fq_parts_are_clean(txn_atom *);
  76947. +#endif
  76948. +
  76949. +extern void add_fq_to_bio(flush_queue_t *, struct bio *);
  76950. +extern flush_queue_t *get_fq_for_current_atom(void);
  76951. +
  76952. +void reiser4_invalidate_list(struct list_head * head);
  76953. +
  76954. +# endif /* __REISER4_TXNMGR_H__ */
  76955. +
  76956. +/* Make Linus happy.
  76957. + Local variables:
  76958. + c-indentation-style: "K&R"
  76959. + mode-name: "LC"
  76960. + c-basic-offset: 8
  76961. + tab-width: 8
  76962. + fill-column: 120
  76963. + End:
  76964. +*/
  76965. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/type_safe_hash.h linux-4.14.2/fs/reiser4/type_safe_hash.h
  76966. --- linux-4.14.2.orig/fs/reiser4/type_safe_hash.h 1970-01-01 01:00:00.000000000 +0100
  76967. +++ linux-4.14.2/fs/reiser4/type_safe_hash.h 2017-11-26 22:13:09.000000000 +0100
  76968. @@ -0,0 +1,320 @@
  76969. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  76970. + * reiser4/README */
  76971. +
  76972. +/* A hash table class that uses hash chains (singly-linked) and is
  76973. + parametrized to provide type safety. */
  76974. +
  76975. +#ifndef __REISER4_TYPE_SAFE_HASH_H__
  76976. +#define __REISER4_TYPE_SAFE_HASH_H__
  76977. +
  76978. +#include "debug.h"
  76979. +
  76980. +#include <asm/errno.h>
  76981. +/* Step 1: Use TYPE_SAFE_HASH_DECLARE() to define the TABLE and LINK objects
  76982. + based on the object type. You need to declare the item type before
  76983. + this definition, define it after this definition. */
  76984. +#define TYPE_SAFE_HASH_DECLARE(PREFIX,ITEM_TYPE) \
  76985. + \
  76986. +typedef struct PREFIX##_hash_table_ PREFIX##_hash_table; \
  76987. +typedef struct PREFIX##_hash_link_ PREFIX##_hash_link; \
  76988. + \
  76989. +struct PREFIX##_hash_table_ \
  76990. +{ \
  76991. + ITEM_TYPE **_table; \
  76992. + __u32 _buckets; \
  76993. +}; \
  76994. + \
  76995. +struct PREFIX##_hash_link_ \
  76996. +{ \
  76997. + ITEM_TYPE *_next; \
  76998. +}
  76999. +
  77000. +/* Step 2: Define the object type of the hash: give it field of type
  77001. + PREFIX_hash_link. */
  77002. +
  77003. +/* Step 3: Use TYPE_SAFE_HASH_DEFINE to define the hash table interface using
  77004. + the type and field name used in step 3. The arguments are:
  77005. +
  77006. + ITEM_TYPE The item type being hashed
  77007. + KEY_TYPE The type of key being hashed
  77008. + KEY_NAME The name of the key field within the item
  77009. + LINK_NAME The name of the link field within the item, which you must make type PREFIX_hash_link)
  77010. + HASH_FUNC The name of the hash function (or macro, takes const pointer to key)
  77011. + EQ_FUNC The name of the equality function (or macro, takes const pointer to two keys)
  77012. +
  77013. + It implements these functions:
  77014. +
  77015. + prefix_hash_init Initialize the table given its size.
  77016. + prefix_hash_insert Insert an item
  77017. + prefix_hash_insert_index Insert an item w/ precomputed hash_index
  77018. + prefix_hash_find Find an item by key
  77019. + prefix_hash_find_index Find an item w/ precomputed hash_index
  77020. + prefix_hash_remove Remove an item, returns 1 if found, 0 if not found
  77021. + prefix_hash_remove_index Remove an item w/ precomputed hash_index
  77022. +
  77023. + If you'd like something to be done differently, feel free to ask me
  77024. + for modifications. Additional features that could be added but
  77025. + have not been:
  77026. +
  77027. + prefix_hash_remove_key Find and remove an item by key
  77028. + prefix_hash_remove_key_index Find and remove an item by key w/ precomputed hash_index
  77029. +
  77030. + The hash_function currently receives only the key as an argument,
  77031. + meaning it must somehow know the number of buckets. If this is a
  77032. + problem let me know.
  77033. +
  77034. + This hash table uses a single-linked hash chain. This means
  77035. + insertion is fast but deletion requires searching the chain.
  77036. +
  77037. + There is also the doubly-linked hash chain approach, under which
  77038. + deletion requires no search but the code is longer and it takes two
  77039. + pointers per item.
  77040. +
  77041. + The circularly-linked approach has the shortest code but requires
  77042. + two pointers per bucket, doubling the size of the bucket array (in
  77043. + addition to two pointers per item).
  77044. +*/
  77045. +#define TYPE_SAFE_HASH_DEFINE(PREFIX,ITEM_TYPE,KEY_TYPE,KEY_NAME,LINK_NAME,HASH_FUNC,EQ_FUNC) \
  77046. + \
  77047. +static __inline__ void \
  77048. +PREFIX##_check_hash (PREFIX##_hash_table *table UNUSED_ARG, \
  77049. + __u32 hash UNUSED_ARG) \
  77050. +{ \
  77051. + assert("nikita-2780", hash < table->_buckets); \
  77052. +} \
  77053. + \
  77054. +static __inline__ int \
  77055. +PREFIX##_hash_init (PREFIX##_hash_table *hash, \
  77056. + __u32 buckets) \
  77057. +{ \
  77058. + hash->_table = (ITEM_TYPE**) KMALLOC (sizeof (ITEM_TYPE*) * buckets); \
  77059. + hash->_buckets = buckets; \
  77060. + if (hash->_table == NULL) \
  77061. + { \
  77062. + return RETERR(-ENOMEM); \
  77063. + } \
  77064. + memset (hash->_table, 0, sizeof (ITEM_TYPE*) * buckets); \
  77065. + ON_DEBUG(printk(#PREFIX "_hash_table: %i buckets\n", buckets)); \
  77066. + return 0; \
  77067. +} \
  77068. + \
  77069. +static __inline__ void \
  77070. +PREFIX##_hash_done (PREFIX##_hash_table *hash) \
  77071. +{ \
  77072. + if (REISER4_DEBUG && hash->_table != NULL) { \
  77073. + __u32 i; \
  77074. + for (i = 0 ; i < hash->_buckets ; ++ i) \
  77075. + assert("nikita-2905", hash->_table[i] == NULL); \
  77076. + } \
  77077. + if (hash->_table != NULL) \
  77078. + KFREE (hash->_table, sizeof (ITEM_TYPE*) * hash->_buckets); \
  77079. + hash->_table = NULL; \
  77080. +} \
  77081. + \
  77082. +static __inline__ void \
  77083. +PREFIX##_hash_prefetch_next (ITEM_TYPE *item) \
  77084. +{ \
  77085. + prefetch(item->LINK_NAME._next); \
  77086. +} \
  77087. + \
  77088. +static __inline__ void \
  77089. +PREFIX##_hash_prefetch_bucket (PREFIX##_hash_table *hash, \
  77090. + __u32 index) \
  77091. +{ \
  77092. + prefetch(hash->_table[index]); \
  77093. +} \
  77094. + \
  77095. +static __inline__ ITEM_TYPE* \
  77096. +PREFIX##_hash_find_index (PREFIX##_hash_table *hash, \
  77097. + __u32 hash_index, \
  77098. + KEY_TYPE const *find_key) \
  77099. +{ \
  77100. + ITEM_TYPE *item; \
  77101. + \
  77102. + PREFIX##_check_hash(hash, hash_index); \
  77103. + \
  77104. + for (item = hash->_table[hash_index]; \
  77105. + item != NULL; \
  77106. + item = item->LINK_NAME._next) \
  77107. + { \
  77108. + prefetch(item->LINK_NAME._next); \
  77109. + prefetch(item->LINK_NAME._next + offsetof(ITEM_TYPE, KEY_NAME)); \
  77110. + if (EQ_FUNC (& item->KEY_NAME, find_key)) \
  77111. + { \
  77112. + return item; \
  77113. + } \
  77114. + } \
  77115. + \
  77116. + return NULL; \
  77117. +} \
  77118. + \
  77119. +static __inline__ ITEM_TYPE* \
  77120. +PREFIX##_hash_find_index_lru (PREFIX##_hash_table *hash, \
  77121. + __u32 hash_index, \
  77122. + KEY_TYPE const *find_key) \
  77123. +{ \
  77124. + ITEM_TYPE ** item = &hash->_table[hash_index]; \
  77125. + \
  77126. + PREFIX##_check_hash(hash, hash_index); \
  77127. + \
  77128. + while (*item != NULL) { \
  77129. + prefetch(&(*item)->LINK_NAME._next); \
  77130. + if (EQ_FUNC (&(*item)->KEY_NAME, find_key)) { \
  77131. + ITEM_TYPE *found; \
  77132. + \
  77133. + found = *item; \
  77134. + *item = found->LINK_NAME._next; \
  77135. + found->LINK_NAME._next = hash->_table[hash_index]; \
  77136. + hash->_table[hash_index] = found; \
  77137. + return found; \
  77138. + } \
  77139. + item = &(*item)->LINK_NAME._next; \
  77140. + } \
  77141. + return NULL; \
  77142. +} \
  77143. + \
  77144. +static __inline__ int \
  77145. +PREFIX##_hash_remove_index (PREFIX##_hash_table *hash, \
  77146. + __u32 hash_index, \
  77147. + ITEM_TYPE *del_item) \
  77148. +{ \
  77149. + ITEM_TYPE ** hash_item_p = &hash->_table[hash_index]; \
  77150. + \
  77151. + PREFIX##_check_hash(hash, hash_index); \
  77152. + \
  77153. + while (*hash_item_p != NULL) { \
  77154. + prefetch(&(*hash_item_p)->LINK_NAME._next); \
  77155. + if (*hash_item_p == del_item) { \
  77156. + *hash_item_p = (*hash_item_p)->LINK_NAME._next; \
  77157. + return 1; \
  77158. + } \
  77159. + hash_item_p = &(*hash_item_p)->LINK_NAME._next; \
  77160. + } \
  77161. + return 0; \
  77162. +} \
  77163. + \
  77164. +static __inline__ void \
  77165. +PREFIX##_hash_insert_index (PREFIX##_hash_table *hash, \
  77166. + __u32 hash_index, \
  77167. + ITEM_TYPE *ins_item) \
  77168. +{ \
  77169. + PREFIX##_check_hash(hash, hash_index); \
  77170. + \
  77171. + ins_item->LINK_NAME._next = hash->_table[hash_index]; \
  77172. + hash->_table[hash_index] = ins_item; \
  77173. +} \
  77174. + \
  77175. +static __inline__ void \
  77176. +PREFIX##_hash_insert_index_rcu (PREFIX##_hash_table *hash, \
  77177. + __u32 hash_index, \
  77178. + ITEM_TYPE *ins_item) \
  77179. +{ \
  77180. + PREFIX##_check_hash(hash, hash_index); \
  77181. + \
  77182. + ins_item->LINK_NAME._next = hash->_table[hash_index]; \
  77183. + smp_wmb(); \
  77184. + hash->_table[hash_index] = ins_item; \
  77185. +} \
  77186. + \
  77187. +static __inline__ ITEM_TYPE* \
  77188. +PREFIX##_hash_find (PREFIX##_hash_table *hash, \
  77189. + KEY_TYPE const *find_key) \
  77190. +{ \
  77191. + return PREFIX##_hash_find_index (hash, HASH_FUNC(hash, find_key), find_key); \
  77192. +} \
  77193. + \
  77194. +static __inline__ ITEM_TYPE* \
  77195. +PREFIX##_hash_find_lru (PREFIX##_hash_table *hash, \
  77196. + KEY_TYPE const *find_key) \
  77197. +{ \
  77198. + return PREFIX##_hash_find_index_lru (hash, HASH_FUNC(hash, find_key), find_key); \
  77199. +} \
  77200. + \
  77201. +static __inline__ int \
  77202. +PREFIX##_hash_remove (PREFIX##_hash_table *hash, \
  77203. + ITEM_TYPE *del_item) \
  77204. +{ \
  77205. + return PREFIX##_hash_remove_index (hash, \
  77206. + HASH_FUNC(hash, &del_item->KEY_NAME), del_item); \
  77207. +} \
  77208. + \
  77209. +static __inline__ int \
  77210. +PREFIX##_hash_remove_rcu (PREFIX##_hash_table *hash, \
  77211. + ITEM_TYPE *del_item) \
  77212. +{ \
  77213. + return PREFIX##_hash_remove (hash, del_item); \
  77214. +} \
  77215. + \
  77216. +static __inline__ void \
  77217. +PREFIX##_hash_insert (PREFIX##_hash_table *hash, \
  77218. + ITEM_TYPE *ins_item) \
  77219. +{ \
  77220. + return PREFIX##_hash_insert_index (hash, \
  77221. + HASH_FUNC(hash, &ins_item->KEY_NAME), ins_item); \
  77222. +} \
  77223. + \
  77224. +static __inline__ void \
  77225. +PREFIX##_hash_insert_rcu (PREFIX##_hash_table *hash, \
  77226. + ITEM_TYPE *ins_item) \
  77227. +{ \
  77228. + return PREFIX##_hash_insert_index_rcu (hash, HASH_FUNC(hash, &ins_item->KEY_NAME), \
  77229. + ins_item); \
  77230. +} \
  77231. + \
  77232. +static __inline__ ITEM_TYPE * \
  77233. +PREFIX##_hash_first (PREFIX##_hash_table *hash, __u32 ind) \
  77234. +{ \
  77235. + ITEM_TYPE *first; \
  77236. + \
  77237. + for (first = NULL; ind < hash->_buckets; ++ ind) { \
  77238. + first = hash->_table[ind]; \
  77239. + if (first != NULL) \
  77240. + break; \
  77241. + } \
  77242. + return first; \
  77243. +} \
  77244. + \
  77245. +static __inline__ ITEM_TYPE * \
  77246. +PREFIX##_hash_next (PREFIX##_hash_table *hash, \
  77247. + ITEM_TYPE *item) \
  77248. +{ \
  77249. + ITEM_TYPE *next; \
  77250. + \
  77251. + if (item == NULL) \
  77252. + return NULL; \
  77253. + next = item->LINK_NAME._next; \
  77254. + if (next == NULL) \
  77255. + next = PREFIX##_hash_first (hash, HASH_FUNC(hash, &item->KEY_NAME) + 1); \
  77256. + return next; \
  77257. +} \
  77258. + \
  77259. +typedef struct {} PREFIX##_hash_dummy
  77260. +
  77261. +#define for_all_ht_buckets(table, head) \
  77262. +for ((head) = &(table) -> _table[ 0 ] ; \
  77263. + (head) != &(table) -> _table[ (table) -> _buckets ] ; ++ (head))
  77264. +
  77265. +#define for_all_in_bucket(bucket, item, next, field) \
  77266. +for ((item) = *(bucket), (next) = (item) ? (item) -> field._next : NULL ; \
  77267. + (item) != NULL ; \
  77268. + (item) = (next), (next) = (item) ? (item) -> field._next : NULL )
  77269. +
  77270. +#define for_all_in_htable(table, prefix, item, next) \
  77271. +for ((item) = prefix ## _hash_first ((table), 0), \
  77272. + (next) = prefix ## _hash_next ((table), (item)) ; \
  77273. + (item) != NULL ; \
  77274. + (item) = (next), \
  77275. + (next) = prefix ## _hash_next ((table), (item)))
  77276. +
  77277. +/* __REISER4_TYPE_SAFE_HASH_H__ */
  77278. +#endif
  77279. +
  77280. +/* Make Linus happy.
  77281. + Local variables:
  77282. + c-indentation-style: "K&R"
  77283. + mode-name: "LC"
  77284. + c-basic-offset: 8
  77285. + tab-width: 8
  77286. + fill-column: 120
  77287. + End:
  77288. +*/
  77289. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/vfs_ops.c linux-4.14.2/fs/reiser4/vfs_ops.c
  77290. --- linux-4.14.2.orig/fs/reiser4/vfs_ops.c 1970-01-01 01:00:00.000000000 +0100
  77291. +++ linux-4.14.2/fs/reiser4/vfs_ops.c 2017-11-26 22:13:09.000000000 +0100
  77292. @@ -0,0 +1,260 @@
  77293. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  77294. + * reiser4/README */
  77295. +
  77296. +/* Interface to VFS. Reiser4 {super|export|dentry}_operations are defined
  77297. + here. */
  77298. +
  77299. +#include "forward.h"
  77300. +#include "debug.h"
  77301. +#include "dformat.h"
  77302. +#include "coord.h"
  77303. +#include "plugin/item/item.h"
  77304. +#include "plugin/file/file.h"
  77305. +#include "plugin/security/perm.h"
  77306. +#include "plugin/disk_format/disk_format.h"
  77307. +#include "plugin/plugin.h"
  77308. +#include "plugin/plugin_set.h"
  77309. +#include "plugin/object.h"
  77310. +#include "txnmgr.h"
  77311. +#include "jnode.h"
  77312. +#include "znode.h"
  77313. +#include "block_alloc.h"
  77314. +#include "tree.h"
  77315. +#include "vfs_ops.h"
  77316. +#include "inode.h"
  77317. +#include "page_cache.h"
  77318. +#include "ktxnmgrd.h"
  77319. +#include "super.h"
  77320. +#include "reiser4.h"
  77321. +#include "entd.h"
  77322. +#include "status_flags.h"
  77323. +#include "flush.h"
  77324. +#include "dscale.h"
  77325. +
  77326. +#include <linux/profile.h>
  77327. +#include <linux/types.h>
  77328. +#include <linux/mount.h>
  77329. +#include <linux/vfs.h>
  77330. +#include <linux/mm.h>
  77331. +#include <linux/buffer_head.h>
  77332. +#include <linux/dcache.h>
  77333. +#include <linux/list.h>
  77334. +#include <linux/pagemap.h>
  77335. +#include <linux/slab.h>
  77336. +#include <linux/seq_file.h>
  77337. +#include <linux/init.h>
  77338. +#include <linux/module.h>
  77339. +#include <linux/writeback.h>
  77340. +#include <linux/blkdev.h>
  77341. +#include <linux/security.h>
  77342. +#include <linux/reboot.h>
  77343. +#include <linux/rcupdate.h>
  77344. +
  77345. +/* update inode stat-data by calling plugin */
  77346. +int reiser4_update_sd(struct inode *object)
  77347. +{
  77348. + file_plugin *fplug;
  77349. +
  77350. + assert("nikita-2338", object != NULL);
  77351. + /* check for read-only file system. */
  77352. + if (IS_RDONLY(object))
  77353. + return 0;
  77354. +
  77355. + fplug = inode_file_plugin(object);
  77356. + assert("nikita-2339", fplug != NULL);
  77357. + return fplug->write_sd_by_inode(object);
  77358. +}
  77359. +
  77360. +/* helper function: increase inode nlink count and call plugin method to save
  77361. + updated stat-data.
  77362. +
  77363. + Used by link/create and during creation of dot and dotdot in mkdir
  77364. +*/
  77365. +int reiser4_add_nlink(struct inode *object /* object to which link is added */ ,
  77366. + struct inode *parent /* parent where new entry will be */
  77367. + ,
  77368. + int write_sd_p /* true if stat-data has to be
  77369. + * updated */ )
  77370. +{
  77371. + file_plugin *fplug;
  77372. + int result;
  77373. +
  77374. + assert("nikita-1351", object != NULL);
  77375. +
  77376. + fplug = inode_file_plugin(object);
  77377. + assert("nikita-1445", fplug != NULL);
  77378. +
  77379. + /* ask plugin whether it can add yet another link to this
  77380. + object */
  77381. + if (!fplug->can_add_link(object))
  77382. + return RETERR(-EMLINK);
  77383. +
  77384. + assert("nikita-2211", fplug->add_link != NULL);
  77385. + /* call plugin to do actual addition of link */
  77386. + result = fplug->add_link(object, parent);
  77387. +
  77388. + /* optionally update stat data */
  77389. + if (result == 0 && write_sd_p)
  77390. + result = fplug->write_sd_by_inode(object);
  77391. + return result;
  77392. +}
  77393. +
  77394. +/* helper function: decrease inode nlink count and call plugin method to save
  77395. + updated stat-data.
  77396. +
  77397. + Used by unlink/create
  77398. +*/
  77399. +int reiser4_del_nlink(struct inode *object /* object from which link is
  77400. + * removed */ ,
  77401. + struct inode *parent /* parent where entry was */ ,
  77402. + int write_sd_p /* true if stat-data has to be
  77403. + * updated */ )
  77404. +{
  77405. + file_plugin *fplug;
  77406. + int result;
  77407. +
  77408. + assert("nikita-1349", object != NULL);
  77409. +
  77410. + fplug = inode_file_plugin(object);
  77411. + assert("nikita-1350", fplug != NULL);
  77412. + assert("nikita-1446", object->i_nlink > 0);
  77413. + assert("nikita-2210", fplug->rem_link != NULL);
  77414. +
  77415. + /* call plugin to do actual deletion of link */
  77416. + result = fplug->rem_link(object, parent);
  77417. +
  77418. + /* optionally update stat data */
  77419. + if (result == 0 && write_sd_p)
  77420. + result = fplug->write_sd_by_inode(object);
  77421. + return result;
  77422. +}
  77423. +
  77424. +/* Release reiser4 dentry. This is d_op->d_release() method. */
  77425. +static void reiser4_d_release(struct dentry *dentry /* dentry released */ )
  77426. +{
  77427. + reiser4_free_dentry_fsdata(dentry);
  77428. +}
  77429. +
  77430. +/*
  77431. + * Called by reiser4_sync_inodes(), during speculative write-back (through
  77432. + * pdflush, or balance_dirty_pages()).
  77433. + */
  77434. +void reiser4_writeout(struct super_block *sb, struct writeback_control *wbc)
  77435. +{
  77436. + long written = 0;
  77437. + int repeats = 0;
  77438. + int result;
  77439. +
  77440. + /*
  77441. + * Performs early flushing, trying to free some memory. If there
  77442. + * is nothing to flush, commits some atoms.
  77443. + *
  77444. + * Commit all atoms if reiser4_writepages_dispatch() is called
  77445. + * from sys_sync() or sys_fsync()
  77446. + */
  77447. + if (wbc->sync_mode != WB_SYNC_NONE) {
  77448. + txnmgr_force_commit_all(sb, 0);
  77449. + return;
  77450. + }
  77451. +
  77452. + BUG_ON(reiser4_get_super_fake(sb) == NULL);
  77453. + do {
  77454. + long nr_submitted = 0;
  77455. + jnode *node = NULL;
  77456. +
  77457. + /* do not put more requests to overload write queue */
  77458. + if (bdi_write_congested(inode_to_bdi(reiser4_get_super_fake(sb)))) {
  77459. + //blk_flush_plug(current);
  77460. + break;
  77461. + }
  77462. + repeats++;
  77463. + BUG_ON(wbc->nr_to_write <= 0);
  77464. +
  77465. + if (get_current_context()->entd) {
  77466. + entd_context *ent = get_entd_context(sb);
  77467. +
  77468. + if (ent->cur_request->node)
  77469. + /*
  77470. + * this is ent thread and it managed to capture
  77471. + * requested page itself - start flush from
  77472. + * that page
  77473. + */
  77474. + node = ent->cur_request->node;
  77475. + }
  77476. +
  77477. + result = flush_some_atom(node, &nr_submitted, wbc,
  77478. + JNODE_FLUSH_WRITE_BLOCKS);
  77479. + if (result != 0)
  77480. + warning("nikita-31001", "Flush failed: %i", result);
  77481. + if (node)
  77482. + /* drop the reference acquired
  77483. + in find_or_create_extent() */
  77484. + jput(node);
  77485. + if (!nr_submitted)
  77486. + break;
  77487. +
  77488. + wbc->nr_to_write -= nr_submitted;
  77489. + written += nr_submitted;
  77490. + } while (wbc->nr_to_write > 0);
  77491. +}
  77492. +
  77493. +/* tell VM how many pages were dirtied */
  77494. +void reiser4_throttle_write(struct inode *inode)
  77495. +{
  77496. + reiser4_context *ctx;
  77497. +
  77498. + ctx = get_current_context();
  77499. + reiser4_txn_restart(ctx);
  77500. + current->journal_info = NULL;
  77501. + balance_dirty_pages_ratelimited(inode->i_mapping);
  77502. + current->journal_info = ctx;
  77503. +}
  77504. +
  77505. +const int REISER4_MAGIC_OFFSET = 16 * 4096; /* offset to magic string from the
  77506. + * beginning of device */
  77507. +
  77508. +/*
  77509. + * Reiser4 initialization/shutdown.
  77510. + *
  77511. + * Code below performs global reiser4 initialization that is done either as
  77512. + * part of kernel initialization (when reiser4 is statically built-in), or
  77513. + * during reiser4 module load (when compiled as module).
  77514. + */
  77515. +
  77516. +void reiser4_handle_error(void)
  77517. +{
  77518. + struct super_block *sb = reiser4_get_current_sb();
  77519. +
  77520. + if (!sb)
  77521. + return;
  77522. + reiser4_status_write(REISER4_STATUS_DAMAGED, 0,
  77523. + "Filesystem error occured");
  77524. + switch (get_super_private(sb)->onerror) {
  77525. + case 1:
  77526. + reiser4_panic("foobar-42", "Filesystem error occured\n");
  77527. + default:
  77528. + if (sb->s_flags & MS_RDONLY)
  77529. + return;
  77530. + sb->s_flags |= MS_RDONLY;
  77531. + break;
  77532. + }
  77533. +}
  77534. +
  77535. +struct dentry_operations reiser4_dentry_operations = {
  77536. + .d_revalidate = NULL,
  77537. + .d_hash = NULL,
  77538. + .d_compare = NULL,
  77539. + .d_delete = NULL,
  77540. + .d_release = reiser4_d_release,
  77541. + .d_iput = NULL,
  77542. +};
  77543. +
  77544. +/* Make Linus happy.
  77545. + Local variables:
  77546. + c-indentation-style: "K&R"
  77547. + mode-name: "LC"
  77548. + c-basic-offset: 8
  77549. + tab-width: 8
  77550. + fill-column: 120
  77551. + End:
  77552. +*/
  77553. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/vfs_ops.h linux-4.14.2/fs/reiser4/vfs_ops.h
  77554. --- linux-4.14.2.orig/fs/reiser4/vfs_ops.h 1970-01-01 01:00:00.000000000 +0100
  77555. +++ linux-4.14.2/fs/reiser4/vfs_ops.h 2017-11-26 22:13:09.000000000 +0100
  77556. @@ -0,0 +1,60 @@
  77557. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  77558. + * reiser4/README */
  77559. +
  77560. +/* vfs_ops.c's exported symbols */
  77561. +
  77562. +#if !defined( __FS_REISER4_VFS_OPS_H__ )
  77563. +#define __FS_REISER4_VFS_OPS_H__
  77564. +
  77565. +#include "forward.h"
  77566. +#include "coord.h"
  77567. +#include "seal.h"
  77568. +#include "plugin/file/file.h"
  77569. +#include "super.h"
  77570. +#include "readahead.h"
  77571. +
  77572. +#include <linux/types.h> /* for loff_t */
  77573. +#include <linux/fs.h> /* for struct address_space */
  77574. +#include <linux/dcache.h> /* for struct dentry */
  77575. +#include <linux/mm.h>
  77576. +#include <linux/backing-dev.h>
  77577. +
  77578. +/* address space operations */
  77579. +int reiser4_writepage(struct page *, struct writeback_control *);
  77580. +int reiser4_set_page_dirty(struct page *);
  77581. +void reiser4_invalidatepage(struct page *, unsigned int offset, unsigned int length);
  77582. +int reiser4_releasepage(struct page *, gfp_t);
  77583. +
  77584. +#ifdef CONFIG_MIGRATION
  77585. +int reiser4_migratepage(struct address_space *, struct page *,
  77586. + struct page *, enum migrate_mode);
  77587. +#else
  77588. +#define reiser4_migratepage NULL
  77589. +#endif /* CONFIG_MIGRATION */
  77590. +
  77591. +extern int reiser4_update_sd(struct inode *);
  77592. +extern int reiser4_add_nlink(struct inode *, struct inode *, int);
  77593. +extern int reiser4_del_nlink(struct inode *, struct inode *, int);
  77594. +
  77595. +extern int reiser4_start_up_io(struct page *page);
  77596. +extern void reiser4_throttle_write(struct inode *);
  77597. +extern int jnode_is_releasable(jnode *);
  77598. +
  77599. +#define CAPTURE_APAGE_BURST (1024l)
  77600. +void reiser4_writeout(struct super_block *, struct writeback_control *);
  77601. +
  77602. +extern void reiser4_handle_error(void);
  77603. +
  77604. +/* __FS_REISER4_VFS_OPS_H__ */
  77605. +#endif
  77606. +
  77607. +/* Make Linus happy.
  77608. + Local variables:
  77609. + c-indentation-style: "K&R"
  77610. + mode-name: "LC"
  77611. + c-basic-offset: 8
  77612. + tab-width: 8
  77613. + fill-column: 120
  77614. + scroll-step: 1
  77615. + End:
  77616. +*/
  77617. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/wander.c linux-4.14.2/fs/reiser4/wander.c
  77618. --- linux-4.14.2.orig/fs/reiser4/wander.c 1970-01-01 01:00:00.000000000 +0100
  77619. +++ linux-4.14.2/fs/reiser4/wander.c 2017-11-26 22:14:18.000000000 +0100
  77620. @@ -0,0 +1,1757 @@
  77621. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  77622. + * reiser4/README */
  77623. +
  77624. +/* Reiser4 Wandering Log */
  77625. +
  77626. +/* You should read http://www.namesys.com/txn-doc.html
  77627. +
  77628. + That describes how filesystem operations are performed as atomic
  77629. + transactions, and how we try to arrange it so that we can write most of the
  77630. + data only once while performing the operation atomically.
  77631. +
  77632. + For the purposes of this code, it is enough for it to understand that it
  77633. + has been told a given block should be written either once, or twice (if
  77634. + twice then once to the wandered location and once to the real location).
  77635. +
  77636. + This code guarantees that those blocks that are defined to be part of an
  77637. + atom either all take effect or none of them take effect.
  77638. +
  77639. + The "relocate set" of nodes are submitted to write by the jnode_flush()
  77640. + routine, and the "overwrite set" is submitted by reiser4_write_log().
  77641. + This is because with the overwrite set we seek to optimize writes, and
  77642. + with the relocate set we seek to cause disk order to correlate with the
  77643. + "parent first order" (preorder).
  77644. +
  77645. + reiser4_write_log() allocates and writes wandered blocks and maintains
  77646. + additional on-disk structures of the atom as wander records (each wander
  77647. + record occupies one block) for storing of the "wandered map" (a table which
  77648. + contains a relation between wandered and real block numbers) and other
  77649. + information which might be needed at transaction recovery time.
  77650. +
  77651. + The wander records are unidirectionally linked into a circle: each wander
  77652. + record contains a block number of the next wander record, the last wander
  77653. + record points to the first one.
  77654. +
  77655. + One wander record (named "tx head" in this file) has a format which is
  77656. + different from the other wander records. The "tx head" has a reference to the
  77657. + "tx head" block of the previously committed atom. Also, "tx head" contains
  77658. + fs information (the free blocks counter, and the oid allocator state) which
  77659. + is logged in a special way.
  77660. +
  77661. + There are two journal control blocks, named journal header and journal
  77662. + footer which have fixed on-disk locations. The journal header has a
  77663. + reference to the "tx head" block of the last committed atom. The journal
  77664. + footer points to the "tx head" of the last flushed atom. The atom is
  77665. + "played" when all blocks from its overwrite set are written to disk the
  77666. + second time (i.e. written to their real locations).
  77667. +
  77668. + NOTE: People who know reiserfs internals and its journal structure might be
  77669. + confused with these terms journal footer and journal header. There is a table
  77670. + with terms of similar semantics in reiserfs (reiser3) and reiser4:
  77671. +
  77672. + REISER3 TERM | REISER4 TERM | DESCRIPTION
  77673. + --------------------+-----------------------+----------------------------
  77674. + commit record | journal header | atomic write of this record
  77675. + | | ends transaction commit
  77676. + --------------------+-----------------------+----------------------------
  77677. + journal header | journal footer | atomic write of this record
  77678. + | | ends post-commit writes.
  77679. + | | After successful
  77680. + | | writing of this journal
  77681. + | | blocks (in reiser3) or
  77682. + | | wandered blocks/records are
  77683. + | | free for re-use.
  77684. + --------------------+-----------------------+----------------------------
  77685. +
  77686. + The atom commit process is the following:
  77687. +
  77688. + 1. The overwrite set is taken from atom's clean list, and its size is
  77689. + counted.
  77690. +
  77691. + 2. The number of necessary wander records (including tx head) is calculated,
  77692. + and the wander record blocks are allocated.
  77693. +
  77694. + 3. Allocate wandered blocks and populate wander records by wandered map.
  77695. +
  77696. + 4. submit write requests for wander records and wandered blocks.
  77697. +
  77698. + 5. wait until submitted write requests complete.
  77699. +
  77700. + 6. update journal header: change the pointer to the block number of just
  77701. + written tx head, submit an i/o for modified journal header block and wait
  77702. + for i/o completion.
  77703. +
  77704. + NOTE: The special logging for bitmap blocks and some reiser4 super block
  77705. + fields makes processes of atom commit, flush and recovering a bit more
  77706. + complex (see comments in the source code for details).
  77707. +
  77708. + The atom playing process is the following:
  77709. +
  77710. + 1. Write atom's overwrite set in-place.
  77711. +
  77712. + 2. Wait on i/o.
  77713. +
  77714. + 3. Update journal footer: change the pointer to block number of tx head
  77715. + block of the atom we currently flushing, submit an i/o, wait on i/o
  77716. + completion.
  77717. +
  77718. + 4. Free disk space which was used for wandered blocks and wander records.
  77719. +
  77720. + After the freeing of wandered blocks and wander records we have that journal
  77721. + footer points to the on-disk structure which might be overwritten soon.
  77722. + Neither the log writer nor the journal recovery procedure use that pointer
  77723. + for accessing the data. When the journal recovery procedure finds the oldest
  77724. + transaction it compares the journal footer pointer value with the "prev_tx"
  77725. + pointer value in tx head, if values are equal the oldest not flushed
  77726. + transaction is found.
  77727. +
  77728. + NOTE on disk space leakage: the information about what blocks and how many
  77729. + blocks are allocated for wandered blocks, wandered records is not written to
  77730. + the disk because of special logging for bitmaps and some super blocks
  77731. + counters. After a system crash reiser4 does not remember those
  77732. + objects' allocation, thus we have no such kind of disk space leakage.
  77733. +*/
  77734. +
  77735. +/* Special logging of reiser4 super block fields. */
  77736. +
  77737. +/* There are some reiser4 super block fields (free block count and OID allocator
  77738. + state (number of files and next free OID) which are logged separately from
  77739. + super block to avoid unnecessary atom fusion.
  77740. +
  77741. + So, the reiser4 super block can be not captured by a transaction with
  77742. + allocates/deallocates disk blocks or create/delete file objects. Moreover,
  77743. + the reiser4 on-disk super block is not touched when such a transaction is
  77744. + committed and flushed. Those "counters logged specially" are logged in "tx
  77745. + head" blocks and in the journal footer block.
  77746. +
  77747. + A step-by-step description of special logging:
  77748. +
  77749. + 0. The per-atom information about deleted or created files and allocated or
  77750. + freed blocks is collected during the transaction. The atom's
  77751. + ->nr_objects_created and ->nr_objects_deleted are for object
  77752. + deletion/creation tracking, the numbers of allocated and freed blocks are
  77753. + calculated using atom's delete set and atom's capture list -- all new and
  77754. + relocated nodes should be on atom's clean list and should have JNODE_RELOC
  77755. + bit set.
  77756. +
  77757. + 1. The "logged specially" reiser4 super block fields have their "committed"
  77758. + versions in the reiser4 in-memory super block. They get modified only at
  77759. + atom commit time. The atom's commit thread has an exclusive access to those
  77760. + "committed" fields because the log writer implementation supports only one
  77761. + atom commit a time (there is a per-fs "commit" mutex). At
  77762. + that time "committed" counters are modified using per-atom information
  77763. + collected during the transaction. These counters are stored on disk as a
  77764. + part of tx head block when atom is committed.
  77765. +
  77766. + 2. When the atom is flushed the value of the free block counter and the OID
  77767. + allocator state get written to the journal footer block. A special journal
  77768. + procedure (journal_recover_sb_data()) takes those values from the journal
  77769. + footer and updates the reiser4 in-memory super block.
  77770. +
  77771. + NOTE: That means free block count and OID allocator state are logged
  77772. + separately from the reiser4 super block regardless of the fact that the
  77773. + reiser4 super block has fields to store both the free block counter and the
  77774. + OID allocator.
  77775. +
  77776. + Writing the whole super block at commit time requires knowing true values of
  77777. + all its fields without changes made by not yet committed transactions. It is
  77778. + possible by having their "committed" version of the super block like the
  77779. + reiser4 bitmap blocks have "committed" and "working" versions. However,
  77780. + another scheme was implemented which stores special logged values in the
  77781. + unused free space inside transaction head block. In my opinion it has an
  77782. + advantage of not writing whole super block when only part of it was
  77783. + modified. */
  77784. +
  77785. +#include "debug.h"
  77786. +#include "dformat.h"
  77787. +#include "txnmgr.h"
  77788. +#include "jnode.h"
  77789. +#include "znode.h"
  77790. +#include "block_alloc.h"
  77791. +#include "page_cache.h"
  77792. +#include "wander.h"
  77793. +#include "reiser4.h"
  77794. +#include "super.h"
  77795. +#include "vfs_ops.h"
  77796. +#include "writeout.h"
  77797. +#include "inode.h"
  77798. +#include "entd.h"
  77799. +
  77800. +#include <linux/types.h>
  77801. +#include <linux/fs.h> /* for struct super_block */
  77802. +#include <linux/mm.h> /* for struct page */
  77803. +#include <linux/pagemap.h>
  77804. +#include <linux/bio.h> /* for struct bio */
  77805. +#include <linux/blkdev.h>
  77806. +
  77807. +static int write_jnodes_to_disk_extent(
  77808. + jnode *, int, const reiser4_block_nr *, flush_queue_t *, int);
  77809. +
  77810. +/* The commit_handle is a container for objects needed at atom commit time */
  77811. +struct commit_handle {
  77812. + /* A pointer to atom's list of OVRWR nodes */
  77813. + struct list_head *overwrite_set;
  77814. + /* atom's overwrite set size */
  77815. + int overwrite_set_size;
  77816. + /* jnodes for wander record blocks */
  77817. + struct list_head tx_list;
  77818. + /* number of wander records */
  77819. + __u32 tx_size;
  77820. + /* 'committed' sb counters are saved here until atom is completely
  77821. + flushed */
  77822. + __u64 free_blocks;
  77823. + __u64 nr_files;
  77824. + __u64 next_oid;
  77825. + /* A pointer to the atom which is being committed */
  77826. + txn_atom *atom;
  77827. + /* A pointer to current super block */
  77828. + struct super_block *super;
  77829. + /* The counter of modified bitmaps */
  77830. + reiser4_block_nr nr_bitmap;
  77831. +};
  77832. +
  77833. +static void init_commit_handle(struct commit_handle *ch, txn_atom *atom)
  77834. +{
  77835. + memset(ch, 0, sizeof(struct commit_handle));
  77836. + INIT_LIST_HEAD(&ch->tx_list);
  77837. +
  77838. + ch->atom = atom;
  77839. + ch->super = reiser4_get_current_sb();
  77840. +}
  77841. +
  77842. +static void done_commit_handle(struct commit_handle *ch)
  77843. +{
  77844. + assert("zam-690", list_empty(&ch->tx_list));
  77845. +}
  77846. +
  77847. +/* fill journal header block data */
  77848. +static void format_journal_header(struct commit_handle *ch)
  77849. +{
  77850. + struct reiser4_super_info_data *sbinfo;
  77851. + struct journal_header *header;
  77852. + jnode *txhead;
  77853. +
  77854. + sbinfo = get_super_private(ch->super);
  77855. + assert("zam-479", sbinfo != NULL);
  77856. + assert("zam-480", sbinfo->journal_header != NULL);
  77857. +
  77858. + txhead = list_entry(ch->tx_list.next, jnode, capture_link);
  77859. +
  77860. + jload(sbinfo->journal_header);
  77861. +
  77862. + header = (struct journal_header *)jdata(sbinfo->journal_header);
  77863. + assert("zam-484", header != NULL);
  77864. +
  77865. + put_unaligned(cpu_to_le64(*jnode_get_block(txhead)),
  77866. + &header->last_committed_tx);
  77867. +
  77868. + jrelse(sbinfo->journal_header);
  77869. +}
  77870. +
  77871. +/* fill journal footer block data */
  77872. +static void format_journal_footer(struct commit_handle *ch)
  77873. +{
  77874. + struct reiser4_super_info_data *sbinfo;
  77875. + struct journal_footer *footer;
  77876. + jnode *tx_head;
  77877. +
  77878. + sbinfo = get_super_private(ch->super);
  77879. +
  77880. + tx_head = list_entry(ch->tx_list.next, jnode, capture_link);
  77881. +
  77882. + assert("zam-493", sbinfo != NULL);
  77883. + assert("zam-494", sbinfo->journal_header != NULL);
  77884. +
  77885. + check_me("zam-691", jload(sbinfo->journal_footer) == 0);
  77886. +
  77887. + footer = (struct journal_footer *)jdata(sbinfo->journal_footer);
  77888. + assert("zam-495", footer != NULL);
  77889. +
  77890. + put_unaligned(cpu_to_le64(*jnode_get_block(tx_head)),
  77891. + &footer->last_flushed_tx);
  77892. + put_unaligned(cpu_to_le64(ch->free_blocks), &footer->free_blocks);
  77893. +
  77894. + put_unaligned(cpu_to_le64(ch->nr_files), &footer->nr_files);
  77895. + put_unaligned(cpu_to_le64(ch->next_oid), &footer->next_oid);
  77896. +
  77897. + jrelse(sbinfo->journal_footer);
  77898. +}
  77899. +
  77900. +/* wander record capacity depends on current block size */
  77901. +static int wander_record_capacity(const struct super_block *super)
  77902. +{
  77903. + return (super->s_blocksize -
  77904. + sizeof(struct wander_record_header)) /
  77905. + sizeof(struct wander_entry);
  77906. +}
  77907. +
  77908. +/* Fill first wander record (tx head) in accordance with supplied data */
  77909. +static void format_tx_head(struct commit_handle *ch)
  77910. +{
  77911. + jnode *tx_head;
  77912. + jnode *next;
  77913. + struct tx_header *header;
  77914. +
  77915. + tx_head = list_entry(ch->tx_list.next, jnode, capture_link);
  77916. + assert("zam-692", &ch->tx_list != &tx_head->capture_link);
  77917. +
  77918. + next = list_entry(tx_head->capture_link.next, jnode, capture_link);
  77919. + if (&ch->tx_list == &next->capture_link)
  77920. + next = tx_head;
  77921. +
  77922. + header = (struct tx_header *)jdata(tx_head);
  77923. +
  77924. + assert("zam-460", header != NULL);
  77925. + assert("zam-462", ch->super->s_blocksize >= sizeof(struct tx_header));
  77926. +
  77927. + memset(jdata(tx_head), 0, (size_t) ch->super->s_blocksize);
  77928. + memcpy(jdata(tx_head), TX_HEADER_MAGIC, TX_HEADER_MAGIC_SIZE);
  77929. +
  77930. + put_unaligned(cpu_to_le32(ch->tx_size), &header->total);
  77931. + put_unaligned(cpu_to_le64(get_super_private(ch->super)->last_committed_tx),
  77932. + &header->prev_tx);
  77933. + put_unaligned(cpu_to_le64(*jnode_get_block(next)), &header->next_block);
  77934. + put_unaligned(cpu_to_le64(ch->free_blocks), &header->free_blocks);
  77935. + put_unaligned(cpu_to_le64(ch->nr_files), &header->nr_files);
  77936. + put_unaligned(cpu_to_le64(ch->next_oid), &header->next_oid);
  77937. +}
  77938. +
  77939. +/* prepare ordinary wander record block (fill all service fields) */
  77940. +static void
  77941. +format_wander_record(struct commit_handle *ch, jnode *node, __u32 serial)
  77942. +{
  77943. + struct wander_record_header *LRH;
  77944. + jnode *next;
  77945. +
  77946. + assert("zam-464", node != NULL);
  77947. +
  77948. + LRH = (struct wander_record_header *)jdata(node);
  77949. + next = list_entry(node->capture_link.next, jnode, capture_link);
  77950. +
  77951. + if (&ch->tx_list == &next->capture_link)
  77952. + next = list_entry(ch->tx_list.next, jnode, capture_link);
  77953. +
  77954. + assert("zam-465", LRH != NULL);
  77955. + assert("zam-463",
  77956. + ch->super->s_blocksize > sizeof(struct wander_record_header));
  77957. +
  77958. + memset(jdata(node), 0, (size_t) ch->super->s_blocksize);
  77959. + memcpy(jdata(node), WANDER_RECORD_MAGIC, WANDER_RECORD_MAGIC_SIZE);
  77960. +
  77961. + put_unaligned(cpu_to_le32(ch->tx_size), &LRH->total);
  77962. + put_unaligned(cpu_to_le32(serial), &LRH->serial);
  77963. + put_unaligned(cpu_to_le64(*jnode_get_block(next)), &LRH->next_block);
  77964. +}
  77965. +
  77966. +/* add one wandered map entry to formatted wander record */
  77967. +static void
  77968. +store_entry(jnode * node, int index, const reiser4_block_nr * a,
  77969. + const reiser4_block_nr * b)
  77970. +{
  77971. + char *data;
  77972. + struct wander_entry *pairs;
  77973. +
  77974. + data = jdata(node);
  77975. + assert("zam-451", data != NULL);
  77976. +
  77977. + pairs =
  77978. + (struct wander_entry *)(data + sizeof(struct wander_record_header));
  77979. +
  77980. + put_unaligned(cpu_to_le64(*a), &pairs[index].original);
  77981. + put_unaligned(cpu_to_le64(*b), &pairs[index].wandered);
  77982. +}
  77983. +
  77984. +/* currently, wander records contain only the wandered map, which depends on
  77985. + overwrite set size */
  77986. +static void get_tx_size(struct commit_handle *ch)
  77987. +{
  77988. + assert("zam-440", ch->overwrite_set_size != 0);
  77989. + assert("zam-695", ch->tx_size == 0);
  77990. +
  77991. + /* count all ordinary wander records
  77992. + (<overwrite_set_size> - 1) / <wander_record_capacity> + 1 and add one
  77993. + for tx head block */
  77994. + ch->tx_size =
  77995. + (ch->overwrite_set_size - 1) / wander_record_capacity(ch->super) +
  77996. + 2;
  77997. +}
  77998. +
  77999. +/* A special structure for using in store_wmap_actor() for saving its state
  78000. + between calls */
  78001. +struct store_wmap_params {
  78002. + jnode *cur; /* jnode of current wander record to fill */
  78003. + int idx; /* free element index in wander record */
  78004. + int capacity; /* capacity */
  78005. +
  78006. +#if REISER4_DEBUG
  78007. + struct list_head *tx_list;
  78008. +#endif
  78009. +};
  78010. +
  78011. +/* an actor for use in blocknr_set_iterator routine which populates the list
  78012. + of pre-formatted wander records by wandered map info */
  78013. +static int
  78014. +store_wmap_actor(txn_atom * atom UNUSED_ARG, const reiser4_block_nr * a,
  78015. + const reiser4_block_nr * b, void *data)
  78016. +{
  78017. + struct store_wmap_params *params = data;
  78018. +
  78019. + if (params->idx >= params->capacity) {
  78020. + /* a new wander record should be taken from the tx_list */
  78021. + params->cur = list_entry(params->cur->capture_link.next, jnode, capture_link);
  78022. + assert("zam-454",
  78023. + params->tx_list != &params->cur->capture_link);
  78024. +
  78025. + params->idx = 0;
  78026. + }
  78027. +
  78028. + store_entry(params->cur, params->idx, a, b);
  78029. + params->idx++;
  78030. +
  78031. + return 0;
  78032. +}
  78033. +
  78034. +/* This function is called after Relocate set gets written to disk, Overwrite
  78035. + set is written to wandered locations and all wander records are written
  78036. + also. Updated journal header block contains a pointer (block number) to
  78037. + first wander record of the just written transaction */
  78038. +static int update_journal_header(struct commit_handle *ch)
  78039. +{
  78040. + struct reiser4_super_info_data *sbinfo = get_super_private(ch->super);
  78041. + jnode *jh = sbinfo->journal_header;
  78042. + jnode *head = list_entry(ch->tx_list.next, jnode, capture_link);
  78043. + int ret;
  78044. +
  78045. + format_journal_header(ch);
  78046. +
  78047. + ret = write_jnodes_to_disk_extent(jh, 1, jnode_get_block(jh), NULL,
  78048. + WRITEOUT_FLUSH_FUA);
  78049. + if (ret)
  78050. + return ret;
  78051. +
  78052. + /* blk_run_address_space(sbinfo->fake->i_mapping);
  78053. + * blk_run_queues(); */
  78054. +
  78055. + ret = jwait_io(jh, WRITE);
  78056. +
  78057. + if (ret)
  78058. + return ret;
  78059. +
  78060. + sbinfo->last_committed_tx = *jnode_get_block(head);
  78061. +
  78062. + return 0;
  78063. +}
  78064. +
  78065. +/* This function is called after write-back is finished. We update journal
  78066. + footer block and free blocks which were occupied by wandered blocks and
  78067. + transaction wander records */
  78068. +static int update_journal_footer(struct commit_handle *ch)
  78069. +{
  78070. + reiser4_super_info_data *sbinfo = get_super_private(ch->super);
  78071. +
  78072. + jnode *jf = sbinfo->journal_footer;
  78073. +
  78074. + int ret;
  78075. +
  78076. + format_journal_footer(ch);
  78077. +
  78078. + ret = write_jnodes_to_disk_extent(jf, 1, jnode_get_block(jf), NULL,
  78079. + WRITEOUT_FLUSH_FUA);
  78080. + if (ret)
  78081. + return ret;
  78082. +
  78083. + /* blk_run_address_space(sbinfo->fake->i_mapping);
  78084. + * blk_run_queue(); */
  78085. +
  78086. + ret = jwait_io(jf, WRITE);
  78087. + if (ret)
  78088. + return ret;
  78089. +
  78090. + return 0;
  78091. +}
  78092. +
  78093. +/* free block numbers of wander records of an already written-in-place transaction */
  78094. +static void dealloc_tx_list(struct commit_handle *ch)
  78095. +{
  78096. + while (!list_empty(&ch->tx_list)) {
  78097. + jnode *cur = list_entry(ch->tx_list.next, jnode, capture_link);
  78098. + list_del(&cur->capture_link);
  78099. + ON_DEBUG(INIT_LIST_HEAD(&cur->capture_link));
  78100. + reiser4_dealloc_block(jnode_get_block(cur), 0,
  78101. + BA_DEFER | BA_FORMATTED);
  78102. +
  78103. + unpin_jnode_data(cur);
  78104. + reiser4_drop_io_head(cur);
  78105. + }
  78106. +}
  78107. +
  78108. +/* An actor for use in blocknr_set_iterator() routine which frees wandered blocks
  78109. + from atom's overwrite set. */
  78110. +static int
  78111. +dealloc_wmap_actor(txn_atom * atom UNUSED_ARG,
  78112. + const reiser4_block_nr * a UNUSED_ARG,
  78113. + const reiser4_block_nr * b, void *data UNUSED_ARG)
  78114. +{
  78115. +
  78116. + assert("zam-499", b != NULL);
  78117. + assert("zam-500", *b != 0);
  78118. + assert("zam-501", !reiser4_blocknr_is_fake(b));
  78119. +
  78120. + reiser4_dealloc_block(b, 0, BA_DEFER | BA_FORMATTED);
  78121. + return 0;
  78122. +}
  78123. +
  78124. +/* free wandered block locations of already written in place transaction */
  78125. +static void dealloc_wmap(struct commit_handle *ch)
  78126. +{
  78127. + assert("zam-696", ch->atom != NULL);
  78128. +
  78129. + blocknr_set_iterator(ch->atom, &ch->atom->wandered_map,
  78130. + dealloc_wmap_actor, NULL, 1);
  78131. +}
  78132. +
  78133. +/* helper function for alloc wandered blocks, which refill set of block
  78134. + numbers needed for wandered blocks */
  78135. +static int
  78136. +get_more_wandered_blocks(int count, reiser4_block_nr * start, int *len)
  78137. +{
  78138. + reiser4_blocknr_hint hint;
  78139. + int ret;
  78140. +
  78141. + reiser4_block_nr wide_len = count;
  78142. +
  78143. + /* FIXME-ZAM: A special policy needed for allocation of wandered blocks
  78144. + ZAM-FIXME-HANS: yes, what happened to our discussion of using a fixed
  78145. + reserved allocation area so as to get the best qualities of fixed
  78146. + journals? */
  78147. + reiser4_blocknr_hint_init(&hint);
  78148. + hint.block_stage = BLOCK_GRABBED;
  78149. +
  78150. + ret = reiser4_alloc_blocks(&hint, start, &wide_len,
  78151. + BA_FORMATTED | BA_USE_DEFAULT_SEARCH_START);
  78152. + *len = (int)wide_len;
  78153. +
  78154. + return ret;
  78155. +}
  78156. +
  78157. +/*
  78158. + * roll back changes made before issuing BIO in the case of IO error.
  78159. + */
  78160. +static void undo_bio(struct bio *bio)
  78161. +{
  78162. + int i;
  78163. +
  78164. + for (i = 0; i < bio->bi_vcnt; ++i) {
  78165. + struct page *pg;
  78166. + jnode *node;
  78167. +
  78168. + pg = bio->bi_io_vec[i].bv_page;
  78169. + end_page_writeback(pg);
  78170. + node = jprivate(pg);
  78171. + spin_lock_jnode(node);
  78172. + JF_CLR(node, JNODE_WRITEBACK);
  78173. + JF_SET(node, JNODE_DIRTY);
  78174. + spin_unlock_jnode(node);
  78175. + }
  78176. + bio_put(bio);
  78177. +}
  78178. +
  78179. +/* put overwrite set back to atom's clean list */
  78180. +static void put_overwrite_set(struct commit_handle *ch)
  78181. +{
  78182. + jnode *cur;
  78183. +
  78184. + list_for_each_entry(cur, ch->overwrite_set, capture_link)
  78185. + jrelse_tail(cur);
  78186. +}
  78187. +
  78188. +/* Count overwrite set size, grab disk space for wandered blocks allocation.
  78189. + Since we have a separate list for atom's overwrite set we just scan the list,
  78190. + count bitmap and other not leaf nodes which wandered blocks allocation we
  78191. + have to grab space for. */
  78192. +static int get_overwrite_set(struct commit_handle *ch)
  78193. +{
  78194. + int ret;
  78195. + jnode *cur;
  78196. + __u64 nr_not_leaves = 0;
  78197. +#if REISER4_DEBUG
  78198. + __u64 nr_formatted_leaves = 0;
  78199. + __u64 nr_unformatted_leaves = 0;
  78200. +#endif
  78201. +
  78202. + assert("zam-697", ch->overwrite_set_size == 0);
  78203. +
  78204. + ch->overwrite_set = ATOM_OVRWR_LIST(ch->atom);
  78205. + cur = list_entry(ch->overwrite_set->next, jnode, capture_link);
  78206. +
  78207. + while (ch->overwrite_set != &cur->capture_link) {
  78208. + jnode *next = list_entry(cur->capture_link.next, jnode, capture_link);
  78209. +
  78210. + /* Count bitmap locks for getting correct statistics what number
  78211. + * of blocks were cleared by the transaction commit. */
  78212. + if (jnode_get_type(cur) == JNODE_BITMAP)
  78213. + ch->nr_bitmap++;
  78214. +
  78215. + assert("zam-939", JF_ISSET(cur, JNODE_OVRWR)
  78216. + || jnode_get_type(cur) == JNODE_BITMAP);
  78217. +
  78218. + if (jnode_is_znode(cur) && znode_above_root(JZNODE(cur))) {
  78219. + /* we replace fake znode by another (real)
  78220. + znode which is suggested by disk_layout
  78221. + plugin */
  78222. +
  78223. + /* FIXME: it looks like fake znode should be
  78224. + replaced by jnode supplied by
  78225. + disk_layout. */
  78226. +
  78227. + struct super_block *s = reiser4_get_current_sb();
  78228. + reiser4_super_info_data *sbinfo =
  78229. + get_current_super_private();
  78230. +
  78231. + if (sbinfo->df_plug->log_super) {
  78232. + jnode *sj = sbinfo->df_plug->log_super(s);
  78233. +
  78234. + assert("zam-593", sj != NULL);
  78235. +
  78236. + if (IS_ERR(sj))
  78237. + return PTR_ERR(sj);
  78238. +
  78239. + spin_lock_jnode(sj);
  78240. + JF_SET(sj, JNODE_OVRWR);
  78241. + insert_into_atom_ovrwr_list(ch->atom, sj);
  78242. + spin_unlock_jnode(sj);
  78243. +
  78244. + /* jload it as the rest of overwrite set */
  78245. + jload_gfp(sj, reiser4_ctx_gfp_mask_get(), 0);
  78246. +
  78247. + ch->overwrite_set_size++;
  78248. + }
  78249. + spin_lock_jnode(cur);
  78250. + reiser4_uncapture_block(cur);
  78251. + jput(cur);
  78252. +
  78253. + } else {
  78254. + int ret;
  78255. + ch->overwrite_set_size++;
  78256. + ret = jload_gfp(cur, reiser4_ctx_gfp_mask_get(), 0);
  78257. + if (ret)
  78258. + reiser4_panic("zam-783",
  78259. + "cannot load e-flushed jnode back (ret = %d)\n",
  78260. + ret);
  78261. + }
  78262. +
  78263. + /* Count not leaves here because we have to grab disk space
  78264. + * for wandered blocks. They were not counted as "flush
  78265. + * reserved". Counting should be done _after_ nodes are pinned
  78266. + * into memory by jload(). */
  78267. + if (!jnode_is_leaf(cur))
  78268. + nr_not_leaves++;
  78269. + else {
  78270. +#if REISER4_DEBUG
  78271. + /* at this point @cur either has JNODE_FLUSH_RESERVED
  78272. + * or is eflushed. Locking is not strong enough to
  78273. + * write an assertion checking for this. */
  78274. + if (jnode_is_znode(cur))
  78275. + nr_formatted_leaves++;
  78276. + else
  78277. + nr_unformatted_leaves++;
  78278. +#endif
  78279. + JF_CLR(cur, JNODE_FLUSH_RESERVED);
  78280. + }
  78281. +
  78282. + cur = next;
  78283. + }
  78284. +
  78285. + /* Grab space for writing (wandered blocks) of not leaves found in
  78286. + * overwrite set. */
  78287. + ret = reiser4_grab_space_force(nr_not_leaves, BA_RESERVED);
  78288. + if (ret)
  78289. + return ret;
  78290. +
  78291. + /* Disk space for allocation of wandered blocks of leaf nodes already
  78292. + * reserved as "flush reserved", move it to grabbed space counter. */
  78293. + spin_lock_atom(ch->atom);
  78294. + assert("zam-940",
  78295. + nr_formatted_leaves + nr_unformatted_leaves <=
  78296. + ch->atom->flush_reserved);
  78297. + flush_reserved2grabbed(ch->atom, ch->atom->flush_reserved);
  78298. + spin_unlock_atom(ch->atom);
  78299. +
  78300. + return ch->overwrite_set_size;
  78301. +}
  78302. +
  78303. +/**
  78304. + * write_jnodes_to_disk_extent - submit write request
  78305. + * @head:
  78306. + * @first: first jnode of the list
  78307. + * @nr: number of jnodes on the list
  78308. + * @block_p:
  78309. + * @fq:
  78310. + * @flags: used to decide whether page is to get PG_reclaim flag
  78311. + *
  78312. + * Submits a write request for @nr jnodes beginning from the @first, other
  78313. + * jnodes are after the @first on the double-linked "capture" list. All jnodes
  78314. + * will be written to the disk region of @nr blocks starting with @block_p block
  78315. + * number. If @fq is not NULL it means that waiting for i/o completion will be
  78316. + * done more efficiently by using flush_queue_t objects.
  78317. + * This function is the one which writes list of jnodes in batch mode. It does
  78318. + * all low-level things as bio construction and page states manipulation.
  78319. + *
  78320. + * ZAM-FIXME-HANS: brief me on why this function exists, and why bios are
  78321. + * aggregated in this function instead of being left to the layers below
  78322. + *
  78323. + * FIXME: ZAM->HANS: What layer are you talking about? Can you point me to that?
  78324. + * Why that layer needed? Why BIOs cannot be constructed here?
  78325. + */
  78326. +static int write_jnodes_to_disk_extent(
  78327. + jnode *first, int nr, const reiser4_block_nr *block_p,
  78328. + flush_queue_t *fq, int flags)
  78329. +{
  78330. + struct super_block *super = reiser4_get_current_sb();
  78331. + int op_flags = (flags & WRITEOUT_FLUSH_FUA) ? REQ_PREFLUSH | REQ_FUA : 0;
  78332. + jnode *cur = first;
  78333. + reiser4_block_nr block;
  78334. +
  78335. + assert("zam-571", first != NULL);
  78336. + assert("zam-572", block_p != NULL);
  78337. + assert("zam-570", nr > 0);
  78338. +
  78339. + block = *block_p;
  78340. +
  78341. + while (nr > 0) {
  78342. + struct bio *bio;
  78343. + int nr_blocks = min(nr, BIO_MAX_PAGES);
  78344. + int i;
  78345. + int nr_used;
  78346. +
  78347. + bio = bio_alloc(GFP_NOIO, nr_blocks);
  78348. + if (!bio)
  78349. + return RETERR(-ENOMEM);
  78350. +
  78351. + bio_set_dev(bio, super->s_bdev);
  78352. + bio->bi_iter.bi_sector = block * (super->s_blocksize >> 9);
  78353. + for (nr_used = 0, i = 0; i < nr_blocks; i++) {
  78354. + struct page *pg;
  78355. +
  78356. + pg = jnode_page(cur);
  78357. + assert("zam-573", pg != NULL);
  78358. +
  78359. + get_page(pg);
  78360. +
  78361. + lock_and_wait_page_writeback(pg);
  78362. +
  78363. + if (!bio_add_page(bio, pg, super->s_blocksize, 0)) {
  78364. + /*
  78365. + * underlying device is satiated. Stop adding
  78366. + * pages to the bio.
  78367. + */
  78368. + unlock_page(pg);
  78369. + put_page(pg);
  78370. + break;
  78371. + }
  78372. +
  78373. + spin_lock_jnode(cur);
  78374. + assert("nikita-3166",
  78375. + pg->mapping == jnode_get_mapping(cur));
  78376. + assert("zam-912", !JF_ISSET(cur, JNODE_WRITEBACK));
  78377. +#if REISER4_DEBUG
  78378. + spin_lock(&cur->load);
  78379. + assert("nikita-3165", !jnode_is_releasable(cur));
  78380. + spin_unlock(&cur->load);
  78381. +#endif
  78382. + JF_SET(cur, JNODE_WRITEBACK);
  78383. + JF_CLR(cur, JNODE_DIRTY);
  78384. + ON_DEBUG(cur->written++);
  78385. +
  78386. + assert("edward-1647",
  78387. + ergo(jnode_is_znode(cur), JF_ISSET(cur, JNODE_PARSED)));
  78388. + spin_unlock_jnode(cur);
  78389. + /*
  78390. + * update checksum
  78391. + */
  78392. + if (jnode_is_znode(cur)) {
  78393. + zload(JZNODE(cur));
  78394. + if (node_plugin_by_node(JZNODE(cur))->csum)
  78395. + node_plugin_by_node(JZNODE(cur))->csum(JZNODE(cur), 0);
  78396. + zrelse(JZNODE(cur));
  78397. + }
  78398. + ClearPageError(pg);
  78399. + set_page_writeback(pg);
  78400. +
  78401. + if (get_current_context()->entd) {
  78402. + /* this is ent thread */
  78403. + entd_context *ent = get_entd_context(super);
  78404. + struct wbq *rq, *next;
  78405. +
  78406. + spin_lock(&ent->guard);
  78407. +
  78408. + if (pg == ent->cur_request->page) {
  78409. + /*
  78410. + * entd is called for this page. This
  78411. + * request is not in the todo list
  78412. + */
  78413. + ent->cur_request->written = 1;
  78414. + } else {
  78415. + /*
  78416. + * if we have written a page for which writepage
  78417. + * is called for - move request to another list.
  78418. + */
  78419. + list_for_each_entry_safe(rq, next, &ent->todo_list, link) {
  78420. + assert("", rq->magic == WBQ_MAGIC);
  78421. + if (pg == rq->page) {
  78422. + /*
  78423. + * remove request from
  78424. + * entd's queue, but do
  78425. + * not wake up a thread
  78426. + * which put this
  78427. + * request
  78428. + */
  78429. + list_del_init(&rq->link);
  78430. + ent->nr_todo_reqs --;
  78431. + list_add_tail(&rq->link, &ent->done_list);
  78432. + ent->nr_done_reqs ++;
  78433. + rq->written = 1;
  78434. + break;
  78435. + }
  78436. + }
  78437. + }
  78438. + spin_unlock(&ent->guard);
  78439. + }
  78440. +
  78441. + clear_page_dirty_for_io(pg);
  78442. +
  78443. + unlock_page(pg);
  78444. +
  78445. + cur = list_entry(cur->capture_link.next, jnode, capture_link);
  78446. + nr_used++;
  78447. + }
  78448. + if (nr_used > 0) {
  78449. + assert("nikita-3453",
  78450. + bio->bi_iter.bi_size == super->s_blocksize * nr_used);
  78451. + assert("nikita-3454", bio->bi_vcnt == nr_used);
  78452. +
  78453. + /* Check if we are allowed to write at all */
  78454. + if (super->s_flags & MS_RDONLY)
  78455. + undo_bio(bio);
  78456. + else {
  78457. + add_fq_to_bio(fq, bio);
  78458. + bio_get(bio);
  78459. + bio_set_op_attrs(bio, WRITE, op_flags);
  78460. + submit_bio(bio);
  78461. + bio_put(bio);
  78462. + }
  78463. +
  78464. + block += nr_used - 1;
  78465. + update_blocknr_hint_default(super, &block);
  78466. + block += 1;
  78467. + } else {
  78468. + bio_put(bio);
  78469. + }
  78470. + nr -= nr_used;
  78471. + }
  78472. +
  78473. + return 0;
  78474. +}
  78475. +
  78476. +/* This is a procedure which recovers a contiguous sequences of disk block
  78477. + numbers in the given list of j-nodes and submits write requests on this
  78478. + per-sequence basis */
  78479. +int
  78480. +write_jnode_list(struct list_head *head, flush_queue_t *fq,
  78481. + long *nr_submitted, int flags)
  78482. +{
  78483. + int ret;
  78484. + jnode *beg = list_entry(head->next, jnode, capture_link);
  78485. +
  78486. + while (head != &beg->capture_link) {
  78487. + int nr = 1;
  78488. + jnode *cur = list_entry(beg->capture_link.next, jnode, capture_link);
  78489. +
  78490. + while (head != &cur->capture_link) {
  78491. + if (*jnode_get_block(cur) != *jnode_get_block(beg) + nr)
  78492. + break;
  78493. + ++nr;
  78494. + cur = list_entry(cur->capture_link.next, jnode, capture_link);
  78495. + }
  78496. +
  78497. + ret = write_jnodes_to_disk_extent(
  78498. + beg, nr, jnode_get_block(beg), fq, flags);
  78499. + if (ret)
  78500. + return ret;
  78501. +
  78502. + if (nr_submitted)
  78503. + *nr_submitted += nr;
  78504. +
  78505. + beg = cur;
  78506. + }
  78507. +
  78508. + return 0;
  78509. +}
  78510. +
  78511. +/* add given wandered mapping to atom's wandered map */
  78512. +static int
  78513. +add_region_to_wmap(jnode * cur, int len, const reiser4_block_nr * block_p)
  78514. +{
  78515. + int ret;
  78516. + blocknr_set_entry *new_bsep = NULL;
  78517. + reiser4_block_nr block;
  78518. +
  78519. + txn_atom *atom;
  78520. +
  78521. + assert("zam-568", block_p != NULL);
  78522. + block = *block_p;
  78523. + assert("zam-569", len > 0);
  78524. +
  78525. + while ((len--) > 0) {
  78526. + do {
  78527. + atom = get_current_atom_locked();
  78528. + assert("zam-536",
  78529. + !reiser4_blocknr_is_fake(jnode_get_block(cur)));
  78530. + ret =
  78531. + blocknr_set_add_pair(atom, &atom->wandered_map,
  78532. + &new_bsep,
  78533. + jnode_get_block(cur), &block);
  78534. + } while (ret == -E_REPEAT);
  78535. +
  78536. + if (ret) {
  78537. + /* deallocate blocks which were not added to wandered
  78538. + map */
  78539. + reiser4_block_nr wide_len = len;
  78540. +
  78541. + reiser4_dealloc_blocks(&block, &wide_len,
  78542. + BLOCK_NOT_COUNTED,
  78543. + BA_FORMATTED
  78544. + /* formatted, without defer */ );
  78545. +
  78546. + return ret;
  78547. + }
  78548. +
  78549. + spin_unlock_atom(atom);
  78550. +
  78551. + cur = list_entry(cur->capture_link.next, jnode, capture_link);
  78552. + ++block;
  78553. + }
  78554. +
  78555. + return 0;
  78556. +}
  78557. +
  78558. +/* Allocate wandered blocks for current atom's OVERWRITE SET and immediately
  78559. + submit IO for allocated blocks. We assume that current atom is in a stage
  78560. + when any atom fusion is impossible and atom is unlocked and it is safe. */
  78561. +static int alloc_wandered_blocks(struct commit_handle *ch, flush_queue_t *fq)
  78562. +{
  78563. + reiser4_block_nr block;
  78564. +
  78565. + int rest;
  78566. + int len;
  78567. + int ret;
  78568. +
  78569. + jnode *cur;
  78570. +
  78571. + assert("zam-534", ch->overwrite_set_size > 0);
  78572. +
  78573. + rest = ch->overwrite_set_size;
  78574. +
  78575. + cur = list_entry(ch->overwrite_set->next, jnode, capture_link);
  78576. + while (ch->overwrite_set != &cur->capture_link) {
  78577. + assert("zam-567", JF_ISSET(cur, JNODE_OVRWR));
  78578. +
  78579. + ret = get_more_wandered_blocks(rest, &block, &len);
  78580. + if (ret)
  78581. + return ret;
  78582. +
  78583. + rest -= len;
  78584. +
  78585. + ret = add_region_to_wmap(cur, len, &block);
  78586. + if (ret)
  78587. + return ret;
  78588. +
  78589. + ret = write_jnodes_to_disk_extent(cur, len, &block, fq, 0);
  78590. + if (ret)
  78591. + return ret;
  78592. +
  78593. + while ((len--) > 0) {
  78594. + assert("zam-604",
  78595. + ch->overwrite_set != &cur->capture_link);
  78596. + cur = list_entry(cur->capture_link.next, jnode, capture_link);
  78597. + }
  78598. + }
  78599. +
  78600. + return 0;
  78601. +}
  78602. +
  78603. +/* allocate given number of nodes over the journal area and link them into a
  78604. + list, return pointer to the first jnode in the list */
  78605. +static int alloc_tx(struct commit_handle *ch, flush_queue_t * fq)
  78606. +{
  78607. + reiser4_blocknr_hint hint;
  78608. + reiser4_block_nr allocated = 0;
  78609. + reiser4_block_nr first, len;
  78610. + jnode *cur;
  78611. + jnode *txhead;
  78612. + int ret;
  78613. + reiser4_context *ctx;
  78614. + reiser4_super_info_data *sbinfo;
  78615. +
  78616. + assert("zam-698", ch->tx_size > 0);
  78617. + assert("zam-699", list_empty_careful(&ch->tx_list));
  78618. +
  78619. + ctx = get_current_context();
  78620. + sbinfo = get_super_private(ctx->super);
  78621. +
  78622. + while (allocated < (unsigned)ch->tx_size) {
  78623. + len = (ch->tx_size - allocated);
  78624. +
  78625. + reiser4_blocknr_hint_init(&hint);
  78626. +
  78627. + hint.block_stage = BLOCK_GRABBED;
  78628. +
  78629. + /* FIXME: there should be some block allocation policy for
  78630. + nodes which contain wander records */
  78631. +
  78632. + /* We assume that disk space for wandered record blocks can be
  78633. + * taken from reserved area. */
  78634. + ret = reiser4_alloc_blocks(&hint, &first, &len,
  78635. + BA_FORMATTED | BA_RESERVED |
  78636. + BA_USE_DEFAULT_SEARCH_START);
  78637. + reiser4_blocknr_hint_done(&hint);
  78638. +
  78639. + if (ret)
  78640. + return ret;
  78641. +
  78642. + allocated += len;
  78643. +
  78644. + /* create jnodes for all wander records */
  78645. + while (len--) {
  78646. + cur = reiser4_alloc_io_head(&first);
  78647. +
  78648. + if (cur == NULL) {
  78649. + ret = RETERR(-ENOMEM);
  78650. + goto free_not_assigned;
  78651. + }
  78652. +
  78653. + ret = jinit_new(cur, reiser4_ctx_gfp_mask_get());
  78654. +
  78655. + if (ret != 0) {
  78656. + jfree(cur);
  78657. + goto free_not_assigned;
  78658. + }
  78659. +
  78660. + pin_jnode_data(cur);
  78661. +
  78662. + list_add_tail(&cur->capture_link, &ch->tx_list);
  78663. +
  78664. + first++;
  78665. + }
  78666. + }
  78667. +
  78668. + { /* format an on-disk linked list of wander records */
  78669. + int serial = 1;
  78670. +
  78671. + txhead = list_entry(ch->tx_list.next, jnode, capture_link);
  78672. + format_tx_head(ch);
  78673. +
  78674. + cur = list_entry(txhead->capture_link.next, jnode, capture_link);
  78675. + while (&ch->tx_list != &cur->capture_link) {
  78676. + format_wander_record(ch, cur, serial++);
  78677. + cur = list_entry(cur->capture_link.next, jnode, capture_link);
  78678. + }
  78679. + }
  78680. +
  78681. + { /* Fill wander records with Wandered Set */
  78682. + struct store_wmap_params params;
  78683. + txn_atom *atom;
  78684. +
  78685. + params.cur = list_entry(txhead->capture_link.next, jnode, capture_link);
  78686. +
  78687. + params.idx = 0;
  78688. + params.capacity =
  78689. + wander_record_capacity(reiser4_get_current_sb());
  78690. +
  78691. + atom = get_current_atom_locked();
  78692. + blocknr_set_iterator(atom, &atom->wandered_map,
  78693. + &store_wmap_actor, &params, 0);
  78694. + spin_unlock_atom(atom);
  78695. + }
  78696. +
  78697. + { /* relse all jnodes from tx_list */
  78698. + cur = list_entry(ch->tx_list.next, jnode, capture_link);
  78699. + while (&ch->tx_list != &cur->capture_link) {
  78700. + jrelse(cur);
  78701. + cur = list_entry(cur->capture_link.next, jnode, capture_link);
  78702. + }
  78703. + }
  78704. +
  78705. + ret = write_jnode_list(&ch->tx_list, fq, NULL, 0);
  78706. +
  78707. + return ret;
  78708. +
  78709. + free_not_assigned:
  78710. + /* We deallocate blocks not yet assigned to jnodes on tx_list. The
  78711. + caller takes care about invalidating of tx list */
  78712. + reiser4_dealloc_blocks(&first, &len, BLOCK_NOT_COUNTED, BA_FORMATTED);
  78713. +
  78714. + return ret;
  78715. +}
  78716. +
  78717. +static int commit_tx(struct commit_handle *ch)
  78718. +{
  78719. + flush_queue_t *fq;
  78720. + int ret;
  78721. +
  78722. + /* Grab more space for wandered records. */
  78723. + ret = reiser4_grab_space_force((__u64) (ch->tx_size), BA_RESERVED);
  78724. + if (ret)
  78725. + return ret;
  78726. +
  78727. + fq = get_fq_for_current_atom();
  78728. + if (IS_ERR(fq))
  78729. + return PTR_ERR(fq);
  78730. +
  78731. + spin_unlock_atom(fq->atom);
  78732. + do {
  78733. + ret = alloc_wandered_blocks(ch, fq);
  78734. + if (ret)
  78735. + break;
  78736. + ret = alloc_tx(ch, fq);
  78737. + if (ret)
  78738. + break;
  78739. + } while (0);
  78740. +
  78741. + reiser4_fq_put(fq);
  78742. + if (ret)
  78743. + return ret;
  78744. + ret = current_atom_finish_all_fq();
  78745. + if (ret)
  78746. + return ret;
  78747. + return update_journal_header(ch);
  78748. +}
  78749. +
  78750. +static int write_tx_back(struct commit_handle * ch)
  78751. +{
  78752. + flush_queue_t *fq;
  78753. + int ret;
  78754. +
  78755. + fq = get_fq_for_current_atom();
  78756. + if (IS_ERR(fq))
  78757. + return PTR_ERR(fq);
  78758. + spin_unlock_atom(fq->atom);
  78759. + ret = write_jnode_list(
  78760. + ch->overwrite_set, fq, NULL, WRITEOUT_FOR_PAGE_RECLAIM);
  78761. + reiser4_fq_put(fq);
  78762. + if (ret)
  78763. + return ret;
  78764. + ret = current_atom_finish_all_fq();
  78765. + if (ret)
  78766. + return ret;
  78767. + return update_journal_footer(ch);
  78768. +}
  78769. +
  78770. +/* We assume that at this moment all captured blocks are marked as RELOC or
  78771. + WANDER (belong to Relocate or Overwrite set), all nodes from Relocate set
  78772. + are submitted to write.
  78773. +*/
  78774. +
  78775. +int reiser4_write_logs(long *nr_submitted)
  78776. +{
  78777. + txn_atom *atom;
  78778. + struct super_block *super = reiser4_get_current_sb();
  78779. + reiser4_super_info_data *sbinfo = get_super_private(super);
  78780. + struct commit_handle ch;
  78781. + int ret;
  78782. +
  78783. + writeout_mode_enable();
  78784. +
  78785. + /* block allocator may add j-nodes to the clean_list */
  78786. + ret = reiser4_pre_commit_hook();
  78787. + if (ret)
  78788. + return ret;
  78789. +
  78790. + /* No locks are required if we take atom which stage >=
  78791. + * ASTAGE_PRE_COMMIT */
  78792. + atom = get_current_context()->trans->atom;
  78793. + assert("zam-965", atom != NULL);
  78794. +
  78795. + /* relocate set is on the atom->clean_nodes list after
  78796. + * current_atom_complete_writes() finishes. It can be safely
  78797. + * uncaptured after commit_mutex is locked, because any atom that
  78798. + * captures these nodes is guaranteed to commit after current one.
  78799. + *
  78800. + * This can only be done after reiser4_pre_commit_hook(), because it is where
  78801. + * early flushed jnodes with CREATED bit are transferred to the
  78802. + * overwrite list. */
  78803. + reiser4_invalidate_list(ATOM_CLEAN_LIST(atom));
  78804. + spin_lock_atom(atom);
  78805. + /* There might be waiters for the relocate nodes which we have
  78806. + * released, wake them up. */
  78807. + reiser4_atom_send_event(atom);
  78808. + spin_unlock_atom(atom);
  78809. +
  78810. + if (REISER4_DEBUG) {
  78811. + int level;
  78812. +
  78813. + for (level = 0; level < REAL_MAX_ZTREE_HEIGHT + 1; ++level)
  78814. + assert("nikita-3352",
  78815. + list_empty_careful(ATOM_DIRTY_LIST(atom, level)));
  78816. + }
  78817. +
  78818. + sbinfo->nr_files_committed += (unsigned)atom->nr_objects_created;
  78819. + sbinfo->nr_files_committed -= (unsigned)atom->nr_objects_deleted;
  78820. +
  78821. + init_commit_handle(&ch, atom);
  78822. +
  78823. + ch.free_blocks = sbinfo->blocks_free_committed;
  78824. + ch.nr_files = sbinfo->nr_files_committed;
  78825. + /* ZAM-FIXME-HANS: email me what the contention level is for the super
  78826. + * lock. */
  78827. + ch.next_oid = oid_next(super);
  78828. +
  78829. + /* count overwrite set and place it in a separate list */
  78830. + ret = get_overwrite_set(&ch);
  78831. +
  78832. + if (ret <= 0) {
  78833. + /* It is possible that overwrite set is empty here, it means
  78834. + all captured nodes are clean */
  78835. + goto up_and_ret;
  78836. + }
  78837. +
  78838. + /* Inform the caller about what number of dirty pages will be
  78839. + * submitted to disk. */
  78840. + *nr_submitted += ch.overwrite_set_size - ch.nr_bitmap;
  78841. +
  78842. + /* count all records needed for storing of the wandered set */
  78843. + get_tx_size(&ch);
  78844. +
  78845. + ret = commit_tx(&ch);
  78846. + if (ret)
  78847. + goto up_and_ret;
  78848. +
  78849. + spin_lock_atom(atom);
  78850. + reiser4_atom_set_stage(atom, ASTAGE_POST_COMMIT);
  78851. + spin_unlock_atom(atom);
  78852. + reiser4_post_commit_hook();
  78853. +
  78854. + ret = write_tx_back(&ch);
  78855. +
  78856. + up_and_ret:
  78857. + if (ret) {
  78858. + /* there could be fq attached to current atom; the only way to
  78859. + remove them is: */
  78860. + current_atom_finish_all_fq();
  78861. + }
  78862. +
  78863. + /* free blocks of flushed transaction */
  78864. + dealloc_tx_list(&ch);
  78865. + dealloc_wmap(&ch);
  78866. +
  78867. + reiser4_post_write_back_hook();
  78868. +
  78869. + put_overwrite_set(&ch);
  78870. +
  78871. + done_commit_handle(&ch);
  78872. +
  78873. + writeout_mode_disable();
  78874. +
  78875. + return ret;
  78876. +}
  78877. +
  78878. +/* consistency checks for journal data/control blocks: header, footer, log
  78879. + records, transactions head blocks. All functions return zero on success. */
  78880. +
  78881. +static int check_journal_header(const jnode * node UNUSED_ARG)
  78882. +{
  78883. + /* FIXME: journal header has no magic field yet. */
  78884. + return 0;
  78885. +}
  78886. +
  78887. +/* wait for write completion for all jnodes from given list */
  78888. +static int wait_on_jnode_list(struct list_head *head)
  78889. +{
  78890. + jnode *scan;
  78891. + int ret = 0;
  78892. +
  78893. + list_for_each_entry(scan, head, capture_link) {
  78894. + struct page *pg = jnode_page(scan);
  78895. +
  78896. + if (pg) {
  78897. + if (PageWriteback(pg))
  78898. + wait_on_page_writeback(pg);
  78899. +
  78900. + if (PageError(pg))
  78901. + ret++;
  78902. + }
  78903. + }
  78904. +
  78905. + return ret;
  78906. +}
  78907. +
  78908. +static int check_journal_footer(const jnode * node UNUSED_ARG)
  78909. +{
  78910. + /* FIXME: journal footer has no magic field yet. */
  78911. + return 0;
  78912. +}
  78913. +
  78914. +static int check_tx_head(const jnode * node)
  78915. +{
  78916. + struct tx_header *header = (struct tx_header *)jdata(node);
  78917. +
  78918. + if (memcmp(&header->magic, TX_HEADER_MAGIC, TX_HEADER_MAGIC_SIZE) != 0) {
  78919. + warning("zam-627", "tx head at block %s corrupted\n",
  78920. + sprint_address(jnode_get_block(node)));
  78921. + return RETERR(-EIO);
  78922. + }
  78923. +
  78924. + return 0;
  78925. +}
  78926. +
  78927. +static int check_wander_record(const jnode * node)
  78928. +{
  78929. + struct wander_record_header *RH =
  78930. + (struct wander_record_header *)jdata(node);
  78931. +
  78932. + if (memcmp(&RH->magic, WANDER_RECORD_MAGIC, WANDER_RECORD_MAGIC_SIZE) !=
  78933. + 0) {
  78934. + warning("zam-628", "wander record at block %s corrupted\n",
  78935. + sprint_address(jnode_get_block(node)));
  78936. + return RETERR(-EIO);
  78937. + }
  78938. +
  78939. + return 0;
  78940. +}
  78941. +
  78942. +/* fill the commit_handle structure with everything needed for update_journal_footer */
  78943. +static int restore_commit_handle(struct commit_handle *ch, jnode *tx_head)
  78944. +{
  78945. + struct tx_header *TXH;
  78946. + int ret;
  78947. +
  78948. + ret = jload(tx_head);
  78949. + if (ret)
  78950. + return ret;
  78951. +
  78952. + TXH = (struct tx_header *)jdata(tx_head);
  78953. +
  78954. + ch->free_blocks = le64_to_cpu(get_unaligned(&TXH->free_blocks));
  78955. + ch->nr_files = le64_to_cpu(get_unaligned(&TXH->nr_files));
  78956. + ch->next_oid = le64_to_cpu(get_unaligned(&TXH->next_oid));
  78957. +
  78958. + jrelse(tx_head);
  78959. +
  78960. + list_add(&tx_head->capture_link, &ch->tx_list);
  78961. +
  78962. + return 0;
  78963. +}
  78964. +
  78965. +/* replay one transaction: restore and write overwrite set in place */
  78966. +static int replay_transaction(const struct super_block *s,
  78967. + jnode * tx_head,
  78968. + const reiser4_block_nr * log_rec_block_p,
  78969. + const reiser4_block_nr * end_block,
  78970. + unsigned int nr_wander_records)
  78971. +{
  78972. + reiser4_block_nr log_rec_block = *log_rec_block_p;
  78973. + struct commit_handle ch;
  78974. + LIST_HEAD(overwrite_set);
  78975. + jnode *log;
  78976. + int ret;
  78977. +
  78978. + init_commit_handle(&ch, NULL);
  78979. + ch.overwrite_set = &overwrite_set;
  78980. +
  78981. + restore_commit_handle(&ch, tx_head);
  78982. +
  78983. + while (log_rec_block != *end_block) {
  78984. + struct wander_record_header *header;
  78985. + struct wander_entry *entry;
  78986. +
  78987. + int i;
  78988. +
  78989. + if (nr_wander_records == 0) {
  78990. + warning("zam-631",
  78991. + "number of wander records in the linked list"
  78992. + " greater than number stored in tx head.\n");
  78993. + ret = RETERR(-EIO);
  78994. + goto free_ow_set;
  78995. + }
  78996. +
  78997. + log = reiser4_alloc_io_head(&log_rec_block);
  78998. + if (log == NULL)
  78999. + return RETERR(-ENOMEM);
  79000. +
  79001. + ret = jload(log);
  79002. + if (ret < 0) {
  79003. + reiser4_drop_io_head(log);
  79004. + return ret;
  79005. + }
  79006. +
  79007. + ret = check_wander_record(log);
  79008. + if (ret) {
  79009. + jrelse(log);
  79010. + reiser4_drop_io_head(log);
  79011. + return ret;
  79012. + }
  79013. +
  79014. + header = (struct wander_record_header *)jdata(log);
  79015. + log_rec_block = le64_to_cpu(get_unaligned(&header->next_block));
  79016. +
  79017. + entry = (struct wander_entry *)(header + 1);
  79018. +
  79019. + /* restore overwrite set from wander record content */
  79020. + for (i = 0; i < wander_record_capacity(s); i++) {
  79021. + reiser4_block_nr block;
  79022. + jnode *node;
  79023. +
  79024. + block = le64_to_cpu(get_unaligned(&entry->wandered));
  79025. + if (block == 0)
  79026. + break;
  79027. +
  79028. + node = reiser4_alloc_io_head(&block);
  79029. + if (node == NULL) {
  79030. + ret = RETERR(-ENOMEM);
  79031. + /*
  79032. + * FIXME-VS:???
  79033. + */
  79034. + jrelse(log);
  79035. + reiser4_drop_io_head(log);
  79036. + goto free_ow_set;
  79037. + }
  79038. +
  79039. + ret = jload(node);
  79040. +
  79041. + if (ret < 0) {
  79042. + reiser4_drop_io_head(node);
  79043. + /*
  79044. + * FIXME-VS:???
  79045. + */
  79046. + jrelse(log);
  79047. + reiser4_drop_io_head(log);
  79048. + goto free_ow_set;
  79049. + }
  79050. +
  79051. + block = le64_to_cpu(get_unaligned(&entry->original));
  79052. +
  79053. + assert("zam-603", block != 0);
  79054. +
  79055. + jnode_set_block(node, &block);
  79056. +
  79057. + list_add_tail(&node->capture_link, ch.overwrite_set);
  79058. +
  79059. + ++entry;
  79060. + }
  79061. +
  79062. + jrelse(log);
  79063. + reiser4_drop_io_head(log);
  79064. +
  79065. + --nr_wander_records;
  79066. + }
  79067. +
  79068. + if (nr_wander_records != 0) {
  79069. + warning("zam-632", "number of wander records in the linked list"
  79070. + " less than number stored in tx head.\n");
  79071. + ret = RETERR(-EIO);
  79072. + goto free_ow_set;
  79073. + }
  79074. +
  79075. + { /* write wandered set in place */
  79076. + write_jnode_list(ch.overwrite_set, NULL, NULL, 0);
  79077. + ret = wait_on_jnode_list(ch.overwrite_set);
  79078. +
  79079. + if (ret) {
  79080. + ret = RETERR(-EIO);
  79081. + goto free_ow_set;
  79082. + }
  79083. + }
  79084. +
  79085. + ret = update_journal_footer(&ch);
  79086. +
  79087. + free_ow_set:
  79088. +
  79089. + while (!list_empty(ch.overwrite_set)) {
  79090. + jnode *cur = list_entry(ch.overwrite_set->next, jnode, capture_link);
  79091. + list_del_init(&cur->capture_link);
  79092. + jrelse(cur);
  79093. + reiser4_drop_io_head(cur);
  79094. + }
  79095. +
  79096. + list_del_init(&tx_head->capture_link);
  79097. +
  79098. + done_commit_handle(&ch);
  79099. +
  79100. + return ret;
  79101. +}
  79102. +
  79103. +/* find the oldest committed but not yet replayed transaction and replay it. The
  79104. + * transaction was committed and the journal header block was updated, but the
  79105. + * writing of the atom's overwrite set in-place and the update of the journal
  79106. + * footer block were not completed. This function completes the process by
  79107. + * recovering the atom's overwrite set from its wandered locations, writing it
  79108. + * in-place and updating the journal footer. */
  79109. +static int replay_oldest_transaction(struct super_block *s)
  79110. +{
  79111. + reiser4_super_info_data *sbinfo = get_super_private(s);
  79112. + jnode *jf = sbinfo->journal_footer;
  79113. + unsigned int total;
  79114. + struct journal_footer *F;
  79115. + struct tx_header *T;
  79116. +
  79117. + reiser4_block_nr prev_tx;
  79118. + reiser4_block_nr last_flushed_tx;
  79119. + reiser4_block_nr log_rec_block = 0;
  79120. +
  79121. + jnode *tx_head;
  79122. +
  79123. + int ret;
  79124. +
  79125. + if ((ret = jload(jf)) < 0)
  79126. + return ret;
  79127. +
  79128. + F = (struct journal_footer *)jdata(jf);
  79129. +
  79130. + last_flushed_tx = le64_to_cpu(get_unaligned(&F->last_flushed_tx));
  79131. +
  79132. + jrelse(jf);
  79133. +
  79134. + if (sbinfo->last_committed_tx == last_flushed_tx) {
  79135. + /* all transactions are replayed */
  79136. + return 0;
  79137. + }
  79138. +
  79139. + prev_tx = sbinfo->last_committed_tx;
  79140. +
  79141. + /* searching for oldest not flushed transaction */
  79142. + while (1) {
  79143. + tx_head = reiser4_alloc_io_head(&prev_tx);
  79144. + if (!tx_head)
  79145. + return RETERR(-ENOMEM);
  79146. +
  79147. + ret = jload(tx_head);
  79148. + if (ret < 0) {
  79149. + reiser4_drop_io_head(tx_head);
  79150. + return ret;
  79151. + }
  79152. +
  79153. + ret = check_tx_head(tx_head);
  79154. + if (ret) {
  79155. + jrelse(tx_head);
  79156. + reiser4_drop_io_head(tx_head);
  79157. + return ret;
  79158. + }
  79159. +
  79160. + T = (struct tx_header *)jdata(tx_head);
  79161. +
  79162. + prev_tx = le64_to_cpu(get_unaligned(&T->prev_tx));
  79163. +
  79164. + if (prev_tx == last_flushed_tx)
  79165. + break;
  79166. +
  79167. + jrelse(tx_head);
  79168. + reiser4_drop_io_head(tx_head);
  79169. + }
  79170. +
  79171. + total = le32_to_cpu(get_unaligned(&T->total));
  79172. + log_rec_block = le64_to_cpu(get_unaligned(&T->next_block));
  79173. +
  79174. + pin_jnode_data(tx_head);
  79175. + jrelse(tx_head);
  79176. +
  79177. + ret =
  79178. + replay_transaction(s, tx_head, &log_rec_block,
  79179. + jnode_get_block(tx_head), total - 1);
  79180. +
  79181. + unpin_jnode_data(tx_head);
  79182. + reiser4_drop_io_head(tx_head);
  79183. +
  79184. + if (ret)
  79185. + return ret;
  79186. + return -E_REPEAT;
  79187. +}
  79188. +
  79189. +/* The current reiser4 journal implementation was optimized not to capture the
  79190. + super block if certain super block fields are modified. Currently, the set
  79191. + is (<free block count>, <OID allocator>). These fields are logged in a
  79192. + special way which includes storing them in each transaction head block at
  79193. + atom commit time and writing that information to journal footer block at
  79194. + atom flush time. For getting info from journal footer block to the
  79195. + in-memory super block there is a special function
  79196. + reiser4_journal_recover_sb_data() which should be called after disk format
  79197. + plugin re-reads super block after journal replaying.
  79198. +*/
  79199. +
  79200. +/* get the information from journal footer in-memory super block */
  79201. +int reiser4_journal_recover_sb_data(struct super_block *s)
  79202. +{
  79203. + reiser4_super_info_data *sbinfo = get_super_private(s);
  79204. + struct journal_footer *jf;
  79205. + int ret;
  79206. +
  79207. + assert("zam-673", sbinfo->journal_footer != NULL);
  79208. +
  79209. + ret = jload(sbinfo->journal_footer);
  79210. + if (ret != 0)
  79211. + return ret;
  79212. +
  79213. + ret = check_journal_footer(sbinfo->journal_footer);
  79214. + if (ret != 0)
  79215. + goto out;
  79216. +
  79217. + jf = (struct journal_footer *)jdata(sbinfo->journal_footer);
  79218. +
  79219. + /* was there at least one flushed transaction? */
  79220. + if (jf->last_flushed_tx) {
  79221. +
  79222. + /* restore free block counter logged in this transaction */
  79223. + reiser4_set_free_blocks(s, le64_to_cpu(get_unaligned(&jf->free_blocks)));
  79224. +
  79225. + /* restore oid allocator state */
  79226. + oid_init_allocator(s,
  79227. + le64_to_cpu(get_unaligned(&jf->nr_files)),
  79228. + le64_to_cpu(get_unaligned(&jf->next_oid)));
  79229. + }
  79230. + out:
  79231. + jrelse(sbinfo->journal_footer);
  79232. + return ret;
  79233. +}
  79234. +
  79235. +/* reiser4 replay journal procedure */
  79236. +int reiser4_journal_replay(struct super_block *s)
  79237. +{
  79238. + reiser4_super_info_data *sbinfo = get_super_private(s);
  79239. + jnode *jh, *jf;
  79240. + struct journal_header *header;
  79241. + int nr_tx_replayed = 0;
  79242. + int ret;
  79243. +
  79244. + assert("zam-582", sbinfo != NULL);
  79245. +
  79246. + jh = sbinfo->journal_header;
  79247. + jf = sbinfo->journal_footer;
  79248. +
  79249. + if (!jh || !jf) {
  79250. + /* it is possible that disk layout does not support journal
  79251. + structures, we just warn about this */
  79252. + warning("zam-583",
  79253. + "journal control blocks were not loaded by disk layout plugin. "
  79254. + "journal replaying is not possible.\n");
  79255. + return 0;
  79256. + }
  79257. +
  79258. + /* Take free block count from journal footer block. The free block
  79259. + counter value corresponds the last flushed transaction state */
  79260. + ret = jload(jf);
  79261. + if (ret < 0)
  79262. + return ret;
  79263. +
  79264. + ret = check_journal_footer(jf);
  79265. + if (ret) {
  79266. + jrelse(jf);
  79267. + return ret;
  79268. + }
  79269. +
  79270. + jrelse(jf);
  79271. +
  79272. + /* store last committed transaction info in reiser4 in-memory super
  79273. + block */
  79274. + ret = jload(jh);
  79275. + if (ret < 0)
  79276. + return ret;
  79277. +
  79278. + ret = check_journal_header(jh);
  79279. + if (ret) {
  79280. + jrelse(jh);
  79281. + return ret;
  79282. + }
  79283. +
  79284. + header = (struct journal_header *)jdata(jh);
  79285. + sbinfo->last_committed_tx = le64_to_cpu(get_unaligned(&header->last_committed_tx));
  79286. +
  79287. + jrelse(jh);
  79288. +
  79289. + /* replay committed transactions */
  79290. + while ((ret = replay_oldest_transaction(s)) == -E_REPEAT)
  79291. + nr_tx_replayed++;
  79292. +
  79293. + return ret;
  79294. +}
  79295. +
  79296. +/* load journal control block (either journal header or journal footer block) */
  79297. +static int
  79298. +load_journal_control_block(jnode ** node, const reiser4_block_nr * block)
  79299. +{
  79300. + int ret;
  79301. +
  79302. + *node = reiser4_alloc_io_head(block);
  79303. + if (!(*node))
  79304. + return RETERR(-ENOMEM);
  79305. +
  79306. + ret = jload(*node);
  79307. +
  79308. + if (ret) {
  79309. + reiser4_drop_io_head(*node);
  79310. + *node = NULL;
  79311. + return ret;
  79312. + }
  79313. +
  79314. + pin_jnode_data(*node);
  79315. + jrelse(*node);
  79316. +
  79317. + return 0;
  79318. +}
  79319. +
  79320. +/* unload journal header or footer and free jnode */
  79321. +static void unload_journal_control_block(jnode ** node)
  79322. +{
  79323. + if (*node) {
  79324. + unpin_jnode_data(*node);
  79325. + reiser4_drop_io_head(*node);
  79326. + *node = NULL;
  79327. + }
  79328. +}
  79329. +
  79330. +/* release journal control blocks */
  79331. +void reiser4_done_journal_info(struct super_block *s)
  79332. +{
  79333. + reiser4_super_info_data *sbinfo = get_super_private(s);
  79334. +
  79335. + assert("zam-476", sbinfo != NULL);
  79336. +
  79337. + unload_journal_control_block(&sbinfo->journal_header);
  79338. + unload_journal_control_block(&sbinfo->journal_footer);
  79339. + rcu_barrier();
  79340. +}
  79341. +
  79342. +/* load journal control blocks */
  79343. +int reiser4_init_journal_info(struct super_block *s)
  79344. +{
  79345. + reiser4_super_info_data *sbinfo = get_super_private(s);
  79346. + journal_location *loc;
  79347. + int ret;
  79348. +
  79349. + loc = &sbinfo->jloc;
  79350. +
  79351. + assert("zam-651", loc != NULL);
  79352. + assert("zam-652", loc->header != 0);
  79353. + assert("zam-653", loc->footer != 0);
  79354. +
  79355. + ret = load_journal_control_block(&sbinfo->journal_header, &loc->header);
  79356. +
  79357. + if (ret)
  79358. + return ret;
  79359. +
  79360. + ret = load_journal_control_block(&sbinfo->journal_footer, &loc->footer);
  79361. +
  79362. + if (ret) {
  79363. + unload_journal_control_block(&sbinfo->journal_header);
  79364. + }
  79365. +
  79366. + return ret;
  79367. +}
  79368. +
  79369. +/* Make Linus happy.
  79370. + Local variables:
  79371. + c-indentation-style: "K&R"
  79372. + mode-name: "LC"
  79373. + c-basic-offset: 8
  79374. + tab-width: 8
  79375. + fill-column: 80
  79376. + End:
  79377. +*/
  79378. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/wander.h linux-4.14.2/fs/reiser4/wander.h
  79379. --- linux-4.14.2.orig/fs/reiser4/wander.h 1970-01-01 01:00:00.000000000 +0100
  79380. +++ linux-4.14.2/fs/reiser4/wander.h 2017-11-26 22:13:09.000000000 +0100
  79381. @@ -0,0 +1,135 @@
  79382. +/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
  79383. +
  79384. +#if !defined (__FS_REISER4_WANDER_H__)
  79385. +#define __FS_REISER4_WANDER_H__
  79386. +
  79387. +#include "dformat.h"
  79388. +
  79389. +#include <linux/fs.h> /* for struct super_block */
  79390. +
  79391. +/* REISER4 JOURNAL ON-DISK DATA STRUCTURES */
  79392. +
  79393. +#define TX_HEADER_MAGIC "TxMagic4"
  79394. +#define WANDER_RECORD_MAGIC "LogMagc4"
  79395. +
  79396. +#define TX_HEADER_MAGIC_SIZE (8)
  79397. +#define WANDER_RECORD_MAGIC_SIZE (8)
  79398. +
  79399. +/* journal header block format */
  79400. +struct journal_header {
  79401. + /* last written transaction head location */
  79402. + d64 last_committed_tx;
  79403. +};
  79404. +
  79405. +typedef struct journal_location {
  79406. + reiser4_block_nr footer;
  79407. + reiser4_block_nr header;
  79408. +} journal_location;
  79409. +
  79410. +/* The wander.c head comment describes usage and semantic of all these structures */
  79411. +/* journal footer block format */
  79412. +struct journal_footer {
  79413. + /* last flushed transaction location. */
  79414. + /* This block number is no longer valid after the transaction it points
  79415. + to gets flushed, this number is used only at journal replaying time
  79416. + for detection of the end of on-disk list of committed transactions
  79417. + which were not flushed completely */
  79418. + d64 last_flushed_tx;
  79419. +
  79420. + /* free block counter is written in journal footer at transaction
  79421. + flushing, not in the super block because the free blocks counter is logged
  79422. + in a different way than super block fields (root pointer, for
  79423. + example). */
  79424. + d64 free_blocks;
  79425. +
  79426. + /* number of used OIDs and maximal used OID are logged separately from
  79427. + super block */
  79428. + d64 nr_files;
  79429. + d64 next_oid;
  79430. +};
  79431. +
  79432. +/* Each wander record (except the first one) has unified format with wander
  79433. + record header followed by an array of log entries */
  79434. +struct wander_record_header {
  79435. + /* when there is no predefined location for wander records, this magic
  79436. + string should help reiser4fsck. */
  79437. + char magic[WANDER_RECORD_MAGIC_SIZE];
  79438. +
  79439. + /* transaction id */
  79440. + d64 id;
  79441. +
  79442. + /* total number of wander records in current transaction */
  79443. + d32 total;
  79444. +
  79445. + /* this block number in transaction */
  79446. + d32 serial;
  79447. +
  79448. + /* block number of the next wander record in the transaction */
  79449. + d64 next_block;
  79450. +};
  79451. +
  79452. +/* The first wander record (transaction head) of written transaction has the
  79453. + special format */
  79454. +struct tx_header {
  79455. + /* magic string makes first block in transaction different from other
  79456. + logged blocks, it should help fsck. */
  79457. + char magic[TX_HEADER_MAGIC_SIZE];
  79458. +
  79459. + /* transaction id */
  79460. + d64 id;
  79461. +
  79462. + /* total number of records (including this first tx head) in the
  79463. + transaction */
  79464. + d32 total;
  79465. +
  79466. + /* align next field to 8-byte boundary; this field always is zero */
  79467. + d32 padding;
  79468. +
  79469. + /* block number of previous transaction head */
  79470. + d64 prev_tx;
  79471. +
  79472. + /* next wander record location */
  79473. + d64 next_block;
  79474. +
  79475. + /* committed versions of free blocks counter */
  79476. + d64 free_blocks;
  79477. +
  79478. + /* number of used OIDs (nr_files) and maximal used OID are logged
  79479. + separately from super block */
  79480. + d64 nr_files;
  79481. + d64 next_oid;
  79482. +};
  79483. +
  79484. +/* A transaction gets written to disk as a set of wander records (each wander
  79485. + record size is fs block) */
  79486. +
  79487. +/* As noted above, the rest of a wander record is filled by these log entries;
  79488. + unused space is filled by zeroes */
  79489. +struct wander_entry {
  79490. + d64 original; /* block original location */
  79491. + d64 wandered; /* block wandered location */
  79492. +};
  79493. +
  79494. +/* REISER4 JOURNAL WRITER FUNCTIONS */
  79495. +
  79496. +extern int reiser4_write_logs(long *);
  79497. +extern int reiser4_journal_replay(struct super_block *);
  79498. +extern int reiser4_journal_recover_sb_data(struct super_block *);
  79499. +
  79500. +extern int reiser4_init_journal_info(struct super_block *);
  79501. +extern void reiser4_done_journal_info(struct super_block *);
  79502. +
  79503. +extern int write_jnode_list(struct list_head *, flush_queue_t *, long *, int);
  79504. +
  79505. +#endif /* __FS_REISER4_WANDER_H__ */
  79506. +
  79507. +/* Make Linus happy.
  79508. + Local variables:
  79509. + c-indentation-style: "K&R"
  79510. + mode-name: "LC"
  79511. + c-basic-offset: 8
  79512. + tab-width: 8
  79513. + fill-column: 80
  79514. + scroll-step: 1
  79515. + End:
  79516. +*/
  79517. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/writeout.h linux-4.14.2/fs/reiser4/writeout.h
  79518. --- linux-4.14.2.orig/fs/reiser4/writeout.h 1970-01-01 01:00:00.000000000 +0100
  79519. +++ linux-4.14.2/fs/reiser4/writeout.h 2017-11-26 22:13:09.000000000 +0100
  79520. @@ -0,0 +1,21 @@
  79521. +/* Copyright 2002, 2003, 2004 by Hans Reiser, licensing governed by reiser4/README */
  79522. +
  79523. +#if !defined (__FS_REISER4_WRITEOUT_H__)
  79524. +
  79525. +#define WRITEOUT_SINGLE_STREAM (0x1)
  79526. +#define WRITEOUT_FOR_PAGE_RECLAIM (0x2)
  79527. +#define WRITEOUT_FLUSH_FUA (0x4)
  79528. +
  79529. +extern int reiser4_get_writeout_flags(void);
  79530. +
  79531. +#endif /* __FS_REISER4_WRITEOUT_H__ */
  79532. +
  79533. +/* Make Linus happy.
  79534. + Local variables:
  79535. + c-indentation-style: "K&R"
  79536. + mode-name: "LC"
  79537. + c-basic-offset: 8
  79538. + tab-width: 8
  79539. + fill-column: 80
  79540. + End:
  79541. +*/
  79542. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/znode.c linux-4.14.2/fs/reiser4/znode.c
  79543. --- linux-4.14.2.orig/fs/reiser4/znode.c 1970-01-01 01:00:00.000000000 +0100
  79544. +++ linux-4.14.2/fs/reiser4/znode.c 2017-11-26 22:13:09.000000000 +0100
  79545. @@ -0,0 +1,1027 @@
  79546. +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
  79547. + * reiser4/README */
  79548. +/* Znode manipulation functions. */
  79549. +/* Znode is the in-memory header for a tree node. It is stored
  79550. + separately from the node itself so that it does not get written to
  79551. + disk. In this respect znode is like buffer head or page head. We
  79552. + also use znodes for additional reiser4 specific purposes:
  79553. +
  79554. + . they are organized into tree structure which is a part of whole
  79555. + reiser4 tree.
  79556. + . they are used to implement node grained locking
  79557. + . they are used to keep additional state associated with a
  79558. + node
  79559. + . they contain links to lists used by the transaction manager
  79560. +
  79561. + Znode is attached to some variable "block number" which is instance of
  79562. + fs/reiser4/tree.h:reiser4_block_nr type. Znode can exist without
  79563. + appropriate node being actually loaded in memory. Existence of znode itself
  79564. + is regulated by reference count (->x_count) in it. Each time thread
  79565. + acquires reference to znode through call to zget(), ->x_count is
  79566. + incremented and decremented on call to zput(). Data (content of node) are
  79567. + brought in memory through call to zload(), which also increments ->d_count
  79568. + reference counter. zload can block waiting on IO. Call to zrelse()
  79569. + decreases this counter. Also, ->c_count keeps track of number of child
  79570. + znodes and prevents parent znode from being recycled until all of its
  79571. + children are. ->c_count is decremented whenever child goes out of existence
  79572. + (being actually recycled in zdestroy()) which can be some time after last
  79573. + reference to this child dies if we support some form of LRU cache for
  79574. + znodes.
  79575. +
  79576. +*/
  79577. +/* EVERY ZNODE'S STORY
  79578. +
  79579. + 1. His infancy.
  79580. +
  79581. + Once upon a time, the znode was born deep inside of zget() by call to
  79582. + zalloc(). At the return from zget() znode had:
  79583. +
  79584. + . reference counter (x_count) of 1
  79585. + . assigned block number, marked as used in bitmap
  79586. + . pointer to parent znode. Root znode parent pointer points
  79587. + to its father: "fake" znode. This, in turn, has NULL parent pointer.
  79588. + . hash table linkage
  79589. + . no data loaded from disk
  79590. + . no node plugin
  79591. + . no sibling linkage
  79592. +
  79593. + 2. His childhood
  79594. +
  79595. + Each node is either brought into memory as a result of tree traversal, or
  79596. + created afresh, creation of the root being a special case of the latter. In
  79597. + either case it's inserted into sibling list. This will typically require
  79598. + some ancillary tree traversing, but ultimately both sibling pointers will
  79599. + exist and JNODE_LEFT_CONNECTED and JNODE_RIGHT_CONNECTED will be true in
  79600. + zjnode.state.
  79601. +
  79602. + 3. His youth.
  79603. +
  79604. + If znode is bound to already existing node in a tree, its content is read
  79605. + from the disk by call to zload(). At that moment, JNODE_LOADED bit is set
  79606. + in zjnode.state and zdata() function starts to return non null for this
  79607. + znode. zload() further calls zparse() that determines which node layout
  79608. + this node is rendered in, and sets ->nplug on success.
  79609. +
  79610. + If znode is for new node just created, memory for it is allocated and
  79611. + zinit_new() function is called to initialise data, according to selected
  79612. + node layout.
  79613. +
  79614. + 4. His maturity.
  79615. +
  79616. + After this point, znode lingers in memory for some time. Threads can
  79617. + acquire references to znode either by blocknr through call to zget(), or by
  79618. + following a pointer to unallocated znode from internal item. Each time
  79619. + reference to znode is obtained, x_count is increased. Thread can read/write
  79620. + lock znode. Znode data can be loaded through calls to zload(), d_count will
  79621. + be increased appropriately. If all references to znode are released
  79622. + (x_count drops to 0), znode is not recycled immediately. Rather, it is
  79623. + still cached in the hash table in the hope that it will be accessed
  79624. + shortly.
  79625. +
  79626. + There are two ways in which znode existence can be terminated:
  79627. +
  79628. + . sudden death: node bound to this znode is removed from the tree
  79629. + . overpopulation: znode is purged out of memory due to memory pressure
  79630. +
  79631. + 5. His death.
  79632. +
  79633. + Death is a complex process.
  79634. +
  79635. + When we irrevocably commit ourselves to decision to remove node from the
  79636. + tree, JNODE_HEARD_BANSHEE bit is set in zjnode.state of corresponding
  79637. + znode. This is done either in ->kill_hook() of internal item or in
  79638. + reiser4_kill_root() function when tree root is removed.
  79639. +
  79640. + At this moment znode still has:
  79641. +
  79642. + . locks held on it, necessarily write ones
  79643. + . references to it
  79644. + . disk block assigned to it
  79645. + . data loaded from the disk
  79646. + . pending requests for lock
  79647. +
  79648. + But once JNODE_HEARD_BANSHEE bit set, last call to unlock_znode() does node
  79649. + deletion. Node deletion includes two phases. First all ways to get
  79650. + references to that znode (sibling and parent links and hash lookup using
  79651. + block number stored in parent node) should be deleted -- it is done through
  79652. + sibling_list_remove(), also we assume that nobody uses down link from
  79653. + parent node due to its nonexistence or proper parent node locking and
  79654. + nobody uses parent pointers from children due to absence of them. Second we
  79655. + invalidate all pending lock requests which still are on znode's lock
  79656. + request queue, this is done by reiser4_invalidate_lock(). Another
  79657. + JNODE_IS_DYING znode status bit is used to invalidate pending lock requests.
  79658. + Once it is set, all requesters are forced to return -EINVAL from
  79659. + longterm_lock_znode(). Future locking attempts are not possible because all
  79660. + ways to get references to that znode are removed already. Last, node is
  79661. + uncaptured from transaction.
  79662. +
  79663. + When last reference to the dying znode is just about to be released,
  79664. + block number for this lock is released and znode is removed from the
  79665. + hash table.
  79666. +
  79667. + Now znode can be recycled.
  79668. +
  79669. + [it's possible to free bitmap block and remove znode from the hash
  79670. + table when last lock is released. This will result in having
  79671. + referenced but completely orphaned znode]
  79672. +
  79673. + 6. Limbo
  79674. +
  79675. + As has been mentioned above, znodes with reference counter 0 are
  79676. + still cached in a hash table. Once memory pressure increases they are
  79677. + purged out of there [this requires something like LRU list for
  79678. + efficient implementation. LRU list would also greatly simplify
  79679. + implementation of coord cache that would in this case morph to just
  79680. + scanning some initial segment of LRU list]. Data loaded into
  79681. + unreferenced znode are flushed back to the durable storage if
  79682. + necessary and memory is freed. Znodes themselves can be recycled at
  79683. + this point too.
  79684. +
  79685. +*/
  79686. +
  79687. +#include "debug.h"
  79688. +#include "dformat.h"
  79689. +#include "key.h"
  79690. +#include "coord.h"
  79691. +#include "plugin/plugin_header.h"
  79692. +#include "plugin/node/node.h"
  79693. +#include "plugin/plugin.h"
  79694. +#include "txnmgr.h"
  79695. +#include "jnode.h"
  79696. +#include "znode.h"
  79697. +#include "block_alloc.h"
  79698. +#include "tree.h"
  79699. +#include "tree_walk.h"
  79700. +#include "super.h"
  79701. +#include "reiser4.h"
  79702. +
  79703. +#include <linux/pagemap.h>
  79704. +#include <linux/spinlock.h>
  79705. +#include <linux/slab.h>
  79706. +#include <linux/err.h>
  79707. +
  79708. +static z_hash_table *get_htable(reiser4_tree *,
  79709. + const reiser4_block_nr * const blocknr);
  79710. +static z_hash_table *znode_get_htable(const znode *);
  79711. +static void zdrop(znode *);
  79712. +
  79713. +/* hash table support */
  79714. +
  79715. +/* compare two block numbers for equality. Used by hash-table macros */
  79716. +static inline int
  79717. +blknreq(const reiser4_block_nr * b1, const reiser4_block_nr * b2)
  79718. +{
  79719. + assert("nikita-534", b1 != NULL);
  79720. + assert("nikita-535", b2 != NULL);
  79721. +
  79722. + return *b1 == *b2;
  79723. +}
  79724. +
  79725. +/* Hash znode by block number. Used by hash-table macros */
  79726. +/* Audited by: umka (2002.06.11) */
  79727. +static inline __u32
  79728. +blknrhashfn(z_hash_table * table, const reiser4_block_nr * b)
  79729. +{
  79730. + assert("nikita-536", b != NULL);
  79731. +
  79732. + return *b & (REISER4_ZNODE_HASH_TABLE_SIZE - 1);
  79733. +}
  79734. +
  79735. +/* The hash table definition */
  79736. +#define KMALLOC(size) reiser4_vmalloc(size)
  79737. +#define KFREE(ptr, size) vfree(ptr)
  79738. +TYPE_SAFE_HASH_DEFINE(z, znode, reiser4_block_nr, zjnode.key.z, zjnode.link.z,
  79739. + blknrhashfn, blknreq);
  79740. +#undef KFREE
  79741. +#undef KMALLOC
  79742. +
  79743. +/* slab for znodes */
  79744. +static struct kmem_cache *znode_cache;
  79745. +
  79746. +int znode_shift_order;
  79747. +
  79748. +/**
  79749. + * init_znodes - create znode cache
  79750. + *
  79751. + * Initializes slab cache of znodes. It is part of reiser4 module initialization.
  79752. + */
  79753. +int init_znodes(void)
  79754. +{
  79755. + znode_cache = kmem_cache_create("znode", sizeof(znode), 0,
  79756. + SLAB_HWCACHE_ALIGN |
  79757. + SLAB_RECLAIM_ACCOUNT, NULL);
  79758. + if (znode_cache == NULL)
  79759. + return RETERR(-ENOMEM);
  79760. +
  79761. + for (znode_shift_order = 0; (1 << znode_shift_order) < sizeof(znode);
  79762. + ++znode_shift_order);
  79763. + --znode_shift_order;
  79764. + return 0;
  79765. +}
  79766. +
  79767. +/**
  79768. + * done_znodes - delete znode cache
  79769. + *
  79770. + * This is called on reiser4 module unloading or system shutdown.
  79771. + */
  79772. +void done_znodes(void)
  79773. +{
  79774. + destroy_reiser4_cache(&znode_cache);
  79775. +}
  79776. +
  79777. +/* call this to initialise tree of znodes */
  79778. +int znodes_tree_init(reiser4_tree * tree /* tree to initialise znodes for */ )
  79779. +{
  79780. + int result;
  79781. + assert("umka-050", tree != NULL);
  79782. +
  79783. + rwlock_init(&tree->dk_lock);
  79784. +
  79785. + result = z_hash_init(&tree->zhash_table, REISER4_ZNODE_HASH_TABLE_SIZE);
  79786. + if (result != 0)
  79787. + return result;
  79788. + result = z_hash_init(&tree->zfake_table, REISER4_ZNODE_HASH_TABLE_SIZE);
  79789. + return result;
  79790. +}
  79791. +
  79792. +/* free this znode */
  79793. +void zfree(znode * node /* znode to free */ )
  79794. +{
  79795. + assert("nikita-465", node != NULL);
  79796. + assert("nikita-2120", znode_page(node) == NULL);
  79797. + assert("nikita-2301", list_empty_careful(&node->lock.owners));
  79798. + assert("nikita-2302", list_empty_careful(&node->lock.requestors));
  79799. + assert("nikita-2663", (list_empty_careful(&ZJNODE(node)->capture_link) &&
  79800. + NODE_LIST(ZJNODE(node)) == NOT_CAPTURED));
  79801. + assert("nikita-3220", list_empty(&ZJNODE(node)->jnodes));
  79802. + assert("nikita-3293", !znode_is_right_connected(node));
  79803. + assert("nikita-3294", !znode_is_left_connected(node));
  79804. + assert("nikita-3295", node->left == NULL);
  79805. + assert("nikita-3296", node->right == NULL);
  79806. +
  79807. + /* not yet phash_jnode_destroy(ZJNODE(node)); */
  79808. +
  79809. + kmem_cache_free(znode_cache, node);
  79810. +}
  79811. +
  79812. +/* call this to free tree of znodes */
  79813. +void znodes_tree_done(reiser4_tree * tree /* tree to finish with znodes of */ )
  79814. +{
  79815. + znode *node;
  79816. + znode *next;
  79817. + z_hash_table *ztable;
  79818. +
  79819. + /* scan znode hash-tables and kill all znodes, then free hash tables
  79820. + * themselves. */
  79821. +
  79822. + assert("nikita-795", tree != NULL);
  79823. +
  79824. + ztable = &tree->zhash_table;
  79825. +
  79826. + if (ztable->_table != NULL) {
  79827. + for_all_in_htable(ztable, z, node, next) {
  79828. + node->c_count = 0;
  79829. + node->in_parent.node = NULL;
  79830. + assert("nikita-2179", atomic_read(&ZJNODE(node)->x_count) == 0);
  79831. + zdrop(node);
  79832. + }
  79833. +
  79834. + z_hash_done(&tree->zhash_table);
  79835. + }
  79836. +
  79837. + ztable = &tree->zfake_table;
  79838. +
  79839. + if (ztable->_table != NULL) {
  79840. + for_all_in_htable(ztable, z, node, next) {
  79841. + node->c_count = 0;
  79842. + node->in_parent.node = NULL;
  79843. + assert("nikita-2179", atomic_read(&ZJNODE(node)->x_count) == 0);
  79844. + zdrop(node);
  79845. + }
  79846. +
  79847. + z_hash_done(&tree->zfake_table);
  79848. + }
  79849. +}
  79850. +
  79851. +/* ZNODE STRUCTURES */
  79852. +
  79853. +/* allocate fresh znode */
  79854. +znode *zalloc(gfp_t gfp_flag /* allocation flag */ )
  79855. +{
  79856. + znode *node;
  79857. +
  79858. + node = kmem_cache_alloc(znode_cache, gfp_flag);
  79859. + return node;
  79860. +}
  79861. +
  79862. +/* Initialize fields of znode
  79863. + @node: znode to initialize;
  79864. + @parent: parent znode;
  79865. + @tree: tree we are in. */
  79866. +void zinit(znode * node, const znode * parent, reiser4_tree * tree)
  79867. +{
  79868. + assert("nikita-466", node != NULL);
  79869. + assert("umka-268", current_tree != NULL);
  79870. +
  79871. + memset(node, 0, sizeof *node);
  79872. +
  79873. + assert("umka-051", tree != NULL);
  79874. +
  79875. + jnode_init(&node->zjnode, tree, JNODE_FORMATTED_BLOCK);
  79876. + reiser4_init_lock(&node->lock);
  79877. + init_parent_coord(&node->in_parent, parent);
  79878. +}
  79879. +
  79880. +/*
  79881. + * remove znode from indices. This is called by jput() when the last reference on
  79882. + * znode is released.
  79883. + */
  79884. +void znode_remove(znode * node /* znode to remove */ , reiser4_tree * tree)
  79885. +{
  79886. + assert("nikita-2108", node != NULL);
  79887. + assert("nikita-470", node->c_count == 0);
  79888. + assert_rw_write_locked(&(tree->tree_lock));
  79889. +
  79890. + /* remove reference to this znode from cbk cache */
  79891. + cbk_cache_invalidate(node, tree);
  79892. +
  79893. + /* update c_count of parent */
  79894. + if (znode_parent(node) != NULL) {
  79895. + assert("nikita-472", znode_parent(node)->c_count > 0);
  79896. + /* father, onto your hands I forward my spirit... */
  79897. + znode_parent(node)->c_count--;
  79898. + node->in_parent.node = NULL;
  79899. + } else {
  79900. + /* orphaned znode?! Root? */
  79901. + }
  79902. +
  79903. + /* remove znode from hash-table */
  79904. + z_hash_remove_rcu(znode_get_htable(node), node);
  79905. +}
  79906. +
  79907. +/* zdrop() -- Remove znode from the tree.
  79908. +
  79909. + This is called when znode is removed from the memory. */
  79910. +static void zdrop(znode * node /* znode to finish with */ )
  79911. +{
  79912. + jdrop(ZJNODE(node));
  79913. +}
  79914. +
  79915. +/*
  79916. + * put znode into right place in the hash table. This is called by relocate
  79917. + * code.
  79918. + */
  79919. +int znode_rehash(znode * node /* node to rehash */ ,
  79920. + const reiser4_block_nr * new_block_nr /* new block number */ )
  79921. +{
  79922. + z_hash_table *oldtable;
  79923. + z_hash_table *newtable;
  79924. + reiser4_tree *tree;
  79925. +
  79926. + assert("nikita-2018", node != NULL);
  79927. +
  79928. + tree = znode_get_tree(node);
  79929. + oldtable = znode_get_htable(node);
  79930. + newtable = get_htable(tree, new_block_nr);
  79931. +
  79932. + write_lock_tree(tree);
  79933. + /* remove znode from hash-table */
  79934. + z_hash_remove_rcu(oldtable, node);
  79935. +
  79936. + /* assertion no longer valid due to RCU */
  79937. + /* assert("nikita-2019", z_hash_find(newtable, new_block_nr) == NULL); */
  79938. +
  79939. + /* update blocknr */
  79940. + znode_set_block(node, new_block_nr);
  79941. + node->zjnode.key.z = *new_block_nr;
  79942. +
  79943. + /* insert it into hash */
  79944. + z_hash_insert_rcu(newtable, node);
  79945. + write_unlock_tree(tree);
  79946. + return 0;
  79947. +}
  79948. +
  79949. +/* ZNODE LOOKUP, GET, PUT */
  79950. +
  79951. +/* zlook() - get znode with given block_nr in a hash table or return NULL
  79952. +
  79953. + If result is non-NULL then the znode's x_count is incremented. Internal version
  79954. + accepts pre-computed hash index. The hash table is accessed under caller's
  79955. + tree->hash_lock.
  79956. +*/
  79957. +znode *zlook(reiser4_tree * tree, const reiser4_block_nr * const blocknr)
  79958. +{
  79959. + znode *result;
  79960. + __u32 hash;
  79961. + z_hash_table *htable;
  79962. +
  79963. + assert("jmacd-506", tree != NULL);
  79964. + assert("jmacd-507", blocknr != NULL);
  79965. +
  79966. + htable = get_htable(tree, blocknr);
  79967. + hash = blknrhashfn(htable, blocknr);
  79968. +
  79969. + rcu_read_lock();
  79970. + result = z_hash_find_index(htable, hash, blocknr);
  79971. +
  79972. + if (result != NULL) {
  79973. + add_x_ref(ZJNODE(result));
  79974. + result = znode_rip_check(tree, result);
  79975. + }
  79976. + rcu_read_unlock();
  79977. +
  79978. + return result;
  79979. +}
  79980. +
  79981. +/* return hash table where znode with block @blocknr is (or should be)
  79982. + * stored */
  79983. +static z_hash_table *get_htable(reiser4_tree * tree,
  79984. + const reiser4_block_nr * const blocknr)
  79985. +{
  79986. + z_hash_table *table;
  79987. + if (is_disk_addr_unallocated(blocknr))
  79988. + table = &tree->zfake_table;
  79989. + else
  79990. + table = &tree->zhash_table;
  79991. + return table;
  79992. +}
  79993. +
  79994. +/* return hash table where znode @node is (or should be) stored */
  79995. +static z_hash_table *znode_get_htable(const znode * node)
  79996. +{
  79997. + return get_htable(znode_get_tree(node), znode_get_block(node));
  79998. +}
  79999. +
  80000. +/* zget() - get znode from hash table, allocating it if necessary.
  80001. +
  80002. + First a call to zlook, locating an x-referenced znode if one
  80003. + exists. If znode is not found, allocate new one and return. Result
  80004. + is returned with x_count reference increased.
  80005. +
  80006. + LOCKS TAKEN: TREE_LOCK, ZNODE_LOCK
  80007. + LOCK ORDERING: NONE
  80008. +*/
  80009. +znode *zget(reiser4_tree * tree,
  80010. + const reiser4_block_nr * const blocknr,
  80011. + znode * parent, tree_level level, gfp_t gfp_flag)
  80012. +{
  80013. + znode *result;
  80014. + __u32 hashi;
  80015. +
  80016. + z_hash_table *zth;
  80017. +
  80018. + assert("jmacd-512", tree != NULL);
  80019. + assert("jmacd-513", blocknr != NULL);
  80020. + assert("jmacd-514", level < REISER4_MAX_ZTREE_HEIGHT);
  80021. +
  80022. + zth = get_htable(tree, blocknr);
  80023. + hashi = blknrhashfn(zth, blocknr);
  80024. +
  80025. + /* NOTE-NIKITA address-as-unallocated-blocknr still is not
  80026. + implemented. */
  80027. +
  80028. + z_hash_prefetch_bucket(zth, hashi);
  80029. +
  80030. + rcu_read_lock();
  80031. + /* Find a matching BLOCKNR in the hash table. If the znode is found,
  80032. + we obtain a reference (x_count) but the znode remains unlocked.
  80033. + Have to worry about race conditions later. */
  80034. + result = z_hash_find_index(zth, hashi, blocknr);
  80035. + /* According to the current design, the hash table lock protects new
  80036. + znode references. */
  80037. + if (result != NULL) {
  80038. + add_x_ref(ZJNODE(result));
  80039. + /* NOTE-NIKITA it should be so, but special case during
  80040. + creation of new root makes such assertion highly
  80041. + complicated. */
  80042. + assert("nikita-2131", 1 || znode_parent(result) == parent ||
  80043. + (ZF_ISSET(result, JNODE_ORPHAN)
  80044. + && (znode_parent(result) == NULL)));
  80045. + result = znode_rip_check(tree, result);
  80046. + }
  80047. +
  80048. + rcu_read_unlock();
  80049. +
  80050. + if (!result) {
  80051. + znode *shadow;
  80052. +
  80053. + result = zalloc(gfp_flag);
  80054. + if (!result) {
  80055. + return ERR_PTR(RETERR(-ENOMEM));
  80056. + }
  80057. +
  80058. + zinit(result, parent, tree);
  80059. + ZJNODE(result)->blocknr = *blocknr;
  80060. + ZJNODE(result)->key.z = *blocknr;
  80061. + result->level = level;
  80062. +
  80063. + write_lock_tree(tree);
  80064. +
  80065. + shadow = z_hash_find_index(zth, hashi, blocknr);
  80066. + if (unlikely(shadow != NULL && !ZF_ISSET(shadow, JNODE_RIP))) {
  80067. + jnode_list_remove(ZJNODE(result));
  80068. + zfree(result);
  80069. + result = shadow;
  80070. + } else {
  80071. + result->version = znode_build_version(tree);
  80072. + z_hash_insert_index_rcu(zth, hashi, result);
  80073. +
  80074. + if (parent != NULL)
  80075. + ++parent->c_count;
  80076. + }
  80077. +
  80078. + add_x_ref(ZJNODE(result));
  80079. +
  80080. + write_unlock_tree(tree);
  80081. + }
  80082. +
  80083. + assert("intelfx-6",
  80084. + ergo(!reiser4_blocknr_is_fake(blocknr) && *blocknr != 0,
  80085. + reiser4_check_block(blocknr, 1)));
  80086. +
  80087. + /* Check for invalid tree level, return -EIO */
  80088. + if (unlikely(znode_get_level(result) != level)) {
  80089. + warning("jmacd-504",
  80090. + "Wrong level for cached block %llu: %i expecting %i",
  80091. + (unsigned long long)(*blocknr), znode_get_level(result),
  80092. + level);
  80093. + zput(result);
  80094. + return ERR_PTR(RETERR(-EIO));
  80095. + }
  80096. +
  80097. + assert("nikita-1227", znode_invariant(result));
  80098. +
  80099. + return result;
  80100. +}
  80101. +
  80102. +/* ZNODE PLUGINS/DATA */
  80103. +
  80104. +/* "guess" plugin for node loaded from the disk. Plugin id of node plugin is
  80105. + stored at the fixed offset from the beginning of the node. */
  80106. +static node_plugin *znode_guess_plugin(const znode * node /* znode to guess
  80107. + * plugin of */ )
  80108. +{
  80109. + reiser4_tree *tree;
  80110. +
  80111. + assert("nikita-1053", node != NULL);
  80112. + assert("nikita-1055", zdata(node) != NULL);
  80113. +
  80114. + tree = znode_get_tree(node);
  80115. + assert("umka-053", tree != NULL);
  80116. +
  80117. + if (reiser4_is_set(tree->super, REISER4_ONE_NODE_PLUGIN)) {
  80118. + return tree->nplug;
  80119. + } else {
  80120. + return node_plugin_by_disk_id
  80121. + (tree, &((common_node_header *) zdata(node))->plugin_id);
  80122. +#ifdef GUESS_EXISTS
  80123. + reiser4_plugin *plugin;
  80124. +
  80125. + /* NOTE-NIKITA add locking here when dynamic plugins will be
  80126. + * implemented */
  80127. + for_all_plugins(REISER4_NODE_PLUGIN_TYPE, plugin) {
  80128. + if ((plugin->u.node.guess != NULL)
  80129. + && plugin->u.node.guess(node))
  80130. + return plugin;
  80131. + }
  80132. + warning("nikita-1057", "Cannot guess node plugin");
  80133. + print_znode("node", node);
  80134. + return NULL;
  80135. +#endif
  80136. + }
  80137. +}
  80138. +
  80139. +/* parse node header and install ->node_plugin */
  80140. +int zparse(znode * node /* znode to parse */ )
  80141. +{
  80142. + int result;
  80143. +
  80144. + assert("nikita-1233", node != NULL);
  80145. + assert("nikita-2370", zdata(node) != NULL);
  80146. +
  80147. + if (node->nplug == NULL) {
  80148. + node_plugin *nplug;
  80149. +
  80150. + nplug = znode_guess_plugin(node);
  80151. + if (likely(nplug != NULL)) {
  80152. + result = nplug->parse(node);
  80153. + if (likely(result == 0))
  80154. + node->nplug = nplug;
  80155. + } else {
  80156. + result = RETERR(-EIO);
  80157. + }
  80158. + } else
  80159. + result = 0;
  80160. + return result;
  80161. +}
  80162. +
  80163. +/* zload with readahead */
  80164. +int zload_ra(znode * node /* znode to load */ , ra_info_t * info)
  80165. +{
  80166. + int result;
  80167. +
  80168. + assert("nikita-484", node != NULL);
  80169. + assert("nikita-1377", znode_invariant(node));
  80170. + assert("jmacd-7771", !znode_above_root(node));
  80171. + assert("nikita-2125", atomic_read(&ZJNODE(node)->x_count) > 0);
  80172. + assert("nikita-3016", reiser4_schedulable());
  80173. +
  80174. + if (info)
  80175. + formatted_readahead(node, info);
  80176. +
  80177. + result = jload(ZJNODE(node));
  80178. + assert("nikita-1378", znode_invariant(node));
  80179. + return result;
  80180. +}
  80181. +
  80182. +/* load content of node into memory */
  80183. +int zload(znode *node)
  80184. +{
  80185. + return zload_ra(node, NULL);
  80186. +}
  80187. +
  80188. +/* call node plugin to initialise newly allocated node. */
  80189. +int zinit_new(znode * node /* znode to initialise */ , gfp_t gfp_flags)
  80190. +{
  80191. + return jinit_new(ZJNODE(node), gfp_flags);
  80192. +}
  80193. +
  80194. +/* drop reference to node data. When last reference is dropped, data are
  80195. + unloaded. */
  80196. +void zrelse(znode * node /* znode to release references to */ )
  80197. +{
  80198. + assert("nikita-1381", znode_invariant(node));
  80199. + jrelse(ZJNODE(node));
  80200. +}
  80201. +
  80202. +/* returns free space in node */
  80203. +unsigned znode_free_space(znode * node /* znode to query */ )
  80204. +{
  80205. + assert("nikita-852", node != NULL);
  80206. + return node_plugin_by_node(node)->free_space(node);
  80207. +}
  80208. +
  80209. +/* left delimiting key of znode */
  80210. +reiser4_key *znode_get_rd_key(znode * node /* znode to query */ )
  80211. +{
  80212. + assert("nikita-958", node != NULL);
  80213. + assert_rw_locked(&(znode_get_tree(node)->dk_lock));
  80214. + assert("nikita-3067", LOCK_CNT_GTZ(rw_locked_dk));
  80215. + assert("nikita-30671", node->rd_key_version != 0);
  80216. + return &node->rd_key;
  80217. +}
  80218. +
  80219. +/* right delimiting key of znode */
  80220. +reiser4_key *znode_get_ld_key(znode * node /* znode to query */ )
  80221. +{
  80222. + assert("nikita-974", node != NULL);
  80223. + assert_rw_locked(&(znode_get_tree(node)->dk_lock));
  80224. + assert("nikita-3068", LOCK_CNT_GTZ(rw_locked_dk));
  80225. + assert("nikita-30681", node->ld_key_version != 0);
  80226. + return &node->ld_key;
  80227. +}
  80228. +
  80229. +ON_DEBUG(atomic_t delim_key_version = ATOMIC_INIT(0);
  80230. + )
  80231. +
  80232. +/* update right-delimiting key of @node */
  80233. +reiser4_key *znode_set_rd_key(znode * node, const reiser4_key * key)
  80234. +{
  80235. + assert("nikita-2937", node != NULL);
  80236. + assert("nikita-2939", key != NULL);
  80237. + assert_rw_write_locked(&(znode_get_tree(node)->dk_lock));
  80238. + assert("nikita-3069", LOCK_CNT_GTZ(write_locked_dk));
  80239. + assert("nikita-2944",
  80240. + znode_is_any_locked(node) ||
  80241. + znode_get_level(node) != LEAF_LEVEL ||
  80242. + keyge(key, &node->rd_key) ||
  80243. + keyeq(&node->rd_key, reiser4_min_key()) ||
  80244. + ZF_ISSET(node, JNODE_HEARD_BANSHEE));
  80245. +
  80246. + node->rd_key = *key;
  80247. + ON_DEBUG(node->rd_key_version = atomic_inc_return(&delim_key_version));
  80248. + return &node->rd_key;
  80249. +}
  80250. +
  80251. +/* update left-delimiting key of @node */
  80252. +reiser4_key *znode_set_ld_key(znode * node, const reiser4_key * key)
  80253. +{
  80254. + assert("nikita-2940", node != NULL);
  80255. + assert("nikita-2941", key != NULL);
  80256. + assert_rw_write_locked(&(znode_get_tree(node)->dk_lock));
  80257. + assert("nikita-3070", LOCK_CNT_GTZ(write_locked_dk));
  80258. + assert("nikita-2943",
  80259. + znode_is_any_locked(node) || keyeq(&node->ld_key,
  80260. + reiser4_min_key()));
  80261. +
  80262. + node->ld_key = *key;
  80263. + ON_DEBUG(node->ld_key_version = atomic_inc_return(&delim_key_version));
  80264. + return &node->ld_key;
  80265. +}
  80266. +
  80267. +/* true if @key is inside key range for @node */
  80268. +int znode_contains_key(znode * node /* znode to look in */ ,
  80269. + const reiser4_key * key /* key to look for */ )
  80270. +{
  80271. + assert("nikita-1237", node != NULL);
  80272. + assert("nikita-1238", key != NULL);
  80273. +
  80274. + /* left_delimiting_key <= key <= right_delimiting_key */
  80275. + return keyle(znode_get_ld_key(node), key)
  80276. + && keyle(key, znode_get_rd_key(node));
  80277. +}
  80278. +
  80279. +/* same as znode_contains_key(), but lock dk lock */
  80280. +int znode_contains_key_lock(znode * node /* znode to look in */ ,
  80281. + const reiser4_key * key /* key to look for */ )
  80282. +{
  80283. + int result;
  80284. +
  80285. + assert("umka-056", node != NULL);
  80286. + assert("umka-057", key != NULL);
  80287. +
  80288. + read_lock_dk(znode_get_tree(node));
  80289. + result = znode_contains_key(node, key);
  80290. + read_unlock_dk(znode_get_tree(node));
  80291. + return result;
  80292. +}
  80293. +
  80294. +/* get parent pointer, assuming tree is not locked */
  80295. +znode *znode_parent_nolock(const znode * node /* child znode */ )
  80296. +{
  80297. + assert("nikita-1444", node != NULL);
  80298. + return node->in_parent.node;
  80299. +}
  80300. +
  80301. +/* get parent pointer of znode */
  80302. +znode *znode_parent(const znode * node /* child znode */ )
  80303. +{
  80304. + assert("nikita-1226", node != NULL);
  80305. + assert("nikita-1406", LOCK_CNT_GTZ(rw_locked_tree));
  80306. + return znode_parent_nolock(node);
  80307. +}
  80308. +
  80309. +/* detect uber znode used to protect in-superblock tree root pointer */
  80310. +int znode_above_root(const znode * node /* znode to query */ )
  80311. +{
  80312. + assert("umka-059", node != NULL);
  80313. +
  80314. + return disk_addr_eq(&ZJNODE(node)->blocknr, &UBER_TREE_ADDR);
  80315. +}
  80316. +
  80317. +/* check that @node is root---that its block number is recorded in the tree as
  80318. + that of root node */
  80319. +#if REISER4_DEBUG
  80320. +static int znode_is_true_root(const znode * node /* znode to query */ )
  80321. +{
  80322. + assert("umka-060", node != NULL);
  80323. + assert("umka-061", current_tree != NULL);
  80324. +
  80325. + return disk_addr_eq(znode_get_block(node),
  80326. + &znode_get_tree(node)->root_block);
  80327. +}
  80328. +#endif
  80329. +
  80330. +/* check that @node is root */
  80331. +int znode_is_root(const znode * node /* znode to query */ )
  80332. +{
  80333. + return znode_get_level(node) == znode_get_tree(node)->height;
  80334. +}
  80335. +
  80336. +/* Returns true if @node was just created by zget() and wasn't ever loaded
  80337. + into memory. */
  80338. +/* NIKITA-HANS: yes */
  80339. +int znode_just_created(const znode * node)
  80340. +{
  80341. + assert("nikita-2188", node != NULL);
  80342. + return (znode_page(node) == NULL);
  80343. +}
  80344. +
  80345. +/* obtain updated ->znode_epoch. See seal.c for description. */
  80346. +__u64 znode_build_version(reiser4_tree * tree)
  80347. +{
  80348. + __u64 result;
  80349. +
  80350. + spin_lock(&tree->epoch_lock);
  80351. + result = ++tree->znode_epoch;
  80352. + spin_unlock(&tree->epoch_lock);
  80353. + return result;
  80354. +}
  80355. +
  80356. +void init_load_count(load_count * dh)
  80357. +{
  80358. + assert("nikita-2105", dh != NULL);
  80359. + memset(dh, 0, sizeof *dh);
  80360. +}
  80361. +
  80362. +void done_load_count(load_count * dh)
  80363. +{
  80364. + assert("nikita-2106", dh != NULL);
  80365. + if (dh->node != NULL) {
  80366. + for (; dh->d_ref > 0; --dh->d_ref)
  80367. + zrelse(dh->node);
  80368. + dh->node = NULL;
  80369. + }
  80370. +}
  80371. +
  80372. +static int incr_load_count(load_count * dh)
  80373. +{
  80374. + int result;
  80375. +
  80376. + assert("nikita-2110", dh != NULL);
  80377. + assert("nikita-2111", dh->node != NULL);
  80378. +
  80379. + result = zload(dh->node);
  80380. + if (result == 0)
  80381. + ++dh->d_ref;
  80382. + return result;
  80383. +}
  80384. +
  80385. +int incr_load_count_znode(load_count * dh, znode * node)
  80386. +{
  80387. + assert("nikita-2107", dh != NULL);
  80388. + assert("nikita-2158", node != NULL);
  80389. + assert("nikita-2109",
  80390. + ergo(dh->node != NULL, (dh->node == node) || (dh->d_ref == 0)));
  80391. +
  80392. + dh->node = node;
  80393. + return incr_load_count(dh);
  80394. +}
  80395. +
  80396. +int incr_load_count_jnode(load_count * dh, jnode * node)
  80397. +{
  80398. + if (jnode_is_znode(node)) {
  80399. + return incr_load_count_znode(dh, JZNODE(node));
  80400. + }
  80401. + return 0;
  80402. +}
  80403. +
  80404. +void copy_load_count(load_count * new, load_count * old)
  80405. +{
  80406. + int ret = 0;
  80407. + done_load_count(new);
  80408. + new->node = old->node;
  80409. + new->d_ref = 0;
  80410. +
  80411. + while ((new->d_ref < old->d_ref) && (ret = incr_load_count(new)) == 0) {
  80412. + }
  80413. +
  80414. + assert("jmacd-87589", ret == 0);
  80415. +}
  80416. +
  80417. +void move_load_count(load_count * new, load_count * old)
  80418. +{
  80419. + done_load_count(new);
  80420. + new->node = old->node;
  80421. + new->d_ref = old->d_ref;
  80422. + old->node = NULL;
  80423. + old->d_ref = 0;
  80424. +}
  80425. +
  80426. +/* convert parent pointer into coord */
  80427. +void parent_coord_to_coord(const parent_coord_t * pcoord, coord_t * coord)
  80428. +{
  80429. + assert("nikita-3204", pcoord != NULL);
  80430. + assert("nikita-3205", coord != NULL);
  80431. +
  80432. + coord_init_first_unit_nocheck(coord, pcoord->node);
  80433. + coord_set_item_pos(coord, pcoord->item_pos);
  80434. + coord->between = AT_UNIT;
  80435. +}
  80436. +
  80437. +/* pack coord into parent_coord_t */
  80438. +void coord_to_parent_coord(const coord_t * coord, parent_coord_t * pcoord)
  80439. +{
  80440. + assert("nikita-3206", pcoord != NULL);
  80441. + assert("nikita-3207", coord != NULL);
  80442. +
  80443. + pcoord->node = coord->node;
  80444. + pcoord->item_pos = coord->item_pos;
  80445. +}
  80446. +
  80447. +/* Initialize a parent hint pointer. (parent hint pointer is a field in znode,
  80448. + look for comments there) */
  80449. +void init_parent_coord(parent_coord_t * pcoord, const znode * node)
  80450. +{
  80451. + pcoord->node = (znode *) node;
  80452. + pcoord->item_pos = (unsigned short)~0;
  80453. +}
  80454. +
  80455. +#if REISER4_DEBUG
  80456. +
  80457. +/* debugging aid: znode invariant */
  80458. +static int znode_invariant_f(const znode * node /* znode to check */ ,
  80459. + char const **msg /* where to store error
  80460. + * message, if any */ )
  80461. +{
  80462. +#define _ergo(ant, con) \
  80463. + ((*msg) = "{" #ant "} ergo {" #con "}", ergo((ant), (con)))
  80464. +
  80465. +#define _equi(e1, e2) \
  80466. + ((*msg) = "{" #e1 "} <=> {" #e2 "}", equi((e1), (e2)))
  80467. +
  80468. +#define _check(exp) ((*msg) = #exp, (exp))
  80469. +
  80470. + return jnode_invariant_f(ZJNODE(node), msg) &&
  80471. + /* [znode-fake] invariant */
  80472. + /* fake znode doesn't have a parent, and */
  80473. + _ergo(znode_get_level(node) == 0, znode_parent(node) == NULL) &&
  80474. + /* there is another way to express this very check, and */
  80475. + _ergo(znode_above_root(node), znode_parent(node) == NULL) &&
  80476. + /* it has special block number, and */
  80477. + _ergo(znode_get_level(node) == 0,
  80478. + disk_addr_eq(znode_get_block(node), &UBER_TREE_ADDR)) &&
  80479. + /* it is the only znode with such block number, and */
  80480. + _ergo(!znode_above_root(node) && znode_is_loaded(node),
  80481. + !disk_addr_eq(znode_get_block(node), &UBER_TREE_ADDR)) &&
  80482. + /* it is parent of the tree root node */
  80483. + _ergo(znode_is_true_root(node),
  80484. + znode_above_root(znode_parent(node))) &&
  80485. + /* [znode-level] invariant */
  80486. + /* level of parent znode is one larger than that of child,
  80487. + except for the fake znode, and */
  80488. + _ergo(znode_parent(node) && !znode_above_root(znode_parent(node)),
  80489. + znode_get_level(znode_parent(node)) ==
  80490. + znode_get_level(node) + 1) &&
  80491. + /* left neighbor is at the same level, and */
  80492. + _ergo(znode_is_left_connected(node) && node->left != NULL,
  80493. + znode_get_level(node) == znode_get_level(node->left)) &&
  80494. + /* right neighbor is at the same level */
  80495. + _ergo(znode_is_right_connected(node) && node->right != NULL,
  80496. + znode_get_level(node) == znode_get_level(node->right)) &&
  80497. + /* [znode-connected] invariant */
  80498. + _ergo(node->left != NULL, znode_is_left_connected(node)) &&
  80499. + _ergo(node->right != NULL, znode_is_right_connected(node)) &&
  80500. + _ergo(!znode_is_root(node) && node->left != NULL,
  80501. + znode_is_right_connected(node->left) &&
  80502. + node->left->right == node) &&
  80503. + _ergo(!znode_is_root(node) && node->right != NULL,
  80504. + znode_is_left_connected(node->right) &&
  80505. + node->right->left == node) &&
  80506. + /* [znode-c_count] invariant */
  80507. + /* for any znode, c_count of its parent is greater than 0 */
  80508. + _ergo(znode_parent(node) != NULL &&
  80509. + !znode_above_root(znode_parent(node)),
  80510. + znode_parent(node)->c_count > 0) &&
  80511. + /* leaves don't have children */
  80512. + _ergo(znode_get_level(node) == LEAF_LEVEL,
  80513. + node->c_count == 0) &&
  80514. + _check(node->zjnode.jnodes.prev != NULL) &&
  80515. + _check(node->zjnode.jnodes.next != NULL) &&
  80516. + /* orphan doesn't have a parent */
  80517. + _ergo(ZF_ISSET(node, JNODE_ORPHAN), znode_parent(node) == 0) &&
  80518. + /* [znode-modify] invariant */
  80519. + /* if znode is not write-locked, its checksum remains
  80520. + * invariant */
  80521. + /* unfortunately, zlock is unordered w.r.t. jnode_lock, so we
  80522. + * cannot check this. */
  80523. + /* [znode-refs] invariant */
  80524. + /* only referenced znode can be long-term locked */
  80525. + _ergo(znode_is_locked(node),
  80526. + atomic_read(&ZJNODE(node)->x_count) != 0);
  80527. +}
  80528. +
  80529. +/* debugging aid: check znode invariant and panic if it doesn't hold */
  80530. +int znode_invariant(znode * node /* znode to check */ )
  80531. +{
  80532. + char const *failed_msg;
  80533. + int result;
  80534. +
  80535. + assert("umka-063", node != NULL);
  80536. + assert("umka-064", current_tree != NULL);
  80537. +
  80538. + spin_lock_znode(node);
  80539. + read_lock_tree(znode_get_tree(node));
  80540. + result = znode_invariant_f(node, &failed_msg);
  80541. + if (!result) {
  80542. + /* print_znode("corrupted node", node); */
  80543. + warning("jmacd-555", "Condition %s failed", failed_msg);
  80544. + }
  80545. + read_unlock_tree(znode_get_tree(node));
  80546. + spin_unlock_znode(node);
  80547. + return result;
  80548. +}
  80549. +
  80550. +/* return non-0 iff data are loaded into znode */
  80551. +int znode_is_loaded(const znode * node /* znode to query */ )
  80552. +{
  80553. + assert("nikita-497", node != NULL);
  80554. + return jnode_is_loaded(ZJNODE(node));
  80555. +}
  80556. +
  80557. +unsigned long znode_times_locked(const znode * z)
  80558. +{
  80559. + return z->times_locked;
  80560. +}
  80561. +
  80562. +#endif /* REISER4_DEBUG */
  80563. +
  80564. +/* Make Linus happy.
  80565. + Local variables:
  80566. + c-indentation-style: "K&R"
  80567. + mode-name: "LC"
  80568. + c-basic-offset: 8
  80569. + tab-width: 8
  80570. + fill-column: 120
  80571. + End:
  80572. +*/
  80573. diff -urN --no-dereference linux-4.14.2.orig/fs/reiser4/znode.h linux-4.14.2/fs/reiser4/znode.h
  80574. --- linux-4.14.2.orig/fs/reiser4/znode.h 1970-01-01 01:00:00.000000000 +0100
  80575. +++ linux-4.14.2/fs/reiser4/znode.h 2017-11-26 22:13:09.000000000 +0100
  80576. @@ -0,0 +1,435 @@
  80577. +/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
  80578. + * reiser4/README */
  80579. +
  80580. +/* Declaration of znode (Zam's node). See znode.c for more details. */
  80581. +
  80582. +#ifndef __ZNODE_H__
  80583. +#define __ZNODE_H__
  80584. +
  80585. +#include "forward.h"
  80586. +#include "debug.h"
  80587. +#include "dformat.h"
  80588. +#include "key.h"
  80589. +#include "coord.h"
  80590. +#include "plugin/node/node.h"
  80591. +#include "jnode.h"
  80592. +#include "lock.h"
  80593. +#include "readahead.h"
  80594. +
  80595. +#include <linux/types.h>
  80596. +#include <linux/spinlock.h>
  80597. +#include <linux/pagemap.h> /* for PAGE_CACHE_SIZE */
  80598. +#include <asm/atomic.h>
  80599. +
  80600. +/* znode tracks its position within parent (internal item in a parent node,
  80601. + * that contains znode's block number). */
  80602. +typedef struct parent_coord {
  80603. + znode *node;
  80604. + pos_in_node_t item_pos;
  80605. +} parent_coord_t;
  80606. +
  80607. +/* &znode - node in a reiser4 tree.
  80608. +
  80609. + NOTE-NIKITA fields in this struct have to be rearranged (later) to reduce
  80610. + cacheline pressure.
  80611. +
  80612. + Locking:
  80613. +
  80614. + Long term: data in a disk node attached to this znode are protected
  80615. + by long term, deadlock aware lock ->lock;
  80616. +
  80617. + Spin lock: the following fields are protected by the spin lock:
  80618. +
  80619. + ->lock
  80620. +
  80621. + Following fields are protected by the global tree lock:
  80622. +
  80623. + ->left
  80624. + ->right
  80625. + ->in_parent
  80626. + ->c_count
  80627. +
  80628. + Following fields are protected by the global delimiting key lock (dk_lock):
  80629. +
  80630. + ->ld_key (to update ->ld_key long-term lock on the node is also required)
  80631. + ->rd_key
  80632. +
  80633. + Following fields are protected by the long term lock:
  80634. +
  80635. + ->nr_items
  80636. +
  80637. + ->node_plugin is never changed once set. This means that after code made
  80638. + itself sure that field is valid it can be accessed without any additional
  80639. + locking.
  80640. +
  80641. + ->level is immutable.
  80642. +
  80643. + Invariants involving this data-type:
  80644. +
  80645. + [znode-fake]
  80646. + [znode-level]
  80647. + [znode-connected]
  80648. + [znode-c_count]
  80649. + [znode-refs]
  80650. + [jnode-refs]
  80651. + [jnode-queued]
  80652. + [znode-modify]
  80653. +
  80654. + For this to be made into a clustering or NUMA filesystem, we would want to eliminate all of the global locks.
  80655. + Suggestions for how to do that are desired.*/
  80656. +struct znode {
  80657. + /* Embedded jnode. */
  80658. + jnode zjnode;
  80659. +
  80660. + /* contains three subfields, node, pos_in_node, and pos_in_unit.
  80661. +
  80662. + pos_in_node and pos_in_unit are only hints that are cached to
  80663. + speed up lookups during balancing. They are not required to be up to
  80664. + date. Synched in find_child_ptr().
  80665. +
  80666. + This value allows us to avoid expensive binary searches.
  80667. +
  80668. + in_parent->node points to the parent of this node, and is NOT a
  80669. + hint.
  80670. + */
  80671. + parent_coord_t in_parent;
  80672. +
  80673. + /*
  80674. + * sibling list pointers
  80675. + */
  80676. +
  80677. + /* left-neighbor */
  80678. + znode *left;
  80679. + /* right-neighbor */
  80680. + znode *right;
  80681. +
  80682. + /* long term lock on node content. This lock supports deadlock
  80683. + detection. See lock.c
  80684. + */
  80685. + zlock lock;
  80686. +
  80687. + /* You cannot remove from memory a node that has children in
  80688. + memory. This is because we rely on the fact that parent of given
  80689. + node can always be reached without blocking for io. When reading a
  80690. + node into memory you must increase the c_count of its parent, when
  80691. + removing it from memory you must decrease the c_count. This makes
  80692. + the code simpler, and the cases where it is suboptimal are truly
  80693. + obscure.
  80694. + */
  80695. + int c_count;
  80696. +
  80697. + /* plugin of node attached to this znode. NULL if znode is not
  80698. + loaded. */
  80699. + node_plugin *nplug;
  80700. +
  80701. + /* version of znode data. This is increased on each modification. This
  80702. + * is necessary to implement seals (see seal.[ch]) efficiently. */
  80703. + __u64 version;
  80704. +
  80705. + /* left delimiting key. Necessary to efficiently perform
  80706. + balancing with node-level locking. Kept in memory only. */
  80707. + reiser4_key ld_key;
  80708. + /* right delimiting key. */
  80709. + reiser4_key rd_key;
  80710. +
  80711. + /* znode's tree level */
  80712. + __u16 level;
  80713. + /* number of items in this node. This field is modified by node
  80714. + * plugin. */
  80715. + __u16 nr_items;
  80716. +
  80717. +#if REISER4_DEBUG
  80718. + void *creator;
  80719. + reiser4_key first_key;
  80720. + unsigned long times_locked;
  80721. + int left_version; /* when node->left was updated */
  80722. + int right_version; /* when node->right was updated */
  80723. + int ld_key_version; /* when node->ld_key was updated */
  80724. + int rd_key_version; /* when node->rd_key was updated */
  80725. +#endif
  80726. +
  80727. +} __attribute__ ((aligned(16)));
  80728. +
  80729. +ON_DEBUG(extern atomic_t delim_key_version;
  80730. + )
  80731. +
  80732. +/* In general I think these macros should not be exposed. */
  80733. +#define znode_is_locked(node) (lock_is_locked(&node->lock))
  80734. +#define znode_is_rlocked(node) (lock_is_rlocked(&node->lock))
  80735. +#define znode_is_wlocked(node) (lock_is_wlocked(&node->lock))
  80736. +#define znode_is_wlocked_once(node) (lock_is_wlocked_once(&node->lock))
  80737. +#define znode_can_be_rlocked(node) (lock_can_be_rlocked(&node->lock))
  80738. +#define is_lock_compatible(node, mode) (lock_mode_compatible(&node->lock, mode))
  80739. +/* Macros for accessing the znode state. */
  80740. +#define ZF_CLR(p,f) JF_CLR (ZJNODE(p), (f))
  80741. +#define ZF_ISSET(p,f) JF_ISSET(ZJNODE(p), (f))
  80742. +#define ZF_SET(p,f) JF_SET (ZJNODE(p), (f))
  80743. +extern znode *zget(reiser4_tree * tree, const reiser4_block_nr * const block,
  80744. + znode * parent, tree_level level, gfp_t gfp_flag);
  80745. +extern znode *zlook(reiser4_tree * tree, const reiser4_block_nr * const block);
  80746. +extern int zload(znode * node);
  80747. +extern int zload_ra(znode * node, ra_info_t * info);
  80748. +extern int zinit_new(znode * node, gfp_t gfp_flags);
  80749. +extern void zrelse(znode * node);
  80750. +extern void znode_change_parent(znode * new_parent, reiser4_block_nr * block);
  80751. +extern void znode_update_csum(znode *node);
  80752. +
  80753. +/* size of data in znode */
  80754. +static inline unsigned
  80755. +znode_size(const znode * node UNUSED_ARG /* znode to query */ )
  80756. +{
  80757. + assert("nikita-1416", node != NULL);
  80758. + return PAGE_SIZE;
  80759. +}
  80760. +
  80761. +extern void parent_coord_to_coord(const parent_coord_t * pcoord,
  80762. + coord_t * coord);
  80763. +extern void coord_to_parent_coord(const coord_t * coord,
  80764. + parent_coord_t * pcoord);
  80765. +extern void init_parent_coord(parent_coord_t * pcoord, const znode * node);
  80766. +
  80767. +extern unsigned znode_free_space(znode * node);
  80768. +
  80769. +extern reiser4_key *znode_get_rd_key(znode * node);
  80770. +extern reiser4_key *znode_get_ld_key(znode * node);
  80771. +
  80772. +extern reiser4_key *znode_set_rd_key(znode * node, const reiser4_key * key);
  80773. +extern reiser4_key *znode_set_ld_key(znode * node, const reiser4_key * key);
  80774. +
  80775. +/* `connected' state checks */
  80776. +static inline int znode_is_right_connected(const znode * node)
  80777. +{
  80778. + return ZF_ISSET(node, JNODE_RIGHT_CONNECTED);
  80779. +}
  80780. +
  80781. +static inline int znode_is_left_connected(const znode * node)
  80782. +{
  80783. + return ZF_ISSET(node, JNODE_LEFT_CONNECTED);
  80784. +}
  80785. +
  80786. +static inline int znode_is_connected(const znode * node)
  80787. +{
  80788. + return znode_is_right_connected(node) && znode_is_left_connected(node);
  80789. +}
  80790. +
  80791. +extern int znode_shift_order;
  80792. +extern int znode_rehash(znode * node, const reiser4_block_nr * new_block_nr);
  80793. +extern void znode_remove(znode *, reiser4_tree *);
  80794. +extern znode *znode_parent(const znode * node);
  80795. +extern znode *znode_parent_nolock(const znode * node);
  80796. +extern int znode_above_root(const znode * node);
  80797. +extern int init_znode(jnode *node);
  80798. +extern int init_znodes(void);
  80799. +extern void done_znodes(void);
  80800. +extern int znodes_tree_init(reiser4_tree * ztree);
  80801. +extern void znodes_tree_done(reiser4_tree * ztree);
  80802. +extern int znode_contains_key(znode * node, const reiser4_key * key);
  80803. +extern int znode_contains_key_lock(znode * node, const reiser4_key * key);
  80804. +extern unsigned znode_save_free_space(znode * node);
  80805. +extern unsigned znode_recover_free_space(znode * node);
  80806. +extern znode *zalloc(gfp_t gfp_flag);
  80807. +extern void zinit(znode *, const znode * parent, reiser4_tree *);
  80808. +extern int zparse(znode * node);
  80809. +
  80810. +extern int znode_just_created(const znode * node);
  80811. +
  80812. +extern void zfree(znode * node);
  80813. +
  80814. +#if REISER4_DEBUG
  80815. +extern void print_znode(const char *prefix, const znode * node);
  80816. +#else
  80817. +#define print_znode( p, n ) noop
  80818. +#endif
  80819. +
  80820. +/* Make it look like various znode functions exist instead of treating znodes as
  80821. + jnodes in znode-specific code. */
  80822. +#define znode_page(x) jnode_page ( ZJNODE(x) )
  80823. +#define zdata(x) jdata ( ZJNODE(x) )
  80824. +#define znode_get_block(x) jnode_get_block ( ZJNODE(x) )
  80825. +#define znode_created(x) jnode_created ( ZJNODE(x) )
  80826. +#define znode_set_created(x) jnode_set_created ( ZJNODE(x) )
  80827. +#define znode_convertible(x) jnode_convertible (ZJNODE(x))
  80828. +#define znode_set_convertible(x) jnode_set_convertible (ZJNODE(x))
  80829. +
  80830. +#define znode_is_dirty(x) jnode_is_dirty ( ZJNODE(x) )
  80831. +#define znode_check_dirty(x) jnode_check_dirty ( ZJNODE(x) )
  80832. +#define znode_make_clean(x) jnode_make_clean ( ZJNODE(x) )
  80833. +#define znode_set_block(x, b) jnode_set_block ( ZJNODE(x), (b) )
  80834. +
  80835. +#define spin_lock_znode(x) spin_lock_jnode ( ZJNODE(x) )
  80836. +#define spin_unlock_znode(x) spin_unlock_jnode ( ZJNODE(x) )
  80837. +#define spin_trylock_znode(x) spin_trylock_jnode ( ZJNODE(x) )
  80838. +#define spin_znode_is_locked(x) spin_jnode_is_locked ( ZJNODE(x) )
  80839. +#define spin_znode_is_not_locked(x) spin_jnode_is_not_locked ( ZJNODE(x) )
  80840. +
  80841. +#if REISER4_DEBUG
  80842. +extern int znode_x_count_is_protected(const znode * node);
  80843. +extern int znode_invariant(znode * node);
  80844. +#endif
  80845. +
  80846. +/* acquire reference to @node */
  80847. +static inline znode *zref(znode * node)
  80848. +{
  80849. + /* change of x_count from 0 to 1 is protected by tree spin-lock */
  80850. + return JZNODE(jref(ZJNODE(node)));
  80851. +}
  80852. +
  80853. +/* release reference to @node */
  80854. +static inline void zput(znode * node)
  80855. +{
  80856. + assert("nikita-3564", znode_invariant(node));
  80857. + jput(ZJNODE(node));
  80858. +}
  80859. +
  80860. +/* get the level field for a znode */
  80861. +static inline tree_level znode_get_level(const znode * node)
  80862. +{
  80863. + return node->level;
  80864. +}
  80865. +
  80866. +/* get the level field for a jnode */
  80867. +static inline tree_level jnode_get_level(const jnode * node)
  80868. +{
  80869. + if (jnode_is_znode(node))
  80870. + return znode_get_level(JZNODE(node));
  80871. + else
  80872. + /* unformatted nodes are all at the LEAF_LEVEL and for
  80873. + "semi-formatted" nodes like bitmaps, level doesn't matter. */
  80874. + return LEAF_LEVEL;
  80875. +}
  80876. +
  80877. +/* true if jnode is on leaf level */
  80878. +static inline int jnode_is_leaf(const jnode * node)
  80879. +{
  80880. + if (jnode_is_znode(node))
  80881. + return (znode_get_level(JZNODE(node)) == LEAF_LEVEL);
  80882. + if (jnode_get_type(node) == JNODE_UNFORMATTED_BLOCK)
  80883. + return 1;
  80884. + return 0;
  80885. +}
  80886. +
  80887. +/* return znode's tree */
  80888. +static inline reiser4_tree *znode_get_tree(const znode * node)
  80889. +{
  80890. + assert("nikita-2692", node != NULL);
  80891. + return jnode_get_tree(ZJNODE(node));
  80892. +}
  80893. +
  80894. +/* resolve race with zput */
  80895. +static inline znode *znode_rip_check(reiser4_tree * tree, znode * node)
  80896. +{
  80897. + jnode *j;
  80898. +
  80899. + j = jnode_rip_sync(tree, ZJNODE(node));
  80900. + if (likely(j != NULL))
  80901. + node = JZNODE(j);
  80902. + else
  80903. + node = NULL;
  80904. + return node;
  80905. +}
  80906. +
  80907. +#if defined(REISER4_DEBUG)
  80908. +int znode_is_loaded(const znode * node /* znode to query */ );
  80909. +#endif
  80910. +
  80911. +extern __u64 znode_build_version(reiser4_tree * tree);
  80912. +
  80913. +/* Data-handles. A data handle object manages pairing calls to zload() and zrelse(). We
  80914. + must load the data for a node in many places. We could do this by simply calling
  80915. + zload() everywhere, the difficulty arises when we must release the loaded data by
  80916. + calling zrelse. In a function with many possible error/return paths, it requires extra
  80917. + work to figure out which exit paths must call zrelse and those which do not. The data
  80918. + handle automatically calls zrelse for every zload that it is responsible for. In that
  80919. + sense, it acts much like a lock_handle.
  80920. +*/
  80921. +typedef struct load_count {
  80922. + znode *node;
  80923. + int d_ref;
  80924. +} load_count;
  80925. +
  80926. +extern void init_load_count(load_count * lc); /* Initialize a load_count set the current node to NULL. */
  80927. +extern void done_load_count(load_count * dh); /* Finalize a load_count: call zrelse() if necessary */
  80928. +extern int incr_load_count_znode(load_count * dh, znode * node); /* Set the argument znode to the current node, call zload(). */
  80929. +extern int incr_load_count_jnode(load_count * dh, jnode * node); /* If the argument jnode is formatted, do the same as
  80930. + * incr_load_count_znode, otherwise do nothing (unformatted nodes
  80931. + * don't require zload/zrelse treatment). */
  80932. +extern void move_load_count(load_count * new, load_count * old); /* Move the contents of a load_count. Old handle is released. */
  80933. +extern void copy_load_count(load_count * new, load_count * old); /* Copy the contents of a load_count. Old handle remains held. */
  80934. +
  80935. +/* Variable initializers for load_count. */
  80936. +#define INIT_LOAD_COUNT ( load_count * ){ .node = NULL, .d_ref = 0 }
  80937. +#define INIT_LOAD_COUNT_NODE( n ) ( load_count ){ .node = ( n ), .d_ref = 0 }
  80938. +/* A convenience macro for use in assertions or debug-only code, where loaded
  80939. + data is only required to perform the debugging check. This macro
  80940. + encapsulates an expression inside a pair of calls to zload()/zrelse(). */
  80941. +#define WITH_DATA( node, exp ) \
  80942. +({ \
  80943. + long __with_dh_result; \
  80944. + znode *__with_dh_node; \
  80945. + \
  80946. + __with_dh_node = ( node ); \
  80947. + __with_dh_result = zload( __with_dh_node ); \
  80948. + if( __with_dh_result == 0 ) { \
  80949. + __with_dh_result = ( long )( exp ); \
  80950. + zrelse( __with_dh_node ); \
  80951. + } \
  80952. + __with_dh_result; \
  80953. +})
  80954. +
  80955. +/* Same as above, but accepts a return value in case zload fails. */
  80956. +#define WITH_DATA_RET( node, ret, exp ) \
  80957. +({ \
  80958. + int __with_dh_result; \
  80959. + znode *__with_dh_node; \
  80960. + \
  80961. + __with_dh_node = ( node ); \
  80962. + __with_dh_result = zload( __with_dh_node ); \
  80963. + if( __with_dh_result == 0 ) { \
  80964. + __with_dh_result = ( int )( exp ); \
  80965. + zrelse( __with_dh_node ); \
  80966. + } else \
  80967. + __with_dh_result = ( ret ); \
  80968. + __with_dh_result; \
  80969. +})
  80970. +
  80971. +#define WITH_COORD(coord, exp) \
  80972. +({ \
  80973. + coord_t *__coord; \
  80974. + \
  80975. + __coord = (coord); \
  80976. + coord_clear_iplug(__coord); \
  80977. + WITH_DATA(__coord->node, exp); \
  80978. +})
  80979. +
  80980. +#if REISER4_DEBUG
  80981. +#define STORE_COUNTERS \
  80982. + reiser4_lock_cnt_info __entry_counters = \
  80983. + *reiser4_lock_counters()
  80984. +#define CHECK_COUNTERS \
  80985. +ON_DEBUG_CONTEXT( \
  80986. +({ \
  80987. + __entry_counters.x_refs = reiser4_lock_counters() -> x_refs; \
  80988. + __entry_counters.t_refs = reiser4_lock_counters() -> t_refs; \
  80989. + __entry_counters.d_refs = reiser4_lock_counters() -> d_refs; \
  80990. + assert("nikita-2159", \
  80991. + !memcmp(&__entry_counters, reiser4_lock_counters(), \
  80992. + sizeof __entry_counters)); \
  80993. +}) )
  80994. +
  80995. +#else
  80996. +#define STORE_COUNTERS
  80997. +#define CHECK_COUNTERS noop
  80998. +#endif
  80999. +
  81000. +/* __ZNODE_H__ */
  81001. +#endif
  81002. +
  81003. +/* Make Linus happy.
  81004. + Local variables:
  81005. + c-indentation-style: "K&R"
  81006. + mode-name: "LC"
  81007. + c-basic-offset: 8
  81008. + tab-width: 8
  81009. + fill-column: 120
  81010. + End:
  81011. +*/
  81012. diff -urN --no-dereference linux-4.14.2.orig/include/linux/fs.h linux-4.14.2/include/linux/fs.h
  81013. --- linux-4.14.2.orig/include/linux/fs.h 2017-11-27 23:32:41.000000000 +0100
  81014. +++ linux-4.14.2/include/linux/fs.h 2017-11-26 22:13:09.000000000 +0100
  81015. @@ -228,7 +228,7 @@
  81016. */
  81017. #define FILESYSTEM_MAX_STACK_DEPTH 2
  81018. -/**
  81019. +/**
  81020. * enum positive_aop_returns - aop return codes with specific semantics
  81021. *
  81022. * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has
  81023. @@ -238,7 +238,7 @@
  81024. * be a candidate for writeback again in the near
  81025. * future. Other callers must be careful to unlock
  81026. * the page if they get this return. Returned by
  81027. - * writepage();
  81028. + * writepage();
  81029. *
  81030. * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has
  81031. * unlocked it and the page might have been truncated.
  81032. @@ -270,6 +270,8 @@
  81033. struct page;
  81034. struct address_space;
  81035. struct writeback_control;
  81036. +struct wb_writeback_work;
  81037. +struct bdi_writeback;
  81038. /*
  81039. * Write life time hint values.
  81040. @@ -1817,6 +1819,14 @@
  81041. int (*remount_fs) (struct super_block *, int *, char *);
  81042. void (*umount_begin) (struct super_block *);
  81043. + long (*writeback_inodes)(struct super_block *sb,
  81044. + struct bdi_writeback *wb,
  81045. + struct writeback_control *wbc,
  81046. + struct wb_writeback_work *work,
  81047. + bool flush_all);
  81048. + void (*sync_inodes) (struct super_block *sb,
  81049. + struct writeback_control *wbc);
  81050. +
  81051. int (*show_options)(struct seq_file *, struct dentry *);
  81052. int (*show_devname)(struct seq_file *, struct dentry *);
  81053. int (*show_path)(struct seq_file *, struct dentry *);
  81054. @@ -2567,6 +2577,13 @@
  81055. extern int invalidate_inode_pages2_range(struct address_space *mapping,
  81056. pgoff_t start, pgoff_t end);
  81057. extern int write_inode_now(struct inode *, int);
  81058. +extern void writeback_skip_sb_inodes(struct super_block *sb,
  81059. + struct bdi_writeback *wb);
  81060. +extern long generic_writeback_sb_inodes(struct super_block *sb,
  81061. + struct bdi_writeback *wb,
  81062. + struct writeback_control *wbc,
  81063. + struct wb_writeback_work *work,
  81064. + bool flush_all);
  81065. extern int filemap_fdatawrite(struct address_space *);
  81066. extern int filemap_flush(struct address_space *);
  81067. extern int filemap_fdatawait_keep_errors(struct address_space *mapping);
  81068. @@ -2824,7 +2841,7 @@
  81069. extern ssize_t kernel_write(struct file *, const void *, size_t, loff_t *);
  81070. extern ssize_t __kernel_write(struct file *, const void *, size_t, loff_t *);
  81071. extern struct file * open_exec(const char *);
  81072. -
  81073. +
  81074. /* fs/dcache.c -- generic fs support functions */
  81075. extern bool is_subdir(struct dentry *, struct dentry *);
  81076. extern bool path_is_under(const struct path *, const struct path *);
  81077. @@ -2834,6 +2851,8 @@
  81078. #include <linux/err.h>
  81079. /* needed for stackable file system support */
  81080. +extern loff_t default_llseek_unlocked(struct file *file, loff_t offset,
  81081. + int whence);
  81082. extern loff_t default_llseek(struct file *file, loff_t offset, int whence);
  81083. extern loff_t vfs_llseek(struct file *file, loff_t offset, int whence);
  81084. @@ -2916,6 +2935,8 @@
  81085. extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *);
  81086. extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t);
  81087. +ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len,
  81088. + loff_t *ppos);
  81089. ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
  81090. rwf_t flags);
  81091. ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
  81092. diff -urN --no-dereference linux-4.14.2.orig/include/linux/mm.h linux-4.14.2/include/linux/mm.h
  81093. --- linux-4.14.2.orig/include/linux/mm.h 2017-11-27 23:32:41.000000000 +0100
  81094. +++ linux-4.14.2/include/linux/mm.h 2017-11-26 22:13:09.000000000 +0100
  81095. @@ -1431,6 +1431,7 @@
  81096. struct bdi_writeback *wb);
  81097. int set_page_dirty(struct page *page);
  81098. int set_page_dirty_lock(struct page *page);
  81099. +int set_page_dirty_notag(struct page *page);
  81100. void cancel_dirty_page(struct page *page);
  81101. int clear_page_dirty_for_io(struct page *page);
  81102. diff -urN --no-dereference linux-4.14.2.orig/include/linux/sched.h linux-4.14.2/include/linux/sched.h
  81103. --- linux-4.14.2.orig/include/linux/sched.h 2017-11-27 23:32:41.000000000 +0100
  81104. +++ linux-4.14.2/include/linux/sched.h 2017-11-26 22:13:09.000000000 +0100
  81105. @@ -1292,6 +1292,7 @@
  81106. /*
  81107. * Per process flags
  81108. */
  81109. +#define PF_FLUSHER 0x00000001 /* responsible for disk writeback */
  81110. #define PF_IDLE 0x00000002 /* I am an IDLE thread */
  81111. #define PF_EXITING 0x00000004 /* Getting shut down */
  81112. #define PF_EXITPIDONE 0x00000008 /* PI exit done on shut down */
  81113. diff -urN --no-dereference linux-4.14.2.orig/include/linux/writeback.h linux-4.14.2/include/linux/writeback.h
  81114. --- linux-4.14.2.orig/include/linux/writeback.h 2017-11-27 23:32:41.000000000 +0100
  81115. +++ linux-4.14.2/include/linux/writeback.h 2017-11-26 22:13:09.000000000 +0100
  81116. @@ -16,6 +16,12 @@
  81117. DECLARE_PER_CPU(int, dirty_throttle_leaks);
  81118. +static inline int is_flush_bd_task(struct task_struct *task)
  81119. +{
  81120. + return task->flags & PF_FLUSHER;
  81121. +}
  81122. +#define current_is_flush_bd_task() is_flush_bd_task(current)
  81123. +
  81124. /*
  81125. * The 1/4 region under the global dirty thresh is for smooth dirty throttling:
  81126. *
  81127. @@ -180,8 +186,28 @@
  81128. }
  81129. /*
  81130. + * Passed into wb_writeback(), essentially a subset of writeback_control
  81131. + */
  81132. +struct wb_writeback_work {
  81133. + long nr_pages;
  81134. + struct super_block *sb;
  81135. + unsigned long *older_than_this;
  81136. + enum writeback_sync_modes sync_mode;
  81137. + unsigned int tagged_writepages:1;
  81138. + unsigned int for_kupdate:1;
  81139. + unsigned int range_cyclic:1;
  81140. + unsigned int for_background:1;
  81141. + unsigned int for_sync:1; /* sync(2) WB_SYNC_ALL writeback */
  81142. + unsigned int auto_free:1; /* free on completion */
  81143. + enum wb_reason reason; /* why was writeback initiated? */
  81144. +
  81145. + struct list_head list; /* pending work list */
  81146. + struct wb_completion *done; /* set if the caller waits */
  81147. +};
  81148. +
  81149. +/*
  81150. * fs/fs-writeback.c
  81151. - */
  81152. + */
  81153. struct bdi_writeback;
  81154. void writeback_inodes_sb(struct super_block *, enum wb_reason reason);
  81155. void writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
  81156. diff -urN --no-dereference linux-4.14.2.orig/mm/filemap.c linux-4.14.2/mm/filemap.c
  81157. --- linux-4.14.2.orig/mm/filemap.c 2017-11-27 23:32:41.000000000 +0100
  81158. +++ linux-4.14.2/mm/filemap.c 2017-11-27 23:18:03.000000000 +0100
  81159. @@ -1673,6 +1673,7 @@
  81160. return ret;
  81161. }
  81162. +EXPORT_SYMBOL(find_get_pages_range);
  81163. /**
  81164. * find_get_pages_contig - gang contiguous pagecache lookup
  81165. diff -urN --no-dereference linux-4.14.2.orig/mm/page-writeback.c linux-4.14.2/mm/page-writeback.c
  81166. --- linux-4.14.2.orig/mm/page-writeback.c 2017-11-27 23:32:41.000000000 +0100
  81167. +++ linux-4.14.2/mm/page-writeback.c 2017-11-26 22:13:09.000000000 +0100
  81168. @@ -2530,6 +2530,35 @@
  81169. EXPORT_SYMBOL(account_page_redirty);
  81170. /*
  81171. + * set_page_dirty_notag() -- similar to __set_page_dirty_nobuffers()
  81172. + * except it doesn't tag the page dirty in the page-cache radix tree.
  81173. + * This means that the address space using this cannot use the regular
  81174. + * filemap ->writepages() helpers and must provide its own means of
  81175. + * tracking and finding non-tagged dirty pages.
  81176. + *
  81177. + * NOTE: furthermore, this version also doesn't handle truncate races.
  81178. + */
  81179. +int set_page_dirty_notag(struct page *page)
  81180. +{
  81181. + struct address_space *mapping = page->mapping;
  81182. +
  81183. + lock_page_memcg(page);
  81184. + if (!TestSetPageDirty(page)) {
  81185. + unsigned long flags;
  81186. + WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
  81187. + local_irq_save(flags);
  81188. + account_page_dirtied(page, mapping);
  81189. + local_irq_restore(flags);
  81190. + unlock_page_memcg(page);
  81191. + __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
  81192. + return 1;
  81193. + }
  81194. + unlock_page_memcg(page);
  81195. + return 0;
  81196. +}
  81197. +EXPORT_SYMBOL(set_page_dirty_notag);
  81198. +
  81199. +/*
  81200. * When a writepage implementation decides that it doesn't want to write this
  81201. * page for some reason, it should redirty the locked page via
  81202. * redirty_page_for_writepage() and it should then unlock the page and return 0
  81203. diff -urN --no-dereference linux-4.14.2.orig/mm/vmscan.c linux-4.14.2/mm/vmscan.c
  81204. --- linux-4.14.2.orig/mm/vmscan.c 2017-11-27 23:32:41.000000000 +0100
  81205. +++ linux-4.14.2/mm/vmscan.c 2017-11-26 22:13:09.000000000 +0100
  81206. @@ -2847,7 +2847,11 @@
  81207. pg_data_t *last_pgdat;
  81208. struct zoneref *z;
  81209. struct zone *zone;
  81210. + void *saved;
  81211. retry:
  81212. + saved = current->journal_info; /* save journal info */
  81213. + current->journal_info = NULL;
  81214. +
  81215. delayacct_freepages_start();
  81216. if (global_reclaim(sc))
  81217. @@ -2883,6 +2887,8 @@
  81218. }
  81219. delayacct_freepages_end();
  81220. + /* restore journal info */
  81221. + current->journal_info = saved;
  81222. if (sc->nr_reclaimed)
  81223. return sc->nr_reclaimed;