stree.c 64 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265
  1. /*
  2. * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
  3. */
  4. /*
  5. * Written by Anatoly P. Pinchuk pap@namesys.botik.ru
  6. * Programm System Institute
  7. * Pereslavl-Zalessky Russia
  8. */
  9. #include <linux/time.h>
  10. #include <linux/string.h>
  11. #include <linux/pagemap.h>
  12. #include <linux/bio.h>
  13. #include "reiserfs.h"
  14. #include <linux/buffer_head.h>
  15. #include <linux/quotaops.h>
  16. /* Does the buffer contain a disk block which is in the tree. */
  17. inline int B_IS_IN_TREE(const struct buffer_head *bh)
  18. {
  19. RFALSE(B_LEVEL(bh) > MAX_HEIGHT,
  20. "PAP-1010: block (%b) has too big level (%z)", bh, bh);
  21. return (B_LEVEL(bh) != FREE_LEVEL);
  22. }
  23. /* to get item head in le form */
  24. inline void copy_item_head(struct item_head *to,
  25. const struct item_head *from)
  26. {
  27. memcpy(to, from, IH_SIZE);
  28. }
  29. /*
  30. * k1 is pointer to on-disk structure which is stored in little-endian
  31. * form. k2 is pointer to cpu variable. For key of items of the same
  32. * object this returns 0.
  33. * Returns: -1 if key1 < key2
  34. * 0 if key1 == key2
  35. * 1 if key1 > key2
  36. */
  37. inline int comp_short_keys(const struct reiserfs_key *le_key,
  38. const struct cpu_key *cpu_key)
  39. {
  40. __u32 n;
  41. n = le32_to_cpu(le_key->k_dir_id);
  42. if (n < cpu_key->on_disk_key.k_dir_id)
  43. return -1;
  44. if (n > cpu_key->on_disk_key.k_dir_id)
  45. return 1;
  46. n = le32_to_cpu(le_key->k_objectid);
  47. if (n < cpu_key->on_disk_key.k_objectid)
  48. return -1;
  49. if (n > cpu_key->on_disk_key.k_objectid)
  50. return 1;
  51. return 0;
  52. }
  53. /*
  54. * k1 is pointer to on-disk structure which is stored in little-endian
  55. * form. k2 is pointer to cpu variable.
  56. * Compare keys using all 4 key fields.
  57. * Returns: -1 if key1 < key2 0
  58. * if key1 = key2 1 if key1 > key2
  59. */
  60. static inline int comp_keys(const struct reiserfs_key *le_key,
  61. const struct cpu_key *cpu_key)
  62. {
  63. int retval;
  64. retval = comp_short_keys(le_key, cpu_key);
  65. if (retval)
  66. return retval;
  67. if (le_key_k_offset(le_key_version(le_key), le_key) <
  68. cpu_key_k_offset(cpu_key))
  69. return -1;
  70. if (le_key_k_offset(le_key_version(le_key), le_key) >
  71. cpu_key_k_offset(cpu_key))
  72. return 1;
  73. if (cpu_key->key_length == 3)
  74. return 0;
  75. /* this part is needed only when tail conversion is in progress */
  76. if (le_key_k_type(le_key_version(le_key), le_key) <
  77. cpu_key_k_type(cpu_key))
  78. return -1;
  79. if (le_key_k_type(le_key_version(le_key), le_key) >
  80. cpu_key_k_type(cpu_key))
  81. return 1;
  82. return 0;
  83. }
  84. inline int comp_short_le_keys(const struct reiserfs_key *key1,
  85. const struct reiserfs_key *key2)
  86. {
  87. __u32 *k1_u32, *k2_u32;
  88. int key_length = REISERFS_SHORT_KEY_LEN;
  89. k1_u32 = (__u32 *) key1;
  90. k2_u32 = (__u32 *) key2;
  91. for (; key_length--; ++k1_u32, ++k2_u32) {
  92. if (le32_to_cpu(*k1_u32) < le32_to_cpu(*k2_u32))
  93. return -1;
  94. if (le32_to_cpu(*k1_u32) > le32_to_cpu(*k2_u32))
  95. return 1;
  96. }
  97. return 0;
  98. }
  99. inline void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from)
  100. {
  101. int version;
  102. to->on_disk_key.k_dir_id = le32_to_cpu(from->k_dir_id);
  103. to->on_disk_key.k_objectid = le32_to_cpu(from->k_objectid);
  104. /* find out version of the key */
  105. version = le_key_version(from);
  106. to->version = version;
  107. to->on_disk_key.k_offset = le_key_k_offset(version, from);
  108. to->on_disk_key.k_type = le_key_k_type(version, from);
  109. }
  110. /*
  111. * this does not say which one is bigger, it only returns 1 if keys
  112. * are not equal, 0 otherwise
  113. */
  114. inline int comp_le_keys(const struct reiserfs_key *k1,
  115. const struct reiserfs_key *k2)
  116. {
  117. return memcmp(k1, k2, sizeof(struct reiserfs_key));
  118. }
  119. /**************************************************************************
  120. * Binary search toolkit function *
  121. * Search for an item in the array by the item key *
  122. * Returns: 1 if found, 0 if not found; *
  123. * *pos = number of the searched element if found, else the *
  124. * number of the first element that is larger than key. *
  125. **************************************************************************/
  126. /*
  127. * For those not familiar with binary search: lbound is the leftmost item
  128. * that it could be, rbound the rightmost item that it could be. We examine
  129. * the item halfway between lbound and rbound, and that tells us either
  130. * that we can increase lbound, or decrease rbound, or that we have found it,
  131. * or if lbound <= rbound that there are no possible items, and we have not
  132. * found it. With each examination we cut the number of possible items it
  133. * could be by one more than half rounded down, or we find it.
  134. */
  135. static inline int bin_search(const void *key, /* Key to search for. */
  136. const void *base, /* First item in the array. */
  137. int num, /* Number of items in the array. */
  138. /*
  139. * Item size in the array. searched. Lest the
  140. * reader be confused, note that this is crafted
  141. * as a general function, and when it is applied
  142. * specifically to the array of item headers in a
  143. * node, width is actually the item header size
  144. * not the item size.
  145. */
  146. int width,
  147. int *pos /* Number of the searched for element. */
  148. )
  149. {
  150. int rbound, lbound, j;
  151. for (j = ((rbound = num - 1) + (lbound = 0)) / 2;
  152. lbound <= rbound; j = (rbound + lbound) / 2)
  153. switch (comp_keys
  154. ((struct reiserfs_key *)((char *)base + j * width),
  155. (struct cpu_key *)key)) {
  156. case -1:
  157. lbound = j + 1;
  158. continue;
  159. case 1:
  160. rbound = j - 1;
  161. continue;
  162. case 0:
  163. *pos = j;
  164. return ITEM_FOUND; /* Key found in the array. */
  165. }
  166. /*
  167. * bin_search did not find given key, it returns position of key,
  168. * that is minimal and greater than the given one.
  169. */
  170. *pos = lbound;
  171. return ITEM_NOT_FOUND;
  172. }
  173. /* Minimal possible key. It is never in the tree. */
  174. const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} };
  175. /* Maximal possible key. It is never in the tree. */
  176. static const struct reiserfs_key MAX_KEY = {
  177. cpu_to_le32(0xffffffff),
  178. cpu_to_le32(0xffffffff),
  179. {{cpu_to_le32(0xffffffff),
  180. cpu_to_le32(0xffffffff)},}
  181. };
  182. /*
  183. * Get delimiting key of the buffer by looking for it in the buffers in the
  184. * path, starting from the bottom of the path, and going upwards. We must
  185. * check the path's validity at each step. If the key is not in the path,
  186. * there is no delimiting key in the tree (buffer is first or last buffer
  187. * in tree), and in this case we return a special key, either MIN_KEY or
  188. * MAX_KEY.
  189. */
  190. static inline const struct reiserfs_key *get_lkey(const struct treepath *chk_path,
  191. const struct super_block *sb)
  192. {
  193. int position, path_offset = chk_path->path_length;
  194. struct buffer_head *parent;
  195. RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET,
  196. "PAP-5010: invalid offset in the path");
  197. /* While not higher in path than first element. */
  198. while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
  199. RFALSE(!buffer_uptodate
  200. (PATH_OFFSET_PBUFFER(chk_path, path_offset)),
  201. "PAP-5020: parent is not uptodate");
  202. /* Parent at the path is not in the tree now. */
  203. if (!B_IS_IN_TREE
  204. (parent =
  205. PATH_OFFSET_PBUFFER(chk_path, path_offset)))
  206. return &MAX_KEY;
  207. /* Check whether position in the parent is correct. */
  208. if ((position =
  209. PATH_OFFSET_POSITION(chk_path,
  210. path_offset)) >
  211. B_NR_ITEMS(parent))
  212. return &MAX_KEY;
  213. /* Check whether parent at the path really points to the child. */
  214. if (B_N_CHILD_NUM(parent, position) !=
  215. PATH_OFFSET_PBUFFER(chk_path,
  216. path_offset + 1)->b_blocknr)
  217. return &MAX_KEY;
  218. /*
  219. * Return delimiting key if position in the parent
  220. * is not equal to zero.
  221. */
  222. if (position)
  223. return internal_key(parent, position - 1);
  224. }
  225. /* Return MIN_KEY if we are in the root of the buffer tree. */
  226. if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)->
  227. b_blocknr == SB_ROOT_BLOCK(sb))
  228. return &MIN_KEY;
  229. return &MAX_KEY;
  230. }
  231. /* Get delimiting key of the buffer at the path and its right neighbor. */
  232. inline const struct reiserfs_key *get_rkey(const struct treepath *chk_path,
  233. const struct super_block *sb)
  234. {
  235. int position, path_offset = chk_path->path_length;
  236. struct buffer_head *parent;
  237. RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET,
  238. "PAP-5030: invalid offset in the path");
  239. while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
  240. RFALSE(!buffer_uptodate
  241. (PATH_OFFSET_PBUFFER(chk_path, path_offset)),
  242. "PAP-5040: parent is not uptodate");
  243. /* Parent at the path is not in the tree now. */
  244. if (!B_IS_IN_TREE
  245. (parent =
  246. PATH_OFFSET_PBUFFER(chk_path, path_offset)))
  247. return &MIN_KEY;
  248. /* Check whether position in the parent is correct. */
  249. if ((position =
  250. PATH_OFFSET_POSITION(chk_path,
  251. path_offset)) >
  252. B_NR_ITEMS(parent))
  253. return &MIN_KEY;
  254. /*
  255. * Check whether parent at the path really points
  256. * to the child.
  257. */
  258. if (B_N_CHILD_NUM(parent, position) !=
  259. PATH_OFFSET_PBUFFER(chk_path,
  260. path_offset + 1)->b_blocknr)
  261. return &MIN_KEY;
  262. /*
  263. * Return delimiting key if position in the parent
  264. * is not the last one.
  265. */
  266. if (position != B_NR_ITEMS(parent))
  267. return internal_key(parent, position);
  268. }
  269. /* Return MAX_KEY if we are in the root of the buffer tree. */
  270. if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)->
  271. b_blocknr == SB_ROOT_BLOCK(sb))
  272. return &MAX_KEY;
  273. return &MIN_KEY;
  274. }
  275. /*
  276. * Check whether a key is contained in the tree rooted from a buffer at a path.
  277. * This works by looking at the left and right delimiting keys for the buffer
  278. * in the last path_element in the path. These delimiting keys are stored
  279. * at least one level above that buffer in the tree. If the buffer is the
  280. * first or last node in the tree order then one of the delimiting keys may
  281. * be absent, and in this case get_lkey and get_rkey return a special key
  282. * which is MIN_KEY or MAX_KEY.
  283. */
  284. static inline int key_in_buffer(
  285. /* Path which should be checked. */
  286. struct treepath *chk_path,
  287. /* Key which should be checked. */
  288. const struct cpu_key *key,
  289. struct super_block *sb
  290. )
  291. {
  292. RFALSE(!key || chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET
  293. || chk_path->path_length > MAX_HEIGHT,
  294. "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)",
  295. key, chk_path->path_length);
  296. RFALSE(!PATH_PLAST_BUFFER(chk_path)->b_bdev,
  297. "PAP-5060: device must not be NODEV");
  298. if (comp_keys(get_lkey(chk_path, sb), key) == 1)
  299. /* left delimiting key is bigger, that the key we look for */
  300. return 0;
  301. /* if ( comp_keys(key, get_rkey(chk_path, sb)) != -1 ) */
  302. if (comp_keys(get_rkey(chk_path, sb), key) != 1)
  303. /* key must be less than right delimitiing key */
  304. return 0;
  305. return 1;
  306. }
  307. int reiserfs_check_path(struct treepath *p)
  308. {
  309. RFALSE(p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET,
  310. "path not properly relsed");
  311. return 0;
  312. }
  313. /*
  314. * Drop the reference to each buffer in a path and restore
  315. * dirty bits clean when preparing the buffer for the log.
  316. * This version should only be called from fix_nodes()
  317. */
  318. void pathrelse_and_restore(struct super_block *sb,
  319. struct treepath *search_path)
  320. {
  321. int path_offset = search_path->path_length;
  322. RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
  323. "clm-4000: invalid path offset");
  324. while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) {
  325. struct buffer_head *bh;
  326. bh = PATH_OFFSET_PBUFFER(search_path, path_offset--);
  327. reiserfs_restore_prepared_buffer(sb, bh);
  328. brelse(bh);
  329. }
  330. search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
  331. }
  332. /* Drop the reference to each buffer in a path */
  333. void pathrelse(struct treepath *search_path)
  334. {
  335. int path_offset = search_path->path_length;
  336. RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
  337. "PAP-5090: invalid path offset");
  338. while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET)
  339. brelse(PATH_OFFSET_PBUFFER(search_path, path_offset--));
  340. search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
  341. }
  342. static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
  343. {
  344. struct block_head *blkh;
  345. struct item_head *ih;
  346. int used_space;
  347. int prev_location;
  348. int i;
  349. int nr;
  350. blkh = (struct block_head *)buf;
  351. if (blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) {
  352. reiserfs_warning(NULL, "reiserfs-5080",
  353. "this should be caught earlier");
  354. return 0;
  355. }
  356. nr = blkh_nr_item(blkh);
  357. if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) {
  358. /* item number is too big or too small */
  359. reiserfs_warning(NULL, "reiserfs-5081",
  360. "nr_item seems wrong: %z", bh);
  361. return 0;
  362. }
  363. ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1;
  364. used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih));
  365. /* free space does not match to calculated amount of use space */
  366. if (used_space != blocksize - blkh_free_space(blkh)) {
  367. reiserfs_warning(NULL, "reiserfs-5082",
  368. "free space seems wrong: %z", bh);
  369. return 0;
  370. }
  371. /*
  372. * FIXME: it is_leaf will hit performance too much - we may have
  373. * return 1 here
  374. */
  375. /* check tables of item heads */
  376. ih = (struct item_head *)(buf + BLKH_SIZE);
  377. prev_location = blocksize;
  378. for (i = 0; i < nr; i++, ih++) {
  379. if (le_ih_k_type(ih) == TYPE_ANY) {
  380. reiserfs_warning(NULL, "reiserfs-5083",
  381. "wrong item type for item %h",
  382. ih);
  383. return 0;
  384. }
  385. if (ih_location(ih) >= blocksize
  386. || ih_location(ih) < IH_SIZE * nr) {
  387. reiserfs_warning(NULL, "reiserfs-5084",
  388. "item location seems wrong: %h",
  389. ih);
  390. return 0;
  391. }
  392. if (ih_item_len(ih) < 1
  393. || ih_item_len(ih) > MAX_ITEM_LEN(blocksize)) {
  394. reiserfs_warning(NULL, "reiserfs-5085",
  395. "item length seems wrong: %h",
  396. ih);
  397. return 0;
  398. }
  399. if (prev_location - ih_location(ih) != ih_item_len(ih)) {
  400. reiserfs_warning(NULL, "reiserfs-5086",
  401. "item location seems wrong "
  402. "(second one): %h", ih);
  403. return 0;
  404. }
  405. prev_location = ih_location(ih);
  406. }
  407. /* one may imagine many more checks */
  408. return 1;
  409. }
  410. /* returns 1 if buf looks like an internal node, 0 otherwise */
  411. static int is_internal(char *buf, int blocksize, struct buffer_head *bh)
  412. {
  413. struct block_head *blkh;
  414. int nr;
  415. int used_space;
  416. blkh = (struct block_head *)buf;
  417. nr = blkh_level(blkh);
  418. if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) {
  419. /* this level is not possible for internal nodes */
  420. reiserfs_warning(NULL, "reiserfs-5087",
  421. "this should be caught earlier");
  422. return 0;
  423. }
  424. nr = blkh_nr_item(blkh);
  425. /* for internal which is not root we might check min number of keys */
  426. if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) {
  427. reiserfs_warning(NULL, "reiserfs-5088",
  428. "number of key seems wrong: %z", bh);
  429. return 0;
  430. }
  431. used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1);
  432. if (used_space != blocksize - blkh_free_space(blkh)) {
  433. reiserfs_warning(NULL, "reiserfs-5089",
  434. "free space seems wrong: %z", bh);
  435. return 0;
  436. }
  437. /* one may imagine many more checks */
  438. return 1;
  439. }
  440. /*
  441. * make sure that bh contains formatted node of reiserfs tree of
  442. * 'level'-th level
  443. */
  444. static int is_tree_node(struct buffer_head *bh, int level)
  445. {
  446. if (B_LEVEL(bh) != level) {
  447. reiserfs_warning(NULL, "reiserfs-5090", "node level %d does "
  448. "not match to the expected one %d",
  449. B_LEVEL(bh), level);
  450. return 0;
  451. }
  452. if (level == DISK_LEAF_NODE_LEVEL)
  453. return is_leaf(bh->b_data, bh->b_size, bh);
  454. return is_internal(bh->b_data, bh->b_size, bh);
  455. }
  456. #define SEARCH_BY_KEY_READA 16
  457. /*
  458. * The function is NOT SCHEDULE-SAFE!
  459. * It might unlock the write lock if we needed to wait for a block
  460. * to be read. Note that in this case it won't recover the lock to avoid
  461. * high contention resulting from too much lock requests, especially
  462. * the caller (search_by_key) will perform other schedule-unsafe
  463. * operations just after calling this function.
  464. *
  465. * @return depth of lock to be restored after read completes
  466. */
  467. static int search_by_key_reada(struct super_block *s,
  468. struct buffer_head **bh,
  469. b_blocknr_t *b, int num)
  470. {
  471. int i, j;
  472. int depth = -1;
  473. for (i = 0; i < num; i++) {
  474. bh[i] = sb_getblk(s, b[i]);
  475. }
  476. /*
  477. * We are going to read some blocks on which we
  478. * have a reference. It's safe, though we might be
  479. * reading blocks concurrently changed if we release
  480. * the lock. But it's still fine because we check later
  481. * if the tree changed
  482. */
  483. for (j = 0; j < i; j++) {
  484. /*
  485. * note, this needs attention if we are getting rid of the BKL
  486. * you have to make sure the prepared bit isn't set on this
  487. * buffer
  488. */
  489. if (!buffer_uptodate(bh[j])) {
  490. if (depth == -1)
  491. depth = reiserfs_write_unlock_nested(s);
  492. ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, bh + j);
  493. }
  494. brelse(bh[j]);
  495. }
  496. return depth;
  497. }
  498. /*
  499. * This function fills up the path from the root to the leaf as it
  500. * descends the tree looking for the key. It uses reiserfs_bread to
  501. * try to find buffers in the cache given their block number. If it
  502. * does not find them in the cache it reads them from disk. For each
  503. * node search_by_key finds using reiserfs_bread it then uses
  504. * bin_search to look through that node. bin_search will find the
  505. * position of the block_number of the next node if it is looking
  506. * through an internal node. If it is looking through a leaf node
  507. * bin_search will find the position of the item which has key either
  508. * equal to given key, or which is the maximal key less than the given
  509. * key. search_by_key returns a path that must be checked for the
  510. * correctness of the top of the path but need not be checked for the
  511. * correctness of the bottom of the path
  512. */
  513. /*
  514. * search_by_key - search for key (and item) in stree
  515. * @sb: superblock
  516. * @key: pointer to key to search for
  517. * @search_path: Allocated and initialized struct treepath; Returned filled
  518. * on success.
  519. * @stop_level: How far down the tree to search, Use DISK_LEAF_NODE_LEVEL to
  520. * stop at leaf level.
  521. *
  522. * The function is NOT SCHEDULE-SAFE!
  523. */
  524. int search_by_key(struct super_block *sb, const struct cpu_key *key,
  525. struct treepath *search_path, int stop_level)
  526. {
  527. b_blocknr_t block_number;
  528. int expected_level;
  529. struct buffer_head *bh;
  530. struct path_element *last_element;
  531. int node_level, retval;
  532. int right_neighbor_of_leaf_node;
  533. int fs_gen;
  534. struct buffer_head *reada_bh[SEARCH_BY_KEY_READA];
  535. b_blocknr_t reada_blocks[SEARCH_BY_KEY_READA];
  536. int reada_count = 0;
  537. #ifdef CONFIG_REISERFS_CHECK
  538. int repeat_counter = 0;
  539. #endif
  540. PROC_INFO_INC(sb, search_by_key);
  541. /*
  542. * As we add each node to a path we increase its count. This means
  543. * that we must be careful to release all nodes in a path before we
  544. * either discard the path struct or re-use the path struct, as we
  545. * do here.
  546. */
  547. pathrelse(search_path);
  548. right_neighbor_of_leaf_node = 0;
  549. /*
  550. * With each iteration of this loop we search through the items in the
  551. * current node, and calculate the next current node(next path element)
  552. * for the next iteration of this loop..
  553. */
  554. block_number = SB_ROOT_BLOCK(sb);
  555. expected_level = -1;
  556. while (1) {
  557. #ifdef CONFIG_REISERFS_CHECK
  558. if (!(++repeat_counter % 50000))
  559. reiserfs_warning(sb, "PAP-5100",
  560. "%s: there were %d iterations of "
  561. "while loop looking for key %K",
  562. current->comm, repeat_counter,
  563. key);
  564. #endif
  565. /* prep path to have another element added to it. */
  566. last_element =
  567. PATH_OFFSET_PELEMENT(search_path,
  568. ++search_path->path_length);
  569. fs_gen = get_generation(sb);
  570. /*
  571. * Read the next tree node, and set the last element
  572. * in the path to have a pointer to it.
  573. */
  574. if ((bh = last_element->pe_buffer =
  575. sb_getblk(sb, block_number))) {
  576. /*
  577. * We'll need to drop the lock if we encounter any
  578. * buffers that need to be read. If all of them are
  579. * already up to date, we don't need to drop the lock.
  580. */
  581. int depth = -1;
  582. if (!buffer_uptodate(bh) && reada_count > 1)
  583. depth = search_by_key_reada(sb, reada_bh,
  584. reada_blocks, reada_count);
  585. if (!buffer_uptodate(bh) && depth == -1)
  586. depth = reiserfs_write_unlock_nested(sb);
  587. ll_rw_block(REQ_OP_READ, 0, 1, &bh);
  588. wait_on_buffer(bh);
  589. if (depth != -1)
  590. reiserfs_write_lock_nested(sb, depth);
  591. if (!buffer_uptodate(bh))
  592. goto io_error;
  593. } else {
  594. io_error:
  595. search_path->path_length--;
  596. pathrelse(search_path);
  597. return IO_ERROR;
  598. }
  599. reada_count = 0;
  600. if (expected_level == -1)
  601. expected_level = SB_TREE_HEIGHT(sb);
  602. expected_level--;
  603. /*
  604. * It is possible that schedule occurred. We must check
  605. * whether the key to search is still in the tree rooted
  606. * from the current buffer. If not then repeat search
  607. * from the root.
  608. */
  609. if (fs_changed(fs_gen, sb) &&
  610. (!B_IS_IN_TREE(bh) ||
  611. B_LEVEL(bh) != expected_level ||
  612. !key_in_buffer(search_path, key, sb))) {
  613. PROC_INFO_INC(sb, search_by_key_fs_changed);
  614. PROC_INFO_INC(sb, search_by_key_restarted);
  615. PROC_INFO_INC(sb,
  616. sbk_restarted[expected_level - 1]);
  617. pathrelse(search_path);
  618. /*
  619. * Get the root block number so that we can
  620. * repeat the search starting from the root.
  621. */
  622. block_number = SB_ROOT_BLOCK(sb);
  623. expected_level = -1;
  624. right_neighbor_of_leaf_node = 0;
  625. /* repeat search from the root */
  626. continue;
  627. }
  628. /*
  629. * only check that the key is in the buffer if key is not
  630. * equal to the MAX_KEY. Latter case is only possible in
  631. * "finish_unfinished()" processing during mount.
  632. */
  633. RFALSE(comp_keys(&MAX_KEY, key) &&
  634. !key_in_buffer(search_path, key, sb),
  635. "PAP-5130: key is not in the buffer");
  636. #ifdef CONFIG_REISERFS_CHECK
  637. if (REISERFS_SB(sb)->cur_tb) {
  638. print_cur_tb("5140");
  639. reiserfs_panic(sb, "PAP-5140",
  640. "schedule occurred in do_balance!");
  641. }
  642. #endif
  643. /*
  644. * make sure, that the node contents look like a node of
  645. * certain level
  646. */
  647. if (!is_tree_node(bh, expected_level)) {
  648. reiserfs_error(sb, "vs-5150",
  649. "invalid format found in block %ld. "
  650. "Fsck?", bh->b_blocknr);
  651. pathrelse(search_path);
  652. return IO_ERROR;
  653. }
  654. /* ok, we have acquired next formatted node in the tree */
  655. node_level = B_LEVEL(bh);
  656. PROC_INFO_BH_STAT(sb, bh, node_level - 1);
  657. RFALSE(node_level < stop_level,
  658. "vs-5152: tree level (%d) is less than stop level (%d)",
  659. node_level, stop_level);
  660. retval = bin_search(key, item_head(bh, 0),
  661. B_NR_ITEMS(bh),
  662. (node_level ==
  663. DISK_LEAF_NODE_LEVEL) ? IH_SIZE :
  664. KEY_SIZE,
  665. &last_element->pe_position);
  666. if (node_level == stop_level) {
  667. return retval;
  668. }
  669. /* we are not in the stop level */
  670. /*
  671. * item has been found, so we choose the pointer which
  672. * is to the right of the found one
  673. */
  674. if (retval == ITEM_FOUND)
  675. last_element->pe_position++;
  676. /*
  677. * if item was not found we choose the position which is to
  678. * the left of the found item. This requires no code,
  679. * bin_search did it already.
  680. */
  681. /*
  682. * So we have chosen a position in the current node which is
  683. * an internal node. Now we calculate child block number by
  684. * position in the node.
  685. */
  686. block_number =
  687. B_N_CHILD_NUM(bh, last_element->pe_position);
  688. /*
  689. * if we are going to read leaf nodes, try for read
  690. * ahead as well
  691. */
  692. if ((search_path->reada & PATH_READA) &&
  693. node_level == DISK_LEAF_NODE_LEVEL + 1) {
  694. int pos = last_element->pe_position;
  695. int limit = B_NR_ITEMS(bh);
  696. struct reiserfs_key *le_key;
  697. if (search_path->reada & PATH_READA_BACK)
  698. limit = 0;
  699. while (reada_count < SEARCH_BY_KEY_READA) {
  700. if (pos == limit)
  701. break;
  702. reada_blocks[reada_count++] =
  703. B_N_CHILD_NUM(bh, pos);
  704. if (search_path->reada & PATH_READA_BACK)
  705. pos--;
  706. else
  707. pos++;
  708. /*
  709. * check to make sure we're in the same object
  710. */
  711. le_key = internal_key(bh, pos);
  712. if (le32_to_cpu(le_key->k_objectid) !=
  713. key->on_disk_key.k_objectid) {
  714. break;
  715. }
  716. }
  717. }
  718. }
  719. }
  720. /*
  721. * Form the path to an item and position in this item which contains
  722. * file byte defined by key. If there is no such item
  723. * corresponding to the key, we point the path to the item with
  724. * maximal key less than key, and *pos_in_item is set to one
  725. * past the last entry/byte in the item. If searching for entry in a
  726. * directory item, and it is not found, *pos_in_item is set to one
  727. * entry more than the entry with maximal key which is less than the
  728. * sought key.
  729. *
  730. * Note that if there is no entry in this same node which is one more,
  731. * then we point to an imaginary entry. for direct items, the
  732. * position is in units of bytes, for indirect items the position is
  733. * in units of blocknr entries, for directory items the position is in
  734. * units of directory entries.
  735. */
  736. /* The function is NOT SCHEDULE-SAFE! */
  737. int search_for_position_by_key(struct super_block *sb,
  738. /* Key to search (cpu variable) */
  739. const struct cpu_key *p_cpu_key,
  740. /* Filled up by this function. */
  741. struct treepath *search_path)
  742. {
  743. struct item_head *p_le_ih; /* pointer to on-disk structure */
  744. int blk_size;
  745. loff_t item_offset, offset;
  746. struct reiserfs_dir_entry de;
  747. int retval;
  748. /* If searching for directory entry. */
  749. if (is_direntry_cpu_key(p_cpu_key))
  750. return search_by_entry_key(sb, p_cpu_key, search_path,
  751. &de);
  752. /* If not searching for directory entry. */
  753. /* If item is found. */
  754. retval = search_item(sb, p_cpu_key, search_path);
  755. if (retval == IO_ERROR)
  756. return retval;
  757. if (retval == ITEM_FOUND) {
  758. RFALSE(!ih_item_len
  759. (item_head
  760. (PATH_PLAST_BUFFER(search_path),
  761. PATH_LAST_POSITION(search_path))),
  762. "PAP-5165: item length equals zero");
  763. pos_in_item(search_path) = 0;
  764. return POSITION_FOUND;
  765. }
  766. RFALSE(!PATH_LAST_POSITION(search_path),
  767. "PAP-5170: position equals zero");
  768. /* Item is not found. Set path to the previous item. */
  769. p_le_ih =
  770. item_head(PATH_PLAST_BUFFER(search_path),
  771. --PATH_LAST_POSITION(search_path));
  772. blk_size = sb->s_blocksize;
  773. if (comp_short_keys(&p_le_ih->ih_key, p_cpu_key))
  774. return FILE_NOT_FOUND;
  775. /* FIXME: quite ugly this far */
  776. item_offset = le_ih_k_offset(p_le_ih);
  777. offset = cpu_key_k_offset(p_cpu_key);
  778. /* Needed byte is contained in the item pointed to by the path. */
  779. if (item_offset <= offset &&
  780. item_offset + op_bytes_number(p_le_ih, blk_size) > offset) {
  781. pos_in_item(search_path) = offset - item_offset;
  782. if (is_indirect_le_ih(p_le_ih)) {
  783. pos_in_item(search_path) /= blk_size;
  784. }
  785. return POSITION_FOUND;
  786. }
  787. /*
  788. * Needed byte is not contained in the item pointed to by the
  789. * path. Set pos_in_item out of the item.
  790. */
  791. if (is_indirect_le_ih(p_le_ih))
  792. pos_in_item(search_path) =
  793. ih_item_len(p_le_ih) / UNFM_P_SIZE;
  794. else
  795. pos_in_item(search_path) = ih_item_len(p_le_ih);
  796. return POSITION_NOT_FOUND;
  797. }
  798. /* Compare given item and item pointed to by the path. */
  799. int comp_items(const struct item_head *stored_ih, const struct treepath *path)
  800. {
  801. struct buffer_head *bh = PATH_PLAST_BUFFER(path);
  802. struct item_head *ih;
  803. /* Last buffer at the path is not in the tree. */
  804. if (!B_IS_IN_TREE(bh))
  805. return 1;
  806. /* Last path position is invalid. */
  807. if (PATH_LAST_POSITION(path) >= B_NR_ITEMS(bh))
  808. return 1;
  809. /* we need only to know, whether it is the same item */
  810. ih = tp_item_head(path);
  811. return memcmp(stored_ih, ih, IH_SIZE);
  812. }
  813. /* unformatted nodes are not logged anymore, ever. This is safe now */
  814. #define held_by_others(bh) (atomic_read(&(bh)->b_count) > 1)
  815. /* block can not be forgotten as it is in I/O or held by someone */
  816. #define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh)))
  817. /* prepare for delete or cut of direct item */
  818. static inline int prepare_for_direct_item(struct treepath *path,
  819. struct item_head *le_ih,
  820. struct inode *inode,
  821. loff_t new_file_length, int *cut_size)
  822. {
  823. loff_t round_len;
  824. if (new_file_length == max_reiserfs_offset(inode)) {
  825. /* item has to be deleted */
  826. *cut_size = -(IH_SIZE + ih_item_len(le_ih));
  827. return M_DELETE;
  828. }
  829. /* new file gets truncated */
  830. if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) {
  831. round_len = ROUND_UP(new_file_length);
  832. /* this was new_file_length < le_ih ... */
  833. if (round_len < le_ih_k_offset(le_ih)) {
  834. *cut_size = -(IH_SIZE + ih_item_len(le_ih));
  835. return M_DELETE; /* Delete this item. */
  836. }
  837. /* Calculate first position and size for cutting from item. */
  838. pos_in_item(path) = round_len - (le_ih_k_offset(le_ih) - 1);
  839. *cut_size = -(ih_item_len(le_ih) - pos_in_item(path));
  840. return M_CUT; /* Cut from this item. */
  841. }
  842. /* old file: items may have any length */
  843. if (new_file_length < le_ih_k_offset(le_ih)) {
  844. *cut_size = -(IH_SIZE + ih_item_len(le_ih));
  845. return M_DELETE; /* Delete this item. */
  846. }
  847. /* Calculate first position and size for cutting from item. */
  848. *cut_size = -(ih_item_len(le_ih) -
  849. (pos_in_item(path) =
  850. new_file_length + 1 - le_ih_k_offset(le_ih)));
  851. return M_CUT; /* Cut from this item. */
  852. }
  853. static inline int prepare_for_direntry_item(struct treepath *path,
  854. struct item_head *le_ih,
  855. struct inode *inode,
  856. loff_t new_file_length,
  857. int *cut_size)
  858. {
  859. if (le_ih_k_offset(le_ih) == DOT_OFFSET &&
  860. new_file_length == max_reiserfs_offset(inode)) {
  861. RFALSE(ih_entry_count(le_ih) != 2,
  862. "PAP-5220: incorrect empty directory item (%h)", le_ih);
  863. *cut_size = -(IH_SIZE + ih_item_len(le_ih));
  864. /* Delete the directory item containing "." and ".." entry. */
  865. return M_DELETE;
  866. }
  867. if (ih_entry_count(le_ih) == 1) {
  868. /*
  869. * Delete the directory item such as there is one record only
  870. * in this item
  871. */
  872. *cut_size = -(IH_SIZE + ih_item_len(le_ih));
  873. return M_DELETE;
  874. }
  875. /* Cut one record from the directory item. */
  876. *cut_size =
  877. -(DEH_SIZE +
  878. entry_length(get_last_bh(path), le_ih, pos_in_item(path)));
  879. return M_CUT;
  880. }
  881. #define JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD (2 * JOURNAL_PER_BALANCE_CNT + 1)
  882. /*
  883. * If the path points to a directory or direct item, calculate mode
  884. * and the size cut, for balance.
  885. * If the path points to an indirect item, remove some number of its
  886. * unformatted nodes.
  887. * In case of file truncate calculate whether this item must be
  888. * deleted/truncated or last unformatted node of this item will be
  889. * converted to a direct item.
  890. * This function returns a determination of what balance mode the
  891. * calling function should employ.
  892. */
  893. static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th,
  894. struct inode *inode,
  895. struct treepath *path,
  896. const struct cpu_key *item_key,
  897. /*
  898. * Number of unformatted nodes
  899. * which were removed from end
  900. * of the file.
  901. */
  902. int *removed,
  903. int *cut_size,
  904. /* MAX_KEY_OFFSET in case of delete. */
  905. unsigned long long new_file_length
  906. )
  907. {
  908. struct super_block *sb = inode->i_sb;
  909. struct item_head *p_le_ih = tp_item_head(path);
  910. struct buffer_head *bh = PATH_PLAST_BUFFER(path);
  911. BUG_ON(!th->t_trans_id);
  912. /* Stat_data item. */
  913. if (is_statdata_le_ih(p_le_ih)) {
  914. RFALSE(new_file_length != max_reiserfs_offset(inode),
  915. "PAP-5210: mode must be M_DELETE");
  916. *cut_size = -(IH_SIZE + ih_item_len(p_le_ih));
  917. return M_DELETE;
  918. }
  919. /* Directory item. */
  920. if (is_direntry_le_ih(p_le_ih))
  921. return prepare_for_direntry_item(path, p_le_ih, inode,
  922. new_file_length,
  923. cut_size);
  924. /* Direct item. */
  925. if (is_direct_le_ih(p_le_ih))
  926. return prepare_for_direct_item(path, p_le_ih, inode,
  927. new_file_length, cut_size);
  928. /* Case of an indirect item. */
  929. {
  930. int blk_size = sb->s_blocksize;
  931. struct item_head s_ih;
  932. int need_re_search;
  933. int delete = 0;
  934. int result = M_CUT;
  935. int pos = 0;
  936. if ( new_file_length == max_reiserfs_offset (inode) ) {
  937. /*
  938. * prepare_for_delete_or_cut() is called by
  939. * reiserfs_delete_item()
  940. */
  941. new_file_length = 0;
  942. delete = 1;
  943. }
  944. do {
  945. need_re_search = 0;
  946. *cut_size = 0;
  947. bh = PATH_PLAST_BUFFER(path);
  948. copy_item_head(&s_ih, tp_item_head(path));
  949. pos = I_UNFM_NUM(&s_ih);
  950. while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > new_file_length) {
  951. __le32 *unfm;
  952. __u32 block;
  953. /*
  954. * Each unformatted block deletion may involve
  955. * one additional bitmap block into the transaction,
  956. * thereby the initial journal space reservation
  957. * might not be enough.
  958. */
  959. if (!delete && (*cut_size) != 0 &&
  960. reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD)
  961. break;
  962. unfm = (__le32 *)ih_item_body(bh, &s_ih) + pos - 1;
  963. block = get_block_num(unfm, 0);
  964. if (block != 0) {
  965. reiserfs_prepare_for_journal(sb, bh, 1);
  966. put_block_num(unfm, 0, 0);
  967. journal_mark_dirty(th, bh);
  968. reiserfs_free_block(th, inode, block, 1);
  969. }
  970. reiserfs_cond_resched(sb);
  971. if (item_moved (&s_ih, path)) {
  972. need_re_search = 1;
  973. break;
  974. }
  975. pos --;
  976. (*removed)++;
  977. (*cut_size) -= UNFM_P_SIZE;
  978. if (pos == 0) {
  979. (*cut_size) -= IH_SIZE;
  980. result = M_DELETE;
  981. break;
  982. }
  983. }
  984. /*
  985. * a trick. If the buffer has been logged, this will
  986. * do nothing. If we've broken the loop without logging
  987. * it, it will restore the buffer
  988. */
  989. reiserfs_restore_prepared_buffer(sb, bh);
  990. } while (need_re_search &&
  991. search_for_position_by_key(sb, item_key, path) == POSITION_FOUND);
  992. pos_in_item(path) = pos * UNFM_P_SIZE;
  993. if (*cut_size == 0) {
  994. /*
  995. * Nothing was cut. maybe convert last unformatted node to the
  996. * direct item?
  997. */
  998. result = M_CONVERT;
  999. }
  1000. return result;
  1001. }
  1002. }
  1003. /* Calculate number of bytes which will be deleted or cut during balance */
  1004. static int calc_deleted_bytes_number(struct tree_balance *tb, char mode)
  1005. {
  1006. int del_size;
  1007. struct item_head *p_le_ih = tp_item_head(tb->tb_path);
  1008. if (is_statdata_le_ih(p_le_ih))
  1009. return 0;
  1010. del_size =
  1011. (mode ==
  1012. M_DELETE) ? ih_item_len(p_le_ih) : -tb->insert_size[0];
  1013. if (is_direntry_le_ih(p_le_ih)) {
  1014. /*
  1015. * return EMPTY_DIR_SIZE; We delete emty directories only.
  1016. * we can't use EMPTY_DIR_SIZE, as old format dirs have a
  1017. * different empty size. ick. FIXME, is this right?
  1018. */
  1019. return del_size;
  1020. }
  1021. if (is_indirect_le_ih(p_le_ih))
  1022. del_size = (del_size / UNFM_P_SIZE) *
  1023. (PATH_PLAST_BUFFER(tb->tb_path)->b_size);
  1024. return del_size;
  1025. }
  1026. static void init_tb_struct(struct reiserfs_transaction_handle *th,
  1027. struct tree_balance *tb,
  1028. struct super_block *sb,
  1029. struct treepath *path, int size)
  1030. {
  1031. BUG_ON(!th->t_trans_id);
  1032. memset(tb, '\0', sizeof(struct tree_balance));
  1033. tb->transaction_handle = th;
  1034. tb->tb_sb = sb;
  1035. tb->tb_path = path;
  1036. PATH_OFFSET_PBUFFER(path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL;
  1037. PATH_OFFSET_POSITION(path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0;
  1038. tb->insert_size[0] = size;
  1039. }
  1040. void padd_item(char *item, int total_length, int length)
  1041. {
  1042. int i;
  1043. for (i = total_length; i > length;)
  1044. item[--i] = 0;
  1045. }
  1046. #ifdef REISERQUOTA_DEBUG
  1047. char key2type(struct reiserfs_key *ih)
  1048. {
  1049. if (is_direntry_le_key(2, ih))
  1050. return 'd';
  1051. if (is_direct_le_key(2, ih))
  1052. return 'D';
  1053. if (is_indirect_le_key(2, ih))
  1054. return 'i';
  1055. if (is_statdata_le_key(2, ih))
  1056. return 's';
  1057. return 'u';
  1058. }
  1059. char head2type(struct item_head *ih)
  1060. {
  1061. if (is_direntry_le_ih(ih))
  1062. return 'd';
  1063. if (is_direct_le_ih(ih))
  1064. return 'D';
  1065. if (is_indirect_le_ih(ih))
  1066. return 'i';
  1067. if (is_statdata_le_ih(ih))
  1068. return 's';
  1069. return 'u';
  1070. }
  1071. #endif
  1072. /*
  1073. * Delete object item.
  1074. * th - active transaction handle
  1075. * path - path to the deleted item
  1076. * item_key - key to search for the deleted item
  1077. * indode - used for updating i_blocks and quotas
  1078. * un_bh - NULL or unformatted node pointer
  1079. */
  1080. int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
  1081. struct treepath *path, const struct cpu_key *item_key,
  1082. struct inode *inode, struct buffer_head *un_bh)
  1083. {
  1084. struct super_block *sb = inode->i_sb;
  1085. struct tree_balance s_del_balance;
  1086. struct item_head s_ih;
  1087. struct item_head *q_ih;
  1088. int quota_cut_bytes;
  1089. int ret_value, del_size, removed;
  1090. int depth;
  1091. #ifdef CONFIG_REISERFS_CHECK
  1092. char mode;
  1093. int iter = 0;
  1094. #endif
  1095. BUG_ON(!th->t_trans_id);
  1096. init_tb_struct(th, &s_del_balance, sb, path,
  1097. 0 /*size is unknown */ );
  1098. while (1) {
  1099. removed = 0;
  1100. #ifdef CONFIG_REISERFS_CHECK
  1101. iter++;
  1102. mode =
  1103. #endif
  1104. prepare_for_delete_or_cut(th, inode, path,
  1105. item_key, &removed,
  1106. &del_size,
  1107. max_reiserfs_offset(inode));
  1108. RFALSE(mode != M_DELETE, "PAP-5320: mode must be M_DELETE");
  1109. copy_item_head(&s_ih, tp_item_head(path));
  1110. s_del_balance.insert_size[0] = del_size;
  1111. ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL);
  1112. if (ret_value != REPEAT_SEARCH)
  1113. break;
  1114. PROC_INFO_INC(sb, delete_item_restarted);
  1115. /* file system changed, repeat search */
  1116. ret_value =
  1117. search_for_position_by_key(sb, item_key, path);
  1118. if (ret_value == IO_ERROR)
  1119. break;
  1120. if (ret_value == FILE_NOT_FOUND) {
  1121. reiserfs_warning(sb, "vs-5340",
  1122. "no items of the file %K found",
  1123. item_key);
  1124. break;
  1125. }
  1126. } /* while (1) */
  1127. if (ret_value != CARRY_ON) {
  1128. unfix_nodes(&s_del_balance);
  1129. return 0;
  1130. }
  1131. /* reiserfs_delete_item returns item length when success */
  1132. ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE);
  1133. q_ih = tp_item_head(path);
  1134. quota_cut_bytes = ih_item_len(q_ih);
  1135. /*
  1136. * hack so the quota code doesn't have to guess if the file has a
  1137. * tail. On tail insert, we allocate quota for 1 unformatted node.
  1138. * We test the offset because the tail might have been
  1139. * split into multiple items, and we only want to decrement for
  1140. * the unfm node once
  1141. */
  1142. if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(q_ih)) {
  1143. if ((le_ih_k_offset(q_ih) & (sb->s_blocksize - 1)) == 1) {
  1144. quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
  1145. } else {
  1146. quota_cut_bytes = 0;
  1147. }
  1148. }
  1149. if (un_bh) {
  1150. int off;
  1151. char *data;
  1152. /*
  1153. * We are in direct2indirect conversion, so move tail contents
  1154. * to the unformatted node
  1155. */
  1156. /*
  1157. * note, we do the copy before preparing the buffer because we
  1158. * don't care about the contents of the unformatted node yet.
  1159. * the only thing we really care about is the direct item's
  1160. * data is in the unformatted node.
  1161. *
  1162. * Otherwise, we would have to call
  1163. * reiserfs_prepare_for_journal on the unformatted node,
  1164. * which might schedule, meaning we'd have to loop all the
  1165. * way back up to the start of the while loop.
  1166. *
  1167. * The unformatted node must be dirtied later on. We can't be
  1168. * sure here if the entire tail has been deleted yet.
  1169. *
  1170. * un_bh is from the page cache (all unformatted nodes are
  1171. * from the page cache) and might be a highmem page. So, we
  1172. * can't use un_bh->b_data.
  1173. * -clm
  1174. */
  1175. data = kmap_atomic(un_bh->b_page);
  1176. off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_SIZE - 1));
  1177. memcpy(data + off,
  1178. ih_item_body(PATH_PLAST_BUFFER(path), &s_ih),
  1179. ret_value);
  1180. kunmap_atomic(data);
  1181. }
  1182. /* Perform balancing after all resources have been collected at once. */
  1183. do_balance(&s_del_balance, NULL, NULL, M_DELETE);
  1184. #ifdef REISERQUOTA_DEBUG
  1185. reiserfs_debug(sb, REISERFS_DEBUG_CODE,
  1186. "reiserquota delete_item(): freeing %u, id=%u type=%c",
  1187. quota_cut_bytes, inode->i_uid, head2type(&s_ih));
  1188. #endif
  1189. depth = reiserfs_write_unlock_nested(inode->i_sb);
  1190. dquot_free_space_nodirty(inode, quota_cut_bytes);
  1191. reiserfs_write_lock_nested(inode->i_sb, depth);
  1192. /* Return deleted body length */
  1193. return ret_value;
  1194. }
  1195. /*
  1196. * Summary Of Mechanisms For Handling Collisions Between Processes:
  1197. *
  1198. * deletion of the body of the object is performed by iput(), with the
  1199. * result that if multiple processes are operating on a file, the
  1200. * deletion of the body of the file is deferred until the last process
  1201. * that has an open inode performs its iput().
  1202. *
  1203. * writes and truncates are protected from collisions by use of
  1204. * semaphores.
  1205. *
  1206. * creates, linking, and mknod are protected from collisions with other
  1207. * processes by making the reiserfs_add_entry() the last step in the
  1208. * creation, and then rolling back all changes if there was a collision.
  1209. * - Hans
  1210. */
  1211. /* this deletes item which never gets split */
  1212. void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
  1213. struct inode *inode, struct reiserfs_key *key)
  1214. {
  1215. struct super_block *sb = th->t_super;
  1216. struct tree_balance tb;
  1217. INITIALIZE_PATH(path);
  1218. int item_len = 0;
  1219. int tb_init = 0;
  1220. struct cpu_key cpu_key;
  1221. int retval;
  1222. int quota_cut_bytes = 0;
  1223. BUG_ON(!th->t_trans_id);
  1224. le_key2cpu_key(&cpu_key, key);
  1225. while (1) {
  1226. retval = search_item(th->t_super, &cpu_key, &path);
  1227. if (retval == IO_ERROR) {
  1228. reiserfs_error(th->t_super, "vs-5350",
  1229. "i/o failure occurred trying "
  1230. "to delete %K", &cpu_key);
  1231. break;
  1232. }
  1233. if (retval != ITEM_FOUND) {
  1234. pathrelse(&path);
  1235. /*
  1236. * No need for a warning, if there is just no free
  1237. * space to insert '..' item into the
  1238. * newly-created subdir
  1239. */
  1240. if (!
  1241. ((unsigned long long)
  1242. GET_HASH_VALUE(le_key_k_offset
  1243. (le_key_version(key), key)) == 0
  1244. && (unsigned long long)
  1245. GET_GENERATION_NUMBER(le_key_k_offset
  1246. (le_key_version(key),
  1247. key)) == 1))
  1248. reiserfs_warning(th->t_super, "vs-5355",
  1249. "%k not found", key);
  1250. break;
  1251. }
  1252. if (!tb_init) {
  1253. tb_init = 1;
  1254. item_len = ih_item_len(tp_item_head(&path));
  1255. init_tb_struct(th, &tb, th->t_super, &path,
  1256. -(IH_SIZE + item_len));
  1257. }
  1258. quota_cut_bytes = ih_item_len(tp_item_head(&path));
  1259. retval = fix_nodes(M_DELETE, &tb, NULL, NULL);
  1260. if (retval == REPEAT_SEARCH) {
  1261. PROC_INFO_INC(th->t_super, delete_solid_item_restarted);
  1262. continue;
  1263. }
  1264. if (retval == CARRY_ON) {
  1265. do_balance(&tb, NULL, NULL, M_DELETE);
  1266. /*
  1267. * Should we count quota for item? (we don't
  1268. * count quotas for save-links)
  1269. */
  1270. if (inode) {
  1271. int depth;
  1272. #ifdef REISERQUOTA_DEBUG
  1273. reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
  1274. "reiserquota delete_solid_item(): freeing %u id=%u type=%c",
  1275. quota_cut_bytes, inode->i_uid,
  1276. key2type(key));
  1277. #endif
  1278. depth = reiserfs_write_unlock_nested(sb);
  1279. dquot_free_space_nodirty(inode,
  1280. quota_cut_bytes);
  1281. reiserfs_write_lock_nested(sb, depth);
  1282. }
  1283. break;
  1284. }
  1285. /* IO_ERROR, NO_DISK_SPACE, etc */
  1286. reiserfs_warning(th->t_super, "vs-5360",
  1287. "could not delete %K due to fix_nodes failure",
  1288. &cpu_key);
  1289. unfix_nodes(&tb);
  1290. break;
  1291. }
  1292. reiserfs_check_path(&path);
  1293. }
  1294. int reiserfs_delete_object(struct reiserfs_transaction_handle *th,
  1295. struct inode *inode)
  1296. {
  1297. int err;
  1298. inode->i_size = 0;
  1299. BUG_ON(!th->t_trans_id);
  1300. /* for directory this deletes item containing "." and ".." */
  1301. err =
  1302. reiserfs_do_truncate(th, inode, NULL, 0 /*no timestamp updates */ );
  1303. if (err)
  1304. return err;
  1305. #if defined( USE_INODE_GENERATION_COUNTER )
  1306. if (!old_format_only(th->t_super)) {
  1307. __le32 *inode_generation;
  1308. inode_generation =
  1309. &REISERFS_SB(th->t_super)->s_rs->s_inode_generation;
  1310. le32_add_cpu(inode_generation, 1);
  1311. }
  1312. /* USE_INODE_GENERATION_COUNTER */
  1313. #endif
  1314. reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
  1315. return err;
  1316. }
  1317. static void unmap_buffers(struct page *page, loff_t pos)
  1318. {
  1319. struct buffer_head *bh;
  1320. struct buffer_head *head;
  1321. struct buffer_head *next;
  1322. unsigned long tail_index;
  1323. unsigned long cur_index;
  1324. if (page) {
  1325. if (page_has_buffers(page)) {
  1326. tail_index = pos & (PAGE_SIZE - 1);
  1327. cur_index = 0;
  1328. head = page_buffers(page);
  1329. bh = head;
  1330. do {
  1331. next = bh->b_this_page;
  1332. /*
  1333. * we want to unmap the buffers that contain
  1334. * the tail, and all the buffers after it
  1335. * (since the tail must be at the end of the
  1336. * file). We don't want to unmap file data
  1337. * before the tail, since it might be dirty
  1338. * and waiting to reach disk
  1339. */
  1340. cur_index += bh->b_size;
  1341. if (cur_index > tail_index) {
  1342. reiserfs_unmap_buffer(bh);
  1343. }
  1344. bh = next;
  1345. } while (bh != head);
  1346. }
  1347. }
  1348. }
  1349. static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th,
  1350. struct inode *inode,
  1351. struct page *page,
  1352. struct treepath *path,
  1353. const struct cpu_key *item_key,
  1354. loff_t new_file_size, char *mode)
  1355. {
  1356. struct super_block *sb = inode->i_sb;
  1357. int block_size = sb->s_blocksize;
  1358. int cut_bytes;
  1359. BUG_ON(!th->t_trans_id);
  1360. BUG_ON(new_file_size != inode->i_size);
  1361. /*
  1362. * the page being sent in could be NULL if there was an i/o error
  1363. * reading in the last block. The user will hit problems trying to
  1364. * read the file, but for now we just skip the indirect2direct
  1365. */
  1366. if (atomic_read(&inode->i_count) > 1 ||
  1367. !tail_has_to_be_packed(inode) ||
  1368. !page || (REISERFS_I(inode)->i_flags & i_nopack_mask)) {
  1369. /* leave tail in an unformatted node */
  1370. *mode = M_SKIP_BALANCING;
  1371. cut_bytes =
  1372. block_size - (new_file_size & (block_size - 1));
  1373. pathrelse(path);
  1374. return cut_bytes;
  1375. }
  1376. /* Perform the conversion to a direct_item. */
  1377. return indirect2direct(th, inode, page, path, item_key,
  1378. new_file_size, mode);
  1379. }
  1380. /*
  1381. * we did indirect_to_direct conversion. And we have inserted direct
  1382. * item successesfully, but there were no disk space to cut unfm
  1383. * pointer being converted. Therefore we have to delete inserted
  1384. * direct item(s)
  1385. */
  1386. static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th,
  1387. struct inode *inode, struct treepath *path)
  1388. {
  1389. struct cpu_key tail_key;
  1390. int tail_len;
  1391. int removed;
  1392. BUG_ON(!th->t_trans_id);
  1393. make_cpu_key(&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4);
  1394. tail_key.key_length = 4;
  1395. tail_len =
  1396. (cpu_key_k_offset(&tail_key) & (inode->i_sb->s_blocksize - 1)) - 1;
  1397. while (tail_len) {
  1398. /* look for the last byte of the tail */
  1399. if (search_for_position_by_key(inode->i_sb, &tail_key, path) ==
  1400. POSITION_NOT_FOUND)
  1401. reiserfs_panic(inode->i_sb, "vs-5615",
  1402. "found invalid item");
  1403. RFALSE(path->pos_in_item !=
  1404. ih_item_len(tp_item_head(path)) - 1,
  1405. "vs-5616: appended bytes found");
  1406. PATH_LAST_POSITION(path)--;
  1407. removed =
  1408. reiserfs_delete_item(th, path, &tail_key, inode,
  1409. NULL /*unbh not needed */ );
  1410. RFALSE(removed <= 0
  1411. || removed > tail_len,
  1412. "vs-5617: there was tail %d bytes, removed item length %d bytes",
  1413. tail_len, removed);
  1414. tail_len -= removed;
  1415. set_cpu_key_k_offset(&tail_key,
  1416. cpu_key_k_offset(&tail_key) - removed);
  1417. }
  1418. reiserfs_warning(inode->i_sb, "reiserfs-5091", "indirect_to_direct "
  1419. "conversion has been rolled back due to "
  1420. "lack of disk space");
  1421. mark_inode_dirty(inode);
  1422. }
  1423. /* (Truncate or cut entry) or delete object item. Returns < 0 on failure */
  1424. int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
  1425. struct treepath *path,
  1426. struct cpu_key *item_key,
  1427. struct inode *inode,
  1428. struct page *page, loff_t new_file_size)
  1429. {
  1430. struct super_block *sb = inode->i_sb;
  1431. /*
  1432. * Every function which is going to call do_balance must first
  1433. * create a tree_balance structure. Then it must fill up this
  1434. * structure by using the init_tb_struct and fix_nodes functions.
  1435. * After that we can make tree balancing.
  1436. */
  1437. struct tree_balance s_cut_balance;
  1438. struct item_head *p_le_ih;
  1439. int cut_size = 0; /* Amount to be cut. */
  1440. int ret_value = CARRY_ON;
  1441. int removed = 0; /* Number of the removed unformatted nodes. */
  1442. int is_inode_locked = 0;
  1443. char mode; /* Mode of the balance. */
  1444. int retval2 = -1;
  1445. int quota_cut_bytes;
  1446. loff_t tail_pos = 0;
  1447. int depth;
  1448. BUG_ON(!th->t_trans_id);
  1449. init_tb_struct(th, &s_cut_balance, inode->i_sb, path,
  1450. cut_size);
  1451. /*
  1452. * Repeat this loop until we either cut the item without needing
  1453. * to balance, or we fix_nodes without schedule occurring
  1454. */
  1455. while (1) {
  1456. /*
  1457. * Determine the balance mode, position of the first byte to
  1458. * be cut, and size to be cut. In case of the indirect item
  1459. * free unformatted nodes which are pointed to by the cut
  1460. * pointers.
  1461. */
  1462. mode =
  1463. prepare_for_delete_or_cut(th, inode, path,
  1464. item_key, &removed,
  1465. &cut_size, new_file_size);
  1466. if (mode == M_CONVERT) {
  1467. /*
  1468. * convert last unformatted node to direct item or
  1469. * leave tail in the unformatted node
  1470. */
  1471. RFALSE(ret_value != CARRY_ON,
  1472. "PAP-5570: can not convert twice");
  1473. ret_value =
  1474. maybe_indirect_to_direct(th, inode, page,
  1475. path, item_key,
  1476. new_file_size, &mode);
  1477. if (mode == M_SKIP_BALANCING)
  1478. /* tail has been left in the unformatted node */
  1479. return ret_value;
  1480. is_inode_locked = 1;
  1481. /*
  1482. * removing of last unformatted node will
  1483. * change value we have to return to truncate.
  1484. * Save it
  1485. */
  1486. retval2 = ret_value;
  1487. /*
  1488. * So, we have performed the first part of the
  1489. * conversion:
  1490. * inserting the new direct item. Now we are
  1491. * removing the last unformatted node pointer.
  1492. * Set key to search for it.
  1493. */
  1494. set_cpu_key_k_type(item_key, TYPE_INDIRECT);
  1495. item_key->key_length = 4;
  1496. new_file_size -=
  1497. (new_file_size & (sb->s_blocksize - 1));
  1498. tail_pos = new_file_size;
  1499. set_cpu_key_k_offset(item_key, new_file_size + 1);
  1500. if (search_for_position_by_key
  1501. (sb, item_key,
  1502. path) == POSITION_NOT_FOUND) {
  1503. print_block(PATH_PLAST_BUFFER(path), 3,
  1504. PATH_LAST_POSITION(path) - 1,
  1505. PATH_LAST_POSITION(path) + 1);
  1506. reiserfs_panic(sb, "PAP-5580", "item to "
  1507. "convert does not exist (%K)",
  1508. item_key);
  1509. }
  1510. continue;
  1511. }
  1512. if (cut_size == 0) {
  1513. pathrelse(path);
  1514. return 0;
  1515. }
  1516. s_cut_balance.insert_size[0] = cut_size;
  1517. ret_value = fix_nodes(mode, &s_cut_balance, NULL, NULL);
  1518. if (ret_value != REPEAT_SEARCH)
  1519. break;
  1520. PROC_INFO_INC(sb, cut_from_item_restarted);
  1521. ret_value =
  1522. search_for_position_by_key(sb, item_key, path);
  1523. if (ret_value == POSITION_FOUND)
  1524. continue;
  1525. reiserfs_warning(sb, "PAP-5610", "item %K not found",
  1526. item_key);
  1527. unfix_nodes(&s_cut_balance);
  1528. return (ret_value == IO_ERROR) ? -EIO : -ENOENT;
  1529. } /* while */
  1530. /* check fix_nodes results (IO_ERROR or NO_DISK_SPACE) */
  1531. if (ret_value != CARRY_ON) {
  1532. if (is_inode_locked) {
  1533. /*
  1534. * FIXME: this seems to be not needed: we are always
  1535. * able to cut item
  1536. */
  1537. indirect_to_direct_roll_back(th, inode, path);
  1538. }
  1539. if (ret_value == NO_DISK_SPACE)
  1540. reiserfs_warning(sb, "reiserfs-5092",
  1541. "NO_DISK_SPACE");
  1542. unfix_nodes(&s_cut_balance);
  1543. return -EIO;
  1544. }
  1545. /* go ahead and perform balancing */
  1546. RFALSE(mode == M_PASTE || mode == M_INSERT, "invalid mode");
  1547. /* Calculate number of bytes that need to be cut from the item. */
  1548. quota_cut_bytes =
  1549. (mode ==
  1550. M_DELETE) ? ih_item_len(tp_item_head(path)) : -s_cut_balance.
  1551. insert_size[0];
  1552. if (retval2 == -1)
  1553. ret_value = calc_deleted_bytes_number(&s_cut_balance, mode);
  1554. else
  1555. ret_value = retval2;
  1556. /*
  1557. * For direct items, we only change the quota when deleting the last
  1558. * item.
  1559. */
  1560. p_le_ih = tp_item_head(s_cut_balance.tb_path);
  1561. if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_le_ih)) {
  1562. if (mode == M_DELETE &&
  1563. (le_ih_k_offset(p_le_ih) & (sb->s_blocksize - 1)) ==
  1564. 1) {
  1565. /* FIXME: this is to keep 3.5 happy */
  1566. REISERFS_I(inode)->i_first_direct_byte = U32_MAX;
  1567. quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
  1568. } else {
  1569. quota_cut_bytes = 0;
  1570. }
  1571. }
  1572. #ifdef CONFIG_REISERFS_CHECK
  1573. if (is_inode_locked) {
  1574. struct item_head *le_ih =
  1575. tp_item_head(s_cut_balance.tb_path);
  1576. /*
  1577. * we are going to complete indirect2direct conversion. Make
  1578. * sure, that we exactly remove last unformatted node pointer
  1579. * of the item
  1580. */
  1581. if (!is_indirect_le_ih(le_ih))
  1582. reiserfs_panic(sb, "vs-5652",
  1583. "item must be indirect %h", le_ih);
  1584. if (mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE)
  1585. reiserfs_panic(sb, "vs-5653", "completing "
  1586. "indirect2direct conversion indirect "
  1587. "item %h being deleted must be of "
  1588. "4 byte long", le_ih);
  1589. if (mode == M_CUT
  1590. && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) {
  1591. reiserfs_panic(sb, "vs-5654", "can not complete "
  1592. "indirect2direct conversion of %h "
  1593. "(CUT, insert_size==%d)",
  1594. le_ih, s_cut_balance.insert_size[0]);
  1595. }
  1596. /*
  1597. * it would be useful to make sure, that right neighboring
  1598. * item is direct item of this file
  1599. */
  1600. }
  1601. #endif
  1602. do_balance(&s_cut_balance, NULL, NULL, mode);
  1603. if (is_inode_locked) {
  1604. /*
  1605. * we've done an indirect->direct conversion. when the
  1606. * data block was freed, it was removed from the list of
  1607. * blocks that must be flushed before the transaction
  1608. * commits, make sure to unmap and invalidate it
  1609. */
  1610. unmap_buffers(page, tail_pos);
  1611. REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
  1612. }
  1613. #ifdef REISERQUOTA_DEBUG
  1614. reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
  1615. "reiserquota cut_from_item(): freeing %u id=%u type=%c",
  1616. quota_cut_bytes, inode->i_uid, '?');
  1617. #endif
  1618. depth = reiserfs_write_unlock_nested(sb);
  1619. dquot_free_space_nodirty(inode, quota_cut_bytes);
  1620. reiserfs_write_lock_nested(sb, depth);
  1621. return ret_value;
  1622. }
  1623. static void truncate_directory(struct reiserfs_transaction_handle *th,
  1624. struct inode *inode)
  1625. {
  1626. BUG_ON(!th->t_trans_id);
  1627. if (inode->i_nlink)
  1628. reiserfs_error(inode->i_sb, "vs-5655", "link count != 0");
  1629. set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), DOT_OFFSET);
  1630. set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_DIRENTRY);
  1631. reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
  1632. reiserfs_update_sd(th, inode);
  1633. set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), SD_OFFSET);
  1634. set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_STAT_DATA);
  1635. }
  1636. /*
  1637. * Truncate file to the new size. Note, this must be called with a
  1638. * transaction already started
  1639. */
  1640. int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
  1641. struct inode *inode, /* ->i_size contains new size */
  1642. struct page *page, /* up to date for last block */
  1643. /*
  1644. * when it is called by file_release to convert
  1645. * the tail - no timestamps should be updated
  1646. */
  1647. int update_timestamps
  1648. )
  1649. {
  1650. INITIALIZE_PATH(s_search_path); /* Path to the current object item. */
  1651. struct item_head *p_le_ih; /* Pointer to an item header. */
  1652. /* Key to search for a previous file item. */
  1653. struct cpu_key s_item_key;
  1654. loff_t file_size, /* Old file size. */
  1655. new_file_size; /* New file size. */
  1656. int deleted; /* Number of deleted or truncated bytes. */
  1657. int retval;
  1658. int err = 0;
  1659. BUG_ON(!th->t_trans_id);
  1660. if (!
  1661. (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
  1662. || S_ISLNK(inode->i_mode)))
  1663. return 0;
  1664. /* deletion of directory - no need to update timestamps */
  1665. if (S_ISDIR(inode->i_mode)) {
  1666. truncate_directory(th, inode);
  1667. return 0;
  1668. }
  1669. /* Get new file size. */
  1670. new_file_size = inode->i_size;
  1671. /* FIXME: note, that key type is unimportant here */
  1672. make_cpu_key(&s_item_key, inode, max_reiserfs_offset(inode),
  1673. TYPE_DIRECT, 3);
  1674. retval =
  1675. search_for_position_by_key(inode->i_sb, &s_item_key,
  1676. &s_search_path);
  1677. if (retval == IO_ERROR) {
  1678. reiserfs_error(inode->i_sb, "vs-5657",
  1679. "i/o failure occurred trying to truncate %K",
  1680. &s_item_key);
  1681. err = -EIO;
  1682. goto out;
  1683. }
  1684. if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) {
  1685. reiserfs_error(inode->i_sb, "PAP-5660",
  1686. "wrong result %d of search for %K", retval,
  1687. &s_item_key);
  1688. err = -EIO;
  1689. goto out;
  1690. }
  1691. s_search_path.pos_in_item--;
  1692. /* Get real file size (total length of all file items) */
  1693. p_le_ih = tp_item_head(&s_search_path);
  1694. if (is_statdata_le_ih(p_le_ih))
  1695. file_size = 0;
  1696. else {
  1697. loff_t offset = le_ih_k_offset(p_le_ih);
  1698. int bytes =
  1699. op_bytes_number(p_le_ih, inode->i_sb->s_blocksize);
  1700. /*
  1701. * this may mismatch with real file size: if last direct item
  1702. * had no padding zeros and last unformatted node had no free
  1703. * space, this file would have this file size
  1704. */
  1705. file_size = offset + bytes - 1;
  1706. }
  1707. /*
  1708. * are we doing a full truncate or delete, if so
  1709. * kick in the reada code
  1710. */
  1711. if (new_file_size == 0)
  1712. s_search_path.reada = PATH_READA | PATH_READA_BACK;
  1713. if (file_size == 0 || file_size < new_file_size) {
  1714. goto update_and_out;
  1715. }
  1716. /* Update key to search for the last file item. */
  1717. set_cpu_key_k_offset(&s_item_key, file_size);
  1718. do {
  1719. /* Cut or delete file item. */
  1720. deleted =
  1721. reiserfs_cut_from_item(th, &s_search_path, &s_item_key,
  1722. inode, page, new_file_size);
  1723. if (deleted < 0) {
  1724. reiserfs_warning(inode->i_sb, "vs-5665",
  1725. "reiserfs_cut_from_item failed");
  1726. reiserfs_check_path(&s_search_path);
  1727. return 0;
  1728. }
  1729. RFALSE(deleted > file_size,
  1730. "PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K",
  1731. deleted, file_size, &s_item_key);
  1732. /* Change key to search the last file item. */
  1733. file_size -= deleted;
  1734. set_cpu_key_k_offset(&s_item_key, file_size);
  1735. /*
  1736. * While there are bytes to truncate and previous
  1737. * file item is presented in the tree.
  1738. */
  1739. /*
  1740. * This loop could take a really long time, and could log
  1741. * many more blocks than a transaction can hold. So, we do
  1742. * a polite journal end here, and if the transaction needs
  1743. * ending, we make sure the file is consistent before ending
  1744. * the current trans and starting a new one
  1745. */
  1746. if (journal_transaction_should_end(th, 0) ||
  1747. reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) {
  1748. pathrelse(&s_search_path);
  1749. if (update_timestamps) {
  1750. inode->i_mtime = current_time(inode);
  1751. inode->i_ctime = current_time(inode);
  1752. }
  1753. reiserfs_update_sd(th, inode);
  1754. err = journal_end(th);
  1755. if (err)
  1756. goto out;
  1757. err = journal_begin(th, inode->i_sb,
  1758. JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD + JOURNAL_PER_BALANCE_CNT * 4) ;
  1759. if (err)
  1760. goto out;
  1761. reiserfs_update_inode_transaction(inode);
  1762. }
  1763. } while (file_size > ROUND_UP(new_file_size) &&
  1764. search_for_position_by_key(inode->i_sb, &s_item_key,
  1765. &s_search_path) == POSITION_FOUND);
  1766. RFALSE(file_size > ROUND_UP(new_file_size),
  1767. "PAP-5680: truncate did not finish: new_file_size %lld, current %lld, oid %d",
  1768. new_file_size, file_size, s_item_key.on_disk_key.k_objectid);
  1769. update_and_out:
  1770. if (update_timestamps) {
  1771. /* this is truncate, not file closing */
  1772. inode->i_mtime = current_time(inode);
  1773. inode->i_ctime = current_time(inode);
  1774. }
  1775. reiserfs_update_sd(th, inode);
  1776. out:
  1777. pathrelse(&s_search_path);
  1778. return err;
  1779. }
  1780. #ifdef CONFIG_REISERFS_CHECK
  1781. /* this makes sure, that we __append__, not overwrite or add holes */
  1782. static void check_research_for_paste(struct treepath *path,
  1783. const struct cpu_key *key)
  1784. {
  1785. struct item_head *found_ih = tp_item_head(path);
  1786. if (is_direct_le_ih(found_ih)) {
  1787. if (le_ih_k_offset(found_ih) +
  1788. op_bytes_number(found_ih,
  1789. get_last_bh(path)->b_size) !=
  1790. cpu_key_k_offset(key)
  1791. || op_bytes_number(found_ih,
  1792. get_last_bh(path)->b_size) !=
  1793. pos_in_item(path))
  1794. reiserfs_panic(NULL, "PAP-5720", "found direct item "
  1795. "%h or position (%d) does not match "
  1796. "to key %K", found_ih,
  1797. pos_in_item(path), key);
  1798. }
  1799. if (is_indirect_le_ih(found_ih)) {
  1800. if (le_ih_k_offset(found_ih) +
  1801. op_bytes_number(found_ih,
  1802. get_last_bh(path)->b_size) !=
  1803. cpu_key_k_offset(key)
  1804. || I_UNFM_NUM(found_ih) != pos_in_item(path)
  1805. || get_ih_free_space(found_ih) != 0)
  1806. reiserfs_panic(NULL, "PAP-5730", "found indirect "
  1807. "item (%h) or position (%d) does not "
  1808. "match to key (%K)",
  1809. found_ih, pos_in_item(path), key);
  1810. }
  1811. }
  1812. #endif /* config reiserfs check */
  1813. /*
  1814. * Paste bytes to the existing item.
  1815. * Returns bytes number pasted into the item.
  1816. */
  1817. int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th,
  1818. /* Path to the pasted item. */
  1819. struct treepath *search_path,
  1820. /* Key to search for the needed item. */
  1821. const struct cpu_key *key,
  1822. /* Inode item belongs to */
  1823. struct inode *inode,
  1824. /* Pointer to the bytes to paste. */
  1825. const char *body,
  1826. /* Size of pasted bytes. */
  1827. int pasted_size)
  1828. {
  1829. struct super_block *sb = inode->i_sb;
  1830. struct tree_balance s_paste_balance;
  1831. int retval;
  1832. int fs_gen;
  1833. int depth;
  1834. BUG_ON(!th->t_trans_id);
  1835. fs_gen = get_generation(inode->i_sb);
  1836. #ifdef REISERQUOTA_DEBUG
  1837. reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
  1838. "reiserquota paste_into_item(): allocating %u id=%u type=%c",
  1839. pasted_size, inode->i_uid,
  1840. key2type(&key->on_disk_key));
  1841. #endif
  1842. depth = reiserfs_write_unlock_nested(sb);
  1843. retval = dquot_alloc_space_nodirty(inode, pasted_size);
  1844. reiserfs_write_lock_nested(sb, depth);
  1845. if (retval) {
  1846. pathrelse(search_path);
  1847. return retval;
  1848. }
  1849. init_tb_struct(th, &s_paste_balance, th->t_super, search_path,
  1850. pasted_size);
  1851. #ifdef DISPLACE_NEW_PACKING_LOCALITIES
  1852. s_paste_balance.key = key->on_disk_key;
  1853. #endif
  1854. /* DQUOT_* can schedule, must check before the fix_nodes */
  1855. if (fs_changed(fs_gen, inode->i_sb)) {
  1856. goto search_again;
  1857. }
  1858. while ((retval =
  1859. fix_nodes(M_PASTE, &s_paste_balance, NULL,
  1860. body)) == REPEAT_SEARCH) {
  1861. search_again:
  1862. /* file system changed while we were in the fix_nodes */
  1863. PROC_INFO_INC(th->t_super, paste_into_item_restarted);
  1864. retval =
  1865. search_for_position_by_key(th->t_super, key,
  1866. search_path);
  1867. if (retval == IO_ERROR) {
  1868. retval = -EIO;
  1869. goto error_out;
  1870. }
  1871. if (retval == POSITION_FOUND) {
  1872. reiserfs_warning(inode->i_sb, "PAP-5710",
  1873. "entry or pasted byte (%K) exists",
  1874. key);
  1875. retval = -EEXIST;
  1876. goto error_out;
  1877. }
  1878. #ifdef CONFIG_REISERFS_CHECK
  1879. check_research_for_paste(search_path, key);
  1880. #endif
  1881. }
  1882. /*
  1883. * Perform balancing after all resources are collected by fix_nodes,
  1884. * and accessing them will not risk triggering schedule.
  1885. */
  1886. if (retval == CARRY_ON) {
  1887. do_balance(&s_paste_balance, NULL /*ih */ , body, M_PASTE);
  1888. return 0;
  1889. }
  1890. retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
  1891. error_out:
  1892. /* this also releases the path */
  1893. unfix_nodes(&s_paste_balance);
  1894. #ifdef REISERQUOTA_DEBUG
  1895. reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
  1896. "reiserquota paste_into_item(): freeing %u id=%u type=%c",
  1897. pasted_size, inode->i_uid,
  1898. key2type(&key->on_disk_key));
  1899. #endif
  1900. depth = reiserfs_write_unlock_nested(sb);
  1901. dquot_free_space_nodirty(inode, pasted_size);
  1902. reiserfs_write_lock_nested(sb, depth);
  1903. return retval;
  1904. }
  1905. /*
  1906. * Insert new item into the buffer at the path.
  1907. * th - active transaction handle
  1908. * path - path to the inserted item
  1909. * ih - pointer to the item header to insert
  1910. * body - pointer to the bytes to insert
  1911. */
  1912. int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
  1913. struct treepath *path, const struct cpu_key *key,
  1914. struct item_head *ih, struct inode *inode,
  1915. const char *body)
  1916. {
  1917. struct tree_balance s_ins_balance;
  1918. int retval;
  1919. int fs_gen = 0;
  1920. int quota_bytes = 0;
  1921. BUG_ON(!th->t_trans_id);
  1922. if (inode) { /* Do we count quotas for item? */
  1923. int depth;
  1924. fs_gen = get_generation(inode->i_sb);
  1925. quota_bytes = ih_item_len(ih);
  1926. /*
  1927. * hack so the quota code doesn't have to guess
  1928. * if the file has a tail, links are always tails,
  1929. * so there's no guessing needed
  1930. */
  1931. if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(ih))
  1932. quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE;
  1933. #ifdef REISERQUOTA_DEBUG
  1934. reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
  1935. "reiserquota insert_item(): allocating %u id=%u type=%c",
  1936. quota_bytes, inode->i_uid, head2type(ih));
  1937. #endif
  1938. /*
  1939. * We can't dirty inode here. It would be immediately
  1940. * written but appropriate stat item isn't inserted yet...
  1941. */
  1942. depth = reiserfs_write_unlock_nested(inode->i_sb);
  1943. retval = dquot_alloc_space_nodirty(inode, quota_bytes);
  1944. reiserfs_write_lock_nested(inode->i_sb, depth);
  1945. if (retval) {
  1946. pathrelse(path);
  1947. return retval;
  1948. }
  1949. }
  1950. init_tb_struct(th, &s_ins_balance, th->t_super, path,
  1951. IH_SIZE + ih_item_len(ih));
  1952. #ifdef DISPLACE_NEW_PACKING_LOCALITIES
  1953. s_ins_balance.key = key->on_disk_key;
  1954. #endif
  1955. /*
  1956. * DQUOT_* can schedule, must check to be sure calling
  1957. * fix_nodes is safe
  1958. */
  1959. if (inode && fs_changed(fs_gen, inode->i_sb)) {
  1960. goto search_again;
  1961. }
  1962. while ((retval =
  1963. fix_nodes(M_INSERT, &s_ins_balance, ih,
  1964. body)) == REPEAT_SEARCH) {
  1965. search_again:
  1966. /* file system changed while we were in the fix_nodes */
  1967. PROC_INFO_INC(th->t_super, insert_item_restarted);
  1968. retval = search_item(th->t_super, key, path);
  1969. if (retval == IO_ERROR) {
  1970. retval = -EIO;
  1971. goto error_out;
  1972. }
  1973. if (retval == ITEM_FOUND) {
  1974. reiserfs_warning(th->t_super, "PAP-5760",
  1975. "key %K already exists in the tree",
  1976. key);
  1977. retval = -EEXIST;
  1978. goto error_out;
  1979. }
  1980. }
  1981. /* make balancing after all resources will be collected at a time */
  1982. if (retval == CARRY_ON) {
  1983. do_balance(&s_ins_balance, ih, body, M_INSERT);
  1984. return 0;
  1985. }
  1986. retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
  1987. error_out:
  1988. /* also releases the path */
  1989. unfix_nodes(&s_ins_balance);
  1990. #ifdef REISERQUOTA_DEBUG
  1991. if (inode)
  1992. reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
  1993. "reiserquota insert_item(): freeing %u id=%u type=%c",
  1994. quota_bytes, inode->i_uid, head2type(ih));
  1995. #endif
  1996. if (inode) {
  1997. int depth = reiserfs_write_unlock_nested(inode->i_sb);
  1998. dquot_free_space_nodirty(inode, quota_bytes);
  1999. reiserfs_write_lock_nested(inode->i_sb, depth);
  2000. }
  2001. return retval;
  2002. }