segment.c 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962
  1. /*
  2. * fs/logfs/segment.c - Handling the Object Store
  3. *
  4. * As should be obvious for Linux kernel code, license is GPLv2
  5. *
  6. * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
  7. *
  8. * Object store or ostore makes up the complete device with exception of
  9. * the superblock and journal areas. Apart from its own metadata it stores
  10. * three kinds of objects: inodes, dentries and blocks, both data and indirect.
  11. */
  12. #include "logfs.h"
  13. #include <linux/slab.h>
  14. static int logfs_mark_segment_bad(struct super_block *sb, u32 segno)
  15. {
  16. struct logfs_super *super = logfs_super(sb);
  17. struct btree_head32 *head = &super->s_reserved_segments;
  18. int err;
  19. err = btree_insert32(head, segno, (void *)1, GFP_NOFS);
  20. if (err)
  21. return err;
  22. logfs_super(sb)->s_bad_segments++;
  23. /* FIXME: write to journal */
  24. return 0;
  25. }
  26. int logfs_erase_segment(struct super_block *sb, u32 segno, int ensure_erase)
  27. {
  28. struct logfs_super *super = logfs_super(sb);
  29. super->s_gec++;
  30. return super->s_devops->erase(sb, (u64)segno << super->s_segshift,
  31. super->s_segsize, ensure_erase);
  32. }
  33. static s64 logfs_get_free_bytes(struct logfs_area *area, size_t bytes)
  34. {
  35. s32 ofs;
  36. logfs_open_area(area, bytes);
  37. ofs = area->a_used_bytes;
  38. area->a_used_bytes += bytes;
  39. BUG_ON(area->a_used_bytes >= logfs_super(area->a_sb)->s_segsize);
  40. return dev_ofs(area->a_sb, area->a_segno, ofs);
  41. }
  42. static struct page *get_mapping_page(struct super_block *sb, pgoff_t index,
  43. int use_filler)
  44. {
  45. struct logfs_super *super = logfs_super(sb);
  46. struct address_space *mapping = super->s_mapping_inode->i_mapping;
  47. filler_t *filler = super->s_devops->readpage;
  48. struct page *page;
  49. BUG_ON(mapping_gfp_constraint(mapping, __GFP_FS));
  50. if (use_filler)
  51. page = read_cache_page(mapping, index, filler, sb);
  52. else {
  53. page = find_or_create_page(mapping, index, GFP_NOFS);
  54. if (page)
  55. unlock_page(page);
  56. }
  57. return page;
  58. }
  59. int __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len,
  60. int use_filler)
  61. {
  62. pgoff_t index = ofs >> PAGE_SHIFT;
  63. struct page *page;
  64. long offset = ofs & (PAGE_SIZE-1);
  65. long copylen;
  66. /* Only logfs_wbuf_recover may use len==0 */
  67. BUG_ON(!len && !use_filler);
  68. do {
  69. copylen = min((ulong)len, PAGE_SIZE - offset);
  70. page = get_mapping_page(area->a_sb, index, use_filler);
  71. if (IS_ERR(page))
  72. return PTR_ERR(page);
  73. BUG_ON(!page); /* FIXME: reserve a pool */
  74. SetPageUptodate(page);
  75. memcpy(page_address(page) + offset, buf, copylen);
  76. if (!PagePrivate(page)) {
  77. SetPagePrivate(page);
  78. get_page(page);
  79. }
  80. put_page(page);
  81. buf += copylen;
  82. len -= copylen;
  83. offset = 0;
  84. index++;
  85. } while (len);
  86. return 0;
  87. }
  88. static void pad_partial_page(struct logfs_area *area)
  89. {
  90. struct super_block *sb = area->a_sb;
  91. struct page *page;
  92. u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes);
  93. pgoff_t index = ofs >> PAGE_SHIFT;
  94. long offset = ofs & (PAGE_SIZE-1);
  95. u32 len = PAGE_SIZE - offset;
  96. if (len % PAGE_SIZE) {
  97. page = get_mapping_page(sb, index, 0);
  98. BUG_ON(!page); /* FIXME: reserve a pool */
  99. memset(page_address(page) + offset, 0xff, len);
  100. if (!PagePrivate(page)) {
  101. SetPagePrivate(page);
  102. get_page(page);
  103. }
  104. put_page(page);
  105. }
  106. }
  107. static void pad_full_pages(struct logfs_area *area)
  108. {
  109. struct super_block *sb = area->a_sb;
  110. struct logfs_super *super = logfs_super(sb);
  111. u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes);
  112. u32 len = super->s_segsize - area->a_used_bytes;
  113. pgoff_t index = PAGE_ALIGN(ofs) >> PAGE_SHIFT;
  114. pgoff_t no_indizes = len >> PAGE_SHIFT;
  115. struct page *page;
  116. while (no_indizes) {
  117. page = get_mapping_page(sb, index, 0);
  118. BUG_ON(!page); /* FIXME: reserve a pool */
  119. SetPageUptodate(page);
  120. memset(page_address(page), 0xff, PAGE_SIZE);
  121. if (!PagePrivate(page)) {
  122. SetPagePrivate(page);
  123. get_page(page);
  124. }
  125. put_page(page);
  126. index++;
  127. no_indizes--;
  128. }
  129. }
  130. /*
  131. * bdev_writeseg will write full pages. Memset the tail to prevent data leaks.
  132. * Also make sure we allocate (and memset) all pages for final writeout.
  133. */
  134. static void pad_wbuf(struct logfs_area *area, int final)
  135. {
  136. pad_partial_page(area);
  137. if (final)
  138. pad_full_pages(area);
  139. }
  140. /*
  141. * We have to be careful with the alias tree. Since lookup is done by bix,
  142. * it needs to be normalized, so 14, 15, 16, etc. all match when dealing with
  143. * indirect blocks. So always use it through accessor functions.
  144. */
  145. static void *alias_tree_lookup(struct super_block *sb, u64 ino, u64 bix,
  146. level_t level)
  147. {
  148. struct btree_head128 *head = &logfs_super(sb)->s_object_alias_tree;
  149. pgoff_t index = logfs_pack_index(bix, level);
  150. return btree_lookup128(head, ino, index);
  151. }
  152. static int alias_tree_insert(struct super_block *sb, u64 ino, u64 bix,
  153. level_t level, void *val)
  154. {
  155. struct btree_head128 *head = &logfs_super(sb)->s_object_alias_tree;
  156. pgoff_t index = logfs_pack_index(bix, level);
  157. return btree_insert128(head, ino, index, val, GFP_NOFS);
  158. }
  159. static int btree_write_alias(struct super_block *sb, struct logfs_block *block,
  160. write_alias_t *write_one_alias)
  161. {
  162. struct object_alias_item *item;
  163. int err;
  164. list_for_each_entry(item, &block->item_list, list) {
  165. err = write_alias_journal(sb, block->ino, block->bix,
  166. block->level, item->child_no, item->val);
  167. if (err)
  168. return err;
  169. }
  170. return 0;
  171. }
  172. static const struct logfs_block_ops btree_block_ops = {
  173. .write_block = btree_write_block,
  174. .free_block = __free_block,
  175. .write_alias = btree_write_alias,
  176. };
  177. int logfs_load_object_aliases(struct super_block *sb,
  178. struct logfs_obj_alias *oa, int count)
  179. {
  180. struct logfs_super *super = logfs_super(sb);
  181. struct logfs_block *block;
  182. struct object_alias_item *item;
  183. u64 ino, bix;
  184. level_t level;
  185. int i, err;
  186. super->s_flags |= LOGFS_SB_FLAG_OBJ_ALIAS;
  187. count /= sizeof(*oa);
  188. for (i = 0; i < count; i++) {
  189. item = mempool_alloc(super->s_alias_pool, GFP_NOFS);
  190. if (!item)
  191. return -ENOMEM;
  192. memset(item, 0, sizeof(*item));
  193. super->s_no_object_aliases++;
  194. item->val = oa[i].val;
  195. item->child_no = be16_to_cpu(oa[i].child_no);
  196. ino = be64_to_cpu(oa[i].ino);
  197. bix = be64_to_cpu(oa[i].bix);
  198. level = LEVEL(oa[i].level);
  199. log_aliases("logfs_load_object_aliases(%llx, %llx, %x, %x) %llx\n",
  200. ino, bix, level, item->child_no,
  201. be64_to_cpu(item->val));
  202. block = alias_tree_lookup(sb, ino, bix, level);
  203. if (!block) {
  204. block = __alloc_block(sb, ino, bix, level);
  205. block->ops = &btree_block_ops;
  206. err = alias_tree_insert(sb, ino, bix, level, block);
  207. BUG_ON(err); /* mempool empty */
  208. }
  209. if (test_and_set_bit(item->child_no, block->alias_map)) {
  210. printk(KERN_ERR"LogFS: Alias collision detected\n");
  211. return -EIO;
  212. }
  213. list_move_tail(&block->alias_list, &super->s_object_alias);
  214. list_add(&item->list, &block->item_list);
  215. }
  216. return 0;
  217. }
  218. static void kill_alias(void *_block, unsigned long ignore0,
  219. u64 ignore1, u64 ignore2, size_t ignore3)
  220. {
  221. struct logfs_block *block = _block;
  222. struct super_block *sb = block->sb;
  223. struct logfs_super *super = logfs_super(sb);
  224. struct object_alias_item *item;
  225. while (!list_empty(&block->item_list)) {
  226. item = list_entry(block->item_list.next, typeof(*item), list);
  227. list_del(&item->list);
  228. mempool_free(item, super->s_alias_pool);
  229. }
  230. block->ops->free_block(sb, block);
  231. }
  232. static int obj_type(struct inode *inode, level_t level)
  233. {
  234. if (level == 0) {
  235. if (S_ISDIR(inode->i_mode))
  236. return OBJ_DENTRY;
  237. if (inode->i_ino == LOGFS_INO_MASTER)
  238. return OBJ_INODE;
  239. }
  240. return OBJ_BLOCK;
  241. }
  242. static int obj_len(struct super_block *sb, int obj_type)
  243. {
  244. switch (obj_type) {
  245. case OBJ_DENTRY:
  246. return sizeof(struct logfs_disk_dentry);
  247. case OBJ_INODE:
  248. return sizeof(struct logfs_disk_inode);
  249. case OBJ_BLOCK:
  250. return sb->s_blocksize;
  251. default:
  252. BUG();
  253. }
  254. }
  255. static int __logfs_segment_write(struct inode *inode, void *buf,
  256. struct logfs_shadow *shadow, int type, int len, int compr)
  257. {
  258. struct logfs_area *area;
  259. struct super_block *sb = inode->i_sb;
  260. s64 ofs;
  261. struct logfs_object_header h;
  262. int acc_len;
  263. if (shadow->gc_level == 0)
  264. acc_len = len;
  265. else
  266. acc_len = obj_len(sb, type);
  267. area = get_area(sb, shadow->gc_level);
  268. ofs = logfs_get_free_bytes(area, len + LOGFS_OBJECT_HEADERSIZE);
  269. LOGFS_BUG_ON(ofs <= 0, sb);
  270. /*
  271. * Order is important. logfs_get_free_bytes(), by modifying the
  272. * segment file, may modify the content of the very page we're about
  273. * to write now. Which is fine, as long as the calculated crc and
  274. * written data still match. So do the modifications _before_
  275. * calculating the crc.
  276. */
  277. h.len = cpu_to_be16(len);
  278. h.type = type;
  279. h.compr = compr;
  280. h.ino = cpu_to_be64(inode->i_ino);
  281. h.bix = cpu_to_be64(shadow->bix);
  282. h.crc = logfs_crc32(&h, sizeof(h) - 4, 4);
  283. h.data_crc = logfs_crc32(buf, len, 0);
  284. logfs_buf_write(area, ofs, &h, sizeof(h));
  285. logfs_buf_write(area, ofs + LOGFS_OBJECT_HEADERSIZE, buf, len);
  286. shadow->new_ofs = ofs;
  287. shadow->new_len = acc_len + LOGFS_OBJECT_HEADERSIZE;
  288. return 0;
  289. }
  290. static s64 logfs_segment_write_compress(struct inode *inode, void *buf,
  291. struct logfs_shadow *shadow, int type, int len)
  292. {
  293. struct super_block *sb = inode->i_sb;
  294. void *compressor_buf = logfs_super(sb)->s_compressed_je;
  295. ssize_t compr_len;
  296. int ret;
  297. mutex_lock(&logfs_super(sb)->s_journal_mutex);
  298. compr_len = logfs_compress(buf, compressor_buf, len, len);
  299. if (compr_len >= 0) {
  300. ret = __logfs_segment_write(inode, compressor_buf, shadow,
  301. type, compr_len, COMPR_ZLIB);
  302. } else {
  303. ret = __logfs_segment_write(inode, buf, shadow, type, len,
  304. COMPR_NONE);
  305. }
  306. mutex_unlock(&logfs_super(sb)->s_journal_mutex);
  307. return ret;
  308. }
  309. /**
  310. * logfs_segment_write - write data block to object store
  311. * @inode: inode containing data
  312. *
  313. * Returns an errno or zero.
  314. */
  315. int logfs_segment_write(struct inode *inode, struct page *page,
  316. struct logfs_shadow *shadow)
  317. {
  318. struct super_block *sb = inode->i_sb;
  319. struct logfs_super *super = logfs_super(sb);
  320. int do_compress, type, len;
  321. int ret;
  322. void *buf;
  323. super->s_flags |= LOGFS_SB_FLAG_DIRTY;
  324. BUG_ON(super->s_flags & LOGFS_SB_FLAG_SHUTDOWN);
  325. do_compress = logfs_inode(inode)->li_flags & LOGFS_IF_COMPRESSED;
  326. if (shadow->gc_level != 0) {
  327. /* temporarily disable compression for indirect blocks */
  328. do_compress = 0;
  329. }
  330. type = obj_type(inode, shrink_level(shadow->gc_level));
  331. len = obj_len(sb, type);
  332. buf = kmap(page);
  333. if (do_compress)
  334. ret = logfs_segment_write_compress(inode, buf, shadow, type,
  335. len);
  336. else
  337. ret = __logfs_segment_write(inode, buf, shadow, type, len,
  338. COMPR_NONE);
  339. kunmap(page);
  340. log_segment("logfs_segment_write(%llx, %llx, %x) %llx->%llx %x->%x\n",
  341. shadow->ino, shadow->bix, shadow->gc_level,
  342. shadow->old_ofs, shadow->new_ofs,
  343. shadow->old_len, shadow->new_len);
  344. /* this BUG_ON did catch a locking bug. useful */
  345. BUG_ON(!(shadow->new_ofs & (super->s_segsize - 1)));
  346. return ret;
  347. }
  348. int wbuf_read(struct super_block *sb, u64 ofs, size_t len, void *buf)
  349. {
  350. pgoff_t index = ofs >> PAGE_SHIFT;
  351. struct page *page;
  352. long offset = ofs & (PAGE_SIZE-1);
  353. long copylen;
  354. while (len) {
  355. copylen = min((ulong)len, PAGE_SIZE - offset);
  356. page = get_mapping_page(sb, index, 1);
  357. if (IS_ERR(page))
  358. return PTR_ERR(page);
  359. memcpy(buf, page_address(page) + offset, copylen);
  360. put_page(page);
  361. buf += copylen;
  362. len -= copylen;
  363. offset = 0;
  364. index++;
  365. }
  366. return 0;
  367. }
  368. /*
  369. * The "position" of indirect blocks is ambiguous. It can be the position
  370. * of any data block somewhere behind this indirect block. So we need to
  371. * normalize the positions through logfs_block_mask() before comparing.
  372. */
  373. static int check_pos(struct super_block *sb, u64 pos1, u64 pos2, level_t level)
  374. {
  375. return (pos1 & logfs_block_mask(sb, level)) !=
  376. (pos2 & logfs_block_mask(sb, level));
  377. }
  378. #if 0
  379. static int read_seg_header(struct super_block *sb, u64 ofs,
  380. struct logfs_segment_header *sh)
  381. {
  382. __be32 crc;
  383. int err;
  384. err = wbuf_read(sb, ofs, sizeof(*sh), sh);
  385. if (err)
  386. return err;
  387. crc = logfs_crc32(sh, sizeof(*sh), 4);
  388. if (crc != sh->crc) {
  389. printk(KERN_ERR"LOGFS: header crc error at %llx: expected %x, "
  390. "got %x\n", ofs, be32_to_cpu(sh->crc),
  391. be32_to_cpu(crc));
  392. return -EIO;
  393. }
  394. return 0;
  395. }
  396. #endif
  397. static int read_obj_header(struct super_block *sb, u64 ofs,
  398. struct logfs_object_header *oh)
  399. {
  400. __be32 crc;
  401. int err;
  402. err = wbuf_read(sb, ofs, sizeof(*oh), oh);
  403. if (err)
  404. return err;
  405. crc = logfs_crc32(oh, sizeof(*oh) - 4, 4);
  406. if (crc != oh->crc) {
  407. printk(KERN_ERR"LOGFS: header crc error at %llx: expected %x, "
  408. "got %x\n", ofs, be32_to_cpu(oh->crc),
  409. be32_to_cpu(crc));
  410. return -EIO;
  411. }
  412. return 0;
  413. }
  414. static void move_btree_to_page(struct inode *inode, struct page *page,
  415. __be64 *data)
  416. {
  417. struct super_block *sb = inode->i_sb;
  418. struct logfs_super *super = logfs_super(sb);
  419. struct btree_head128 *head = &super->s_object_alias_tree;
  420. struct logfs_block *block;
  421. struct object_alias_item *item, *next;
  422. if (!(super->s_flags & LOGFS_SB_FLAG_OBJ_ALIAS))
  423. return;
  424. block = btree_remove128(head, inode->i_ino, page->index);
  425. if (!block)
  426. return;
  427. log_blockmove("move_btree_to_page(%llx, %llx, %x)\n",
  428. block->ino, block->bix, block->level);
  429. list_for_each_entry_safe(item, next, &block->item_list, list) {
  430. data[item->child_no] = item->val;
  431. list_del(&item->list);
  432. mempool_free(item, super->s_alias_pool);
  433. }
  434. block->page = page;
  435. if (!PagePrivate(page)) {
  436. SetPagePrivate(page);
  437. get_page(page);
  438. set_page_private(page, (unsigned long) block);
  439. }
  440. block->ops = &indirect_block_ops;
  441. initialize_block_counters(page, block, data, 0);
  442. }
  443. /*
  444. * This silences a false, yet annoying gcc warning. I hate it when my editor
  445. * jumps into bitops.h each time I recompile this file.
  446. * TODO: Complain to gcc folks about this and upgrade compiler.
  447. */
  448. static unsigned long fnb(const unsigned long *addr,
  449. unsigned long size, unsigned long offset)
  450. {
  451. return find_next_bit(addr, size, offset);
  452. }
  453. void move_page_to_btree(struct page *page)
  454. {
  455. struct logfs_block *block = logfs_block(page);
  456. struct super_block *sb = block->sb;
  457. struct logfs_super *super = logfs_super(sb);
  458. struct object_alias_item *item;
  459. unsigned long pos;
  460. __be64 *child;
  461. int err;
  462. if (super->s_flags & LOGFS_SB_FLAG_SHUTDOWN) {
  463. block->ops->free_block(sb, block);
  464. return;
  465. }
  466. log_blockmove("move_page_to_btree(%llx, %llx, %x)\n",
  467. block->ino, block->bix, block->level);
  468. super->s_flags |= LOGFS_SB_FLAG_OBJ_ALIAS;
  469. for (pos = 0; ; pos++) {
  470. pos = fnb(block->alias_map, LOGFS_BLOCK_FACTOR, pos);
  471. if (pos >= LOGFS_BLOCK_FACTOR)
  472. break;
  473. item = mempool_alloc(super->s_alias_pool, GFP_NOFS);
  474. BUG_ON(!item); /* mempool empty */
  475. memset(item, 0, sizeof(*item));
  476. child = kmap_atomic(page);
  477. item->val = child[pos];
  478. kunmap_atomic(child);
  479. item->child_no = pos;
  480. list_add(&item->list, &block->item_list);
  481. }
  482. block->page = NULL;
  483. if (PagePrivate(page)) {
  484. ClearPagePrivate(page);
  485. put_page(page);
  486. set_page_private(page, 0);
  487. }
  488. block->ops = &btree_block_ops;
  489. err = alias_tree_insert(block->sb, block->ino, block->bix, block->level,
  490. block);
  491. BUG_ON(err); /* mempool empty */
  492. ClearPageUptodate(page);
  493. }
  494. static int __logfs_segment_read(struct inode *inode, void *buf,
  495. u64 ofs, u64 bix, level_t level)
  496. {
  497. struct super_block *sb = inode->i_sb;
  498. void *compressor_buf = logfs_super(sb)->s_compressed_je;
  499. struct logfs_object_header oh;
  500. __be32 crc;
  501. u16 len;
  502. int err, block_len;
  503. block_len = obj_len(sb, obj_type(inode, level));
  504. err = read_obj_header(sb, ofs, &oh);
  505. if (err)
  506. goto out_err;
  507. err = -EIO;
  508. if (be64_to_cpu(oh.ino) != inode->i_ino
  509. || check_pos(sb, be64_to_cpu(oh.bix), bix, level)) {
  510. printk(KERN_ERR"LOGFS: (ino, bix) don't match at %llx: "
  511. "expected (%lx, %llx), got (%llx, %llx)\n",
  512. ofs, inode->i_ino, bix,
  513. be64_to_cpu(oh.ino), be64_to_cpu(oh.bix));
  514. goto out_err;
  515. }
  516. len = be16_to_cpu(oh.len);
  517. switch (oh.compr) {
  518. case COMPR_NONE:
  519. err = wbuf_read(sb, ofs + LOGFS_OBJECT_HEADERSIZE, len, buf);
  520. if (err)
  521. goto out_err;
  522. crc = logfs_crc32(buf, len, 0);
  523. if (crc != oh.data_crc) {
  524. printk(KERN_ERR"LOGFS: uncompressed data crc error at "
  525. "%llx: expected %x, got %x\n", ofs,
  526. be32_to_cpu(oh.data_crc),
  527. be32_to_cpu(crc));
  528. goto out_err;
  529. }
  530. break;
  531. case COMPR_ZLIB:
  532. mutex_lock(&logfs_super(sb)->s_journal_mutex);
  533. err = wbuf_read(sb, ofs + LOGFS_OBJECT_HEADERSIZE, len,
  534. compressor_buf);
  535. if (err) {
  536. mutex_unlock(&logfs_super(sb)->s_journal_mutex);
  537. goto out_err;
  538. }
  539. crc = logfs_crc32(compressor_buf, len, 0);
  540. if (crc != oh.data_crc) {
  541. printk(KERN_ERR"LOGFS: compressed data crc error at "
  542. "%llx: expected %x, got %x\n", ofs,
  543. be32_to_cpu(oh.data_crc),
  544. be32_to_cpu(crc));
  545. mutex_unlock(&logfs_super(sb)->s_journal_mutex);
  546. goto out_err;
  547. }
  548. err = logfs_uncompress(compressor_buf, buf, len, block_len);
  549. mutex_unlock(&logfs_super(sb)->s_journal_mutex);
  550. if (err) {
  551. printk(KERN_ERR"LOGFS: uncompress error at %llx\n", ofs);
  552. goto out_err;
  553. }
  554. break;
  555. default:
  556. LOGFS_BUG(sb);
  557. err = -EIO;
  558. goto out_err;
  559. }
  560. return 0;
  561. out_err:
  562. logfs_set_ro(sb);
  563. printk(KERN_ERR"LOGFS: device is read-only now\n");
  564. LOGFS_BUG(sb);
  565. return err;
  566. }
  567. /**
  568. * logfs_segment_read - read data block from object store
  569. * @inode: inode containing data
  570. * @buf: data buffer
  571. * @ofs: physical data offset
  572. * @bix: block index
  573. * @level: block level
  574. *
  575. * Returns 0 on success or a negative errno.
  576. */
  577. int logfs_segment_read(struct inode *inode, struct page *page,
  578. u64 ofs, u64 bix, level_t level)
  579. {
  580. int err;
  581. void *buf;
  582. if (PageUptodate(page))
  583. return 0;
  584. ofs &= ~LOGFS_FULLY_POPULATED;
  585. buf = kmap(page);
  586. err = __logfs_segment_read(inode, buf, ofs, bix, level);
  587. if (!err) {
  588. move_btree_to_page(inode, page, buf);
  589. SetPageUptodate(page);
  590. }
  591. kunmap(page);
  592. log_segment("logfs_segment_read(%lx, %llx, %x) %llx (%d)\n",
  593. inode->i_ino, bix, level, ofs, err);
  594. return err;
  595. }
  596. int logfs_segment_delete(struct inode *inode, struct logfs_shadow *shadow)
  597. {
  598. struct super_block *sb = inode->i_sb;
  599. struct logfs_super *super = logfs_super(sb);
  600. struct logfs_object_header h;
  601. u16 len;
  602. int err;
  603. super->s_flags |= LOGFS_SB_FLAG_DIRTY;
  604. BUG_ON(super->s_flags & LOGFS_SB_FLAG_SHUTDOWN);
  605. BUG_ON(shadow->old_ofs & LOGFS_FULLY_POPULATED);
  606. if (!shadow->old_ofs)
  607. return 0;
  608. log_segment("logfs_segment_delete(%llx, %llx, %x) %llx->%llx %x->%x\n",
  609. shadow->ino, shadow->bix, shadow->gc_level,
  610. shadow->old_ofs, shadow->new_ofs,
  611. shadow->old_len, shadow->new_len);
  612. err = read_obj_header(sb, shadow->old_ofs, &h);
  613. LOGFS_BUG_ON(err, sb);
  614. LOGFS_BUG_ON(be64_to_cpu(h.ino) != inode->i_ino, sb);
  615. LOGFS_BUG_ON(check_pos(sb, shadow->bix, be64_to_cpu(h.bix),
  616. shrink_level(shadow->gc_level)), sb);
  617. if (shadow->gc_level == 0)
  618. len = be16_to_cpu(h.len);
  619. else
  620. len = obj_len(sb, h.type);
  621. shadow->old_len = len + sizeof(h);
  622. return 0;
  623. }
  624. void freeseg(struct super_block *sb, u32 segno)
  625. {
  626. struct logfs_super *super = logfs_super(sb);
  627. struct address_space *mapping = super->s_mapping_inode->i_mapping;
  628. struct page *page;
  629. u64 ofs, start, end;
  630. start = dev_ofs(sb, segno, 0);
  631. end = dev_ofs(sb, segno + 1, 0);
  632. for (ofs = start; ofs < end; ofs += PAGE_SIZE) {
  633. page = find_get_page(mapping, ofs >> PAGE_SHIFT);
  634. if (!page)
  635. continue;
  636. if (PagePrivate(page)) {
  637. ClearPagePrivate(page);
  638. put_page(page);
  639. }
  640. put_page(page);
  641. }
  642. }
  643. int logfs_open_area(struct logfs_area *area, size_t bytes)
  644. {
  645. struct super_block *sb = area->a_sb;
  646. struct logfs_super *super = logfs_super(sb);
  647. int err, closed = 0;
  648. if (area->a_is_open && area->a_used_bytes + bytes <= super->s_segsize)
  649. return 0;
  650. if (area->a_is_open) {
  651. u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes);
  652. u32 len = super->s_segsize - area->a_written_bytes;
  653. log_gc("logfs_close_area(%x)\n", area->a_segno);
  654. pad_wbuf(area, 1);
  655. super->s_devops->writeseg(area->a_sb, ofs, len);
  656. freeseg(sb, area->a_segno);
  657. closed = 1;
  658. }
  659. area->a_used_bytes = 0;
  660. area->a_written_bytes = 0;
  661. again:
  662. area->a_ops->get_free_segment(area);
  663. area->a_ops->get_erase_count(area);
  664. log_gc("logfs_open_area(%x, %x)\n", area->a_segno, area->a_level);
  665. err = area->a_ops->erase_segment(area);
  666. if (err) {
  667. printk(KERN_WARNING "LogFS: Error erasing segment %x\n",
  668. area->a_segno);
  669. logfs_mark_segment_bad(sb, area->a_segno);
  670. goto again;
  671. }
  672. area->a_is_open = 1;
  673. return closed;
  674. }
  675. void logfs_sync_area(struct logfs_area *area)
  676. {
  677. struct super_block *sb = area->a_sb;
  678. struct logfs_super *super = logfs_super(sb);
  679. u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes);
  680. u32 len = (area->a_used_bytes - area->a_written_bytes);
  681. if (super->s_writesize)
  682. len &= ~(super->s_writesize - 1);
  683. if (len == 0)
  684. return;
  685. pad_wbuf(area, 0);
  686. super->s_devops->writeseg(sb, ofs, len);
  687. area->a_written_bytes += len;
  688. }
  689. void logfs_sync_segments(struct super_block *sb)
  690. {
  691. struct logfs_super *super = logfs_super(sb);
  692. int i;
  693. for_each_area(i)
  694. logfs_sync_area(super->s_area[i]);
  695. }
  696. /*
  697. * Pick a free segment to be used for this area. Effectively takes a
  698. * candidate from the free list (not really a candidate anymore).
  699. */
  700. static void ostore_get_free_segment(struct logfs_area *area)
  701. {
  702. struct super_block *sb = area->a_sb;
  703. struct logfs_super *super = logfs_super(sb);
  704. if (super->s_free_list.count == 0) {
  705. printk(KERN_ERR"LOGFS: ran out of free segments\n");
  706. LOGFS_BUG(sb);
  707. }
  708. area->a_segno = get_best_cand(sb, &super->s_free_list, NULL);
  709. }
  710. static void ostore_get_erase_count(struct logfs_area *area)
  711. {
  712. struct logfs_segment_entry se;
  713. u32 ec_level;
  714. logfs_get_segment_entry(area->a_sb, area->a_segno, &se);
  715. BUG_ON(se.ec_level == cpu_to_be32(BADSEG) ||
  716. se.valid == cpu_to_be32(RESERVED));
  717. ec_level = be32_to_cpu(se.ec_level);
  718. area->a_erase_count = (ec_level >> 4) + 1;
  719. }
  720. static int ostore_erase_segment(struct logfs_area *area)
  721. {
  722. struct super_block *sb = area->a_sb;
  723. struct logfs_segment_header sh;
  724. u64 ofs;
  725. int err;
  726. err = logfs_erase_segment(sb, area->a_segno, 0);
  727. if (err)
  728. return err;
  729. sh.pad = 0;
  730. sh.type = SEG_OSTORE;
  731. sh.level = (__force u8)area->a_level;
  732. sh.segno = cpu_to_be32(area->a_segno);
  733. sh.ec = cpu_to_be32(area->a_erase_count);
  734. sh.gec = cpu_to_be64(logfs_super(sb)->s_gec);
  735. sh.crc = logfs_crc32(&sh, sizeof(sh), 4);
  736. logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count,
  737. area->a_level);
  738. ofs = dev_ofs(sb, area->a_segno, 0);
  739. area->a_used_bytes = sizeof(sh);
  740. logfs_buf_write(area, ofs, &sh, sizeof(sh));
  741. return 0;
  742. }
  743. static const struct logfs_area_ops ostore_area_ops = {
  744. .get_free_segment = ostore_get_free_segment,
  745. .get_erase_count = ostore_get_erase_count,
  746. .erase_segment = ostore_erase_segment,
  747. };
  748. static void free_area(struct logfs_area *area)
  749. {
  750. if (area)
  751. freeseg(area->a_sb, area->a_segno);
  752. kfree(area);
  753. }
  754. void free_areas(struct super_block *sb)
  755. {
  756. struct logfs_super *super = logfs_super(sb);
  757. int i;
  758. for_each_area(i)
  759. free_area(super->s_area[i]);
  760. free_area(super->s_journal_area);
  761. }
  762. static struct logfs_area *alloc_area(struct super_block *sb)
  763. {
  764. struct logfs_area *area;
  765. area = kzalloc(sizeof(*area), GFP_KERNEL);
  766. if (!area)
  767. return NULL;
  768. area->a_sb = sb;
  769. return area;
  770. }
  771. static void map_invalidatepage(struct page *page, unsigned int o,
  772. unsigned int l)
  773. {
  774. return;
  775. }
  776. static int map_releasepage(struct page *page, gfp_t g)
  777. {
  778. /* Don't release these pages */
  779. return 0;
  780. }
  781. static const struct address_space_operations mapping_aops = {
  782. .invalidatepage = map_invalidatepage,
  783. .releasepage = map_releasepage,
  784. .set_page_dirty = __set_page_dirty_nobuffers,
  785. };
  786. int logfs_init_mapping(struct super_block *sb)
  787. {
  788. struct logfs_super *super = logfs_super(sb);
  789. struct address_space *mapping;
  790. struct inode *inode;
  791. inode = logfs_new_meta_inode(sb, LOGFS_INO_MAPPING);
  792. if (IS_ERR(inode))
  793. return PTR_ERR(inode);
  794. super->s_mapping_inode = inode;
  795. mapping = inode->i_mapping;
  796. mapping->a_ops = &mapping_aops;
  797. /* Would it be possible to use __GFP_HIGHMEM as well? */
  798. mapping_set_gfp_mask(mapping, GFP_NOFS);
  799. return 0;
  800. }
  801. int logfs_init_areas(struct super_block *sb)
  802. {
  803. struct logfs_super *super = logfs_super(sb);
  804. int i = -1;
  805. super->s_alias_pool = mempool_create_kmalloc_pool(600,
  806. sizeof(struct object_alias_item));
  807. if (!super->s_alias_pool)
  808. return -ENOMEM;
  809. super->s_journal_area = alloc_area(sb);
  810. if (!super->s_journal_area)
  811. goto err;
  812. for_each_area(i) {
  813. super->s_area[i] = alloc_area(sb);
  814. if (!super->s_area[i])
  815. goto err;
  816. super->s_area[i]->a_level = GC_LEVEL(i);
  817. super->s_area[i]->a_ops = &ostore_area_ops;
  818. }
  819. btree_init_mempool128(&super->s_object_alias_tree,
  820. super->s_btree_pool);
  821. return 0;
  822. err:
  823. for (i--; i >= 0; i--)
  824. free_area(super->s_area[i]);
  825. free_area(super->s_journal_area);
  826. logfs_mempool_destroy(super->s_alias_pool);
  827. return -ENOMEM;
  828. }
  829. void logfs_cleanup_areas(struct super_block *sb)
  830. {
  831. struct logfs_super *super = logfs_super(sb);
  832. btree_grim_visitor128(&super->s_object_alias_tree, 0, kill_alias);
  833. }