dm-clone-metadata.c 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Copyright (C) 2019 Arrikto, Inc. All Rights Reserved.
  4. */
  5. #include <linux/mm.h>
  6. #include <linux/err.h>
  7. #include <linux/slab.h>
  8. #include <linux/rwsem.h>
  9. #include <linux/bitops.h>
  10. #include <linux/bitmap.h>
  11. #include <linux/device-mapper.h>
  12. #include "persistent-data/dm-bitset.h"
  13. #include "persistent-data/dm-space-map.h"
  14. #include "persistent-data/dm-block-manager.h"
  15. #include "persistent-data/dm-transaction-manager.h"
  16. #include "dm-clone-metadata.h"
/* Log prefix; presumably picked up by the DMERR() calls in this file. */
#define DM_MSG_PREFIX "clone metadata"

/* Block number of the superblock on the metadata device. */
#define SUPERBLOCK_LOCATION 0
/* Value stored in superblock_disk.magic; sb_check() rejects anything else. */
#define SUPERBLOCK_MAGIC 0x8af27f64
/* Passed as the last argument of dm_bm_checksum() for the superblock. */
#define SUPERBLOCK_CSUM_XOR 257649492

/* Passed to dm_block_manager_create() when the block manager is created. */
#define DM_CLONE_MAX_CONCURRENT_LOCKS 5

/* Size, in bytes, of the uuid field of the on-disk superblock. */
#define UUID_LEN 16

/* Min and max dm-clone metadata versions supported */
#define DM_CLONE_MIN_METADATA_VERSION 1
#define DM_CLONE_MAX_METADATA_VERSION 1
  26. /*
  27. * On-disk metadata layout
  28. */
struct superblock_disk {
	/* Checksum of the block from 'flags' onwards (see sb_prepare_for_write()) */
	__le32 csum;
	/* Always written as 0 by __prepare_superblock() */
	__le32 flags;
	/* Location of this block; verified by sb_check() */
	__le64 blocknr;

	/* Currently unused; zeroed by __prepare_superblock() */
	__u8 uuid[UUID_LEN];
	/* Must equal SUPERBLOCK_MAGIC */
	__le64 magic;
	/* Metadata format version; checked against the supported min/max */
	__le32 version;

	/* Copy of the metadata space map root */
	__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];

	/* Compared against the values supplied at open time in __open_metadata() */
	__le64 region_size;
	__le64 target_size;

	/* Root of the on-disk bitset */
	__le64 bitset_root;
} __packed;
  41. /*
  42. * Region and Dirty bitmaps.
  43. *
  44. * dm-clone logically splits the source and destination devices in regions of
  45. * fixed size. The destination device's regions are gradually hydrated, i.e.,
  46. * we copy (clone) the source's regions to the destination device. Eventually,
  47. * all regions will get hydrated and all I/O will be served from the
  48. * destination device.
  49. *
  50. * We maintain an on-disk bitmap which tracks the state of each of the
  51. * destination device's regions, i.e., whether they are hydrated or not.
  52. *
  53. * To save constantly doing look ups on disk we keep an in core copy of the
  54. * on-disk bitmap, the region_map.
  55. *
  56. * In order to track which regions are hydrated during a metadata transaction,
  57. * we use a second set of bitmaps, the dmap (dirty bitmap), which includes two
  58. * bitmaps, namely dirty_regions and dirty_words. The dirty_regions bitmap
  59. * tracks the regions that got hydrated during the current metadata
  60. * transaction. The dirty_words bitmap tracks the dirty words, i.e. longs, of
  61. * the dirty_regions bitmap.
  62. *
  63. * This allows us to precisely track the regions that were hydrated during the
  64. * current metadata transaction and update the metadata accordingly, when we
  65. * commit the current transaction. This is important because dm-clone should
  66. * only commit the metadata of regions that were properly flushed to the
  67. * destination device beforehand. Otherwise, in case of a crash, we could end
  68. * up with a corrupted dm-clone device.
  69. *
  70. * When a region finishes hydrating dm-clone calls
  71. * dm_clone_set_region_hydrated(), or for discard requests
  72. * dm_clone_cond_set_range(), which sets the corresponding bits in region_map
  73. * and dmap.
  74. *
  75. * During a metadata commit we scan dmap->dirty_words and dmap->dirty_regions
  76. * and update the on-disk metadata accordingly. Thus, we don't have to flush to
  77. * disk the whole region_map. We can just flush the dirty region_map bits.
  78. *
  79. * We use the helper dmap->dirty_words bitmap, which is smaller than the
  80. * original region_map, to reduce the amount of memory accesses during a
  81. * metadata commit. Moreover, as dm-bitset also accesses the on-disk bitmap in
  82. * 64-bit word granularity, the dirty_words bitmap helps us avoid useless disk
  83. * accesses.
  84. *
  85. * We could update directly the on-disk bitmap, when dm-clone calls either
  86. * dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), but this
  87. * inserts significant metadata I/O overhead in dm-clone's I/O path. Also, as
  88. * these two functions don't block, we can call them in interrupt context,
  89. * e.g., in a hooked overwrite bio's completion routine, and further reduce the
  90. * I/O completion latency.
  91. *
  92. * We maintain two dirty bitmap sets. During a metadata commit we atomically
  93. * swap the currently used dmap with the unused one. This allows the metadata
  94. * update functions to run concurrently with an ongoing commit.
  95. */
/*
 * One set of dirty bitmaps, recording the regions marked hydrated during a
 * metadata transaction.
 */
struct dirty_map {
	/* One bit per long of dirty_regions; set when that long holds a dirty bit */
	unsigned long *dirty_words;
	/* One bit per region; set when the region was marked during the transaction */
	unsigned long *dirty_regions;
	/* Set to 1 when a region is marked; reset to 0 after a successful flush */
	unsigned int changed;
};
/* In-core state of an open dm-clone metadata device. */
struct dm_clone_metadata {
	/* The metadata block device */
	struct block_device *bdev;

	/* Sizes as passed to dm_clone_metadata_open() */
	sector_t target_size;
	sector_t region_size;
	/* target_size divided by region_size, rounded up */
	unsigned long nr_regions;
	/* Number of longs needed to hold nr_regions bits */
	unsigned long nr_words;

	/* Spinlock protecting the region and dirty bitmaps. */
	spinlock_t bitmap_lock;
	struct dirty_map dmap[2];
	/* The dmap that new updates are recorded in; swapped under bitmap_lock */
	struct dirty_map *current_dmap;

	/*
	 * The dmap handed over by dm_clone_metadata_pre_commit() and not yet
	 * successfully flushed by dm_clone_metadata_commit(); NULL otherwise.
	 * Protected by lock
	 */
	struct dirty_map *committing_dmap;

	/*
	 * In core copy of the on-disk bitmap to save constantly doing look ups
	 * on disk.
	 */
	unsigned long *region_map;

	/* Protected by bitmap_lock */
	unsigned int read_only;

	struct dm_block_manager *bm;
	struct dm_space_map *sm;
	struct dm_transaction_manager *tm;

	/* Taken by the commit, abort, reload, mode-change and query paths */
	struct rw_semaphore lock;

	struct dm_disk_bitset bitset_info;
	dm_block_t bitset_root;

	/*
	 * Reading the space map root can fail, so we read it into this
	 * buffer before the superblock is locked and updated.
	 */
	__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];

	/* Set once region_map is found to be full (at open or after a commit) */
	bool hydration_done:1;
	/* Set when dm_clone_metadata_abort() fails to re-open the metadata */
	bool fail_io:1;
};
  135. /*---------------------------------------------------------------------------*/
  136. /*
  137. * Superblock validation.
  138. */
  139. static void sb_prepare_for_write(struct dm_block_validator *v,
  140. struct dm_block *b, size_t sb_block_size)
  141. {
  142. struct superblock_disk *sb;
  143. u32 csum;
  144. sb = dm_block_data(b);
  145. sb->blocknr = cpu_to_le64(dm_block_location(b));
  146. csum = dm_bm_checksum(&sb->flags, sb_block_size - sizeof(__le32),
  147. SUPERBLOCK_CSUM_XOR);
  148. sb->csum = cpu_to_le32(csum);
  149. }
  150. static int sb_check(struct dm_block_validator *v, struct dm_block *b,
  151. size_t sb_block_size)
  152. {
  153. struct superblock_disk *sb;
  154. u32 csum, metadata_version;
  155. sb = dm_block_data(b);
  156. if (dm_block_location(b) != le64_to_cpu(sb->blocknr)) {
  157. DMERR("Superblock check failed: blocknr %llu, expected %llu",
  158. le64_to_cpu(sb->blocknr),
  159. (unsigned long long)dm_block_location(b));
  160. return -ENOTBLK;
  161. }
  162. if (le64_to_cpu(sb->magic) != SUPERBLOCK_MAGIC) {
  163. DMERR("Superblock check failed: magic %llu, expected %llu",
  164. le64_to_cpu(sb->magic),
  165. (unsigned long long)SUPERBLOCK_MAGIC);
  166. return -EILSEQ;
  167. }
  168. csum = dm_bm_checksum(&sb->flags, sb_block_size - sizeof(__le32),
  169. SUPERBLOCK_CSUM_XOR);
  170. if (sb->csum != cpu_to_le32(csum)) {
  171. DMERR("Superblock check failed: checksum %u, expected %u",
  172. csum, le32_to_cpu(sb->csum));
  173. return -EILSEQ;
  174. }
  175. /* Check metadata version */
  176. metadata_version = le32_to_cpu(sb->version);
  177. if (metadata_version < DM_CLONE_MIN_METADATA_VERSION ||
  178. metadata_version > DM_CLONE_MAX_METADATA_VERSION) {
  179. DMERR("Clone metadata version %u found, but only versions between %u and %u supported.",
  180. metadata_version, DM_CLONE_MIN_METADATA_VERSION,
  181. DM_CLONE_MAX_METADATA_VERSION);
  182. return -EINVAL;
  183. }
  184. return 0;
  185. }
  186. static struct dm_block_validator sb_validator = {
  187. .name = "superblock",
  188. .prepare_for_write = sb_prepare_for_write,
  189. .check = sb_check
  190. };
  191. /*
  192. * Check if the superblock is formatted or not. We consider the superblock to
  193. * be formatted in case we find non-zero bytes in it.
  194. */
  195. static int __superblock_all_zeroes(struct dm_block_manager *bm, bool *formatted)
  196. {
  197. int r;
  198. unsigned int i, nr_words;
  199. struct dm_block *sblock;
  200. __le64 *data_le, zero = cpu_to_le64(0);
  201. /*
  202. * We don't use a validator here because the superblock could be all
  203. * zeroes.
  204. */
  205. r = dm_bm_read_lock(bm, SUPERBLOCK_LOCATION, NULL, &sblock);
  206. if (r) {
  207. DMERR("Failed to read_lock superblock");
  208. return r;
  209. }
  210. data_le = dm_block_data(sblock);
  211. *formatted = false;
  212. /* This assumes that the block size is a multiple of 8 bytes */
  213. BUG_ON(dm_bm_block_size(bm) % sizeof(__le64));
  214. nr_words = dm_bm_block_size(bm) / sizeof(__le64);
  215. for (i = 0; i < nr_words; i++) {
  216. if (data_le[i] != zero) {
  217. *formatted = true;
  218. break;
  219. }
  220. }
  221. dm_bm_unlock(sblock);
  222. return 0;
  223. }
  224. /*---------------------------------------------------------------------------*/
  225. /*
  226. * Low-level metadata handling.
  227. */
/*
 * Read-lock the superblock through cmd->bm, validating it with sb_validator.
 * Returns the result of dm_bm_read_lock(); the locked block is handed back
 * through *sblock.
 */
static inline int superblock_read_lock(struct dm_clone_metadata *cmd,
				       struct dm_block **sblock)
{
	return dm_bm_read_lock(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock);
}
/*
 * Write-lock the superblock through cmd->bm, using sb_validator.
 * Returns the result of dm_bm_write_lock(); the locked block is handed back
 * through *sblock.
 */
static inline int superblock_write_lock(struct dm_clone_metadata *cmd,
					struct dm_block **sblock)
{
	return dm_bm_write_lock(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock);
}
/*
 * Write-lock the superblock through dm_bm_write_lock_zero(), using
 * sb_validator. Judging by the helper's name the block comes back zeroed --
 * NOTE(review): confirm against the block manager. The locked block is handed
 * back through *sblock.
 */
static inline int superblock_write_lock_zero(struct dm_clone_metadata *cmd,
					     struct dm_block **sblock)
{
	return dm_bm_write_lock_zero(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock);
}
  243. static int __copy_sm_root(struct dm_clone_metadata *cmd)
  244. {
  245. int r;
  246. size_t root_size;
  247. r = dm_sm_root_size(cmd->sm, &root_size);
  248. if (r)
  249. return r;
  250. return dm_sm_copy_root(cmd->sm, &cmd->metadata_space_map_root, root_size);
  251. }
  252. /* Save dm-clone metadata in superblock */
  253. static void __prepare_superblock(struct dm_clone_metadata *cmd,
  254. struct superblock_disk *sb)
  255. {
  256. sb->flags = cpu_to_le32(0UL);
  257. /* FIXME: UUID is currently unused */
  258. memset(sb->uuid, 0, sizeof(sb->uuid));
  259. sb->magic = cpu_to_le64(SUPERBLOCK_MAGIC);
  260. sb->version = cpu_to_le32(DM_CLONE_MAX_METADATA_VERSION);
  261. /* Save the metadata space_map root */
  262. memcpy(&sb->metadata_space_map_root, &cmd->metadata_space_map_root,
  263. sizeof(cmd->metadata_space_map_root));
  264. sb->region_size = cpu_to_le64(cmd->region_size);
  265. sb->target_size = cpu_to_le64(cmd->target_size);
  266. sb->bitset_root = cpu_to_le64(cmd->bitset_root);
  267. }
  268. static int __open_metadata(struct dm_clone_metadata *cmd)
  269. {
  270. int r;
  271. struct dm_block *sblock;
  272. struct superblock_disk *sb;
  273. r = superblock_read_lock(cmd, &sblock);
  274. if (r) {
  275. DMERR("Failed to read_lock superblock");
  276. return r;
  277. }
  278. sb = dm_block_data(sblock);
  279. /* Verify that target_size and region_size haven't changed. */
  280. if (cmd->region_size != le64_to_cpu(sb->region_size) ||
  281. cmd->target_size != le64_to_cpu(sb->target_size)) {
  282. DMERR("Region and/or target size don't match the ones in metadata");
  283. r = -EINVAL;
  284. goto out_with_lock;
  285. }
  286. r = dm_tm_open_with_sm(cmd->bm, SUPERBLOCK_LOCATION,
  287. sb->metadata_space_map_root,
  288. sizeof(sb->metadata_space_map_root),
  289. &cmd->tm, &cmd->sm);
  290. if (r) {
  291. DMERR("dm_tm_open_with_sm failed");
  292. goto out_with_lock;
  293. }
  294. dm_disk_bitset_init(cmd->tm, &cmd->bitset_info);
  295. cmd->bitset_root = le64_to_cpu(sb->bitset_root);
  296. out_with_lock:
  297. dm_bm_unlock(sblock);
  298. return r;
  299. }
  300. static int __format_metadata(struct dm_clone_metadata *cmd)
  301. {
  302. int r;
  303. struct dm_block *sblock;
  304. struct superblock_disk *sb;
  305. r = dm_tm_create_with_sm(cmd->bm, SUPERBLOCK_LOCATION, &cmd->tm, &cmd->sm);
  306. if (r) {
  307. DMERR("Failed to create transaction manager");
  308. return r;
  309. }
  310. dm_disk_bitset_init(cmd->tm, &cmd->bitset_info);
  311. r = dm_bitset_empty(&cmd->bitset_info, &cmd->bitset_root);
  312. if (r) {
  313. DMERR("Failed to create empty on-disk bitset");
  314. goto err_with_tm;
  315. }
  316. r = dm_bitset_resize(&cmd->bitset_info, cmd->bitset_root, 0,
  317. cmd->nr_regions, false, &cmd->bitset_root);
  318. if (r) {
  319. DMERR("Failed to resize on-disk bitset to %lu entries", cmd->nr_regions);
  320. goto err_with_tm;
  321. }
  322. /* Flush to disk all blocks, except the superblock */
  323. r = dm_tm_pre_commit(cmd->tm);
  324. if (r) {
  325. DMERR("dm_tm_pre_commit failed");
  326. goto err_with_tm;
  327. }
  328. r = __copy_sm_root(cmd);
  329. if (r) {
  330. DMERR("__copy_sm_root failed");
  331. goto err_with_tm;
  332. }
  333. r = superblock_write_lock_zero(cmd, &sblock);
  334. if (r) {
  335. DMERR("Failed to write_lock superblock");
  336. goto err_with_tm;
  337. }
  338. sb = dm_block_data(sblock);
  339. __prepare_superblock(cmd, sb);
  340. r = dm_tm_commit(cmd->tm, sblock);
  341. if (r) {
  342. DMERR("Failed to commit superblock");
  343. goto err_with_tm;
  344. }
  345. return 0;
  346. err_with_tm:
  347. dm_sm_destroy(cmd->sm);
  348. dm_tm_destroy(cmd->tm);
  349. return r;
  350. }
  351. static int __open_or_format_metadata(struct dm_clone_metadata *cmd, bool may_format_device)
  352. {
  353. int r;
  354. bool formatted = false;
  355. r = __superblock_all_zeroes(cmd->bm, &formatted);
  356. if (r)
  357. return r;
  358. if (!formatted)
  359. return may_format_device ? __format_metadata(cmd) : -EPERM;
  360. return __open_metadata(cmd);
  361. }
  362. static int __create_persistent_data_structures(struct dm_clone_metadata *cmd,
  363. bool may_format_device)
  364. {
  365. int r;
  366. /* Create block manager */
  367. cmd->bm = dm_block_manager_create(cmd->bdev,
  368. DM_CLONE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
  369. DM_CLONE_MAX_CONCURRENT_LOCKS);
  370. if (IS_ERR(cmd->bm)) {
  371. DMERR("Failed to create block manager");
  372. return PTR_ERR(cmd->bm);
  373. }
  374. r = __open_or_format_metadata(cmd, may_format_device);
  375. if (r)
  376. dm_block_manager_destroy(cmd->bm);
  377. return r;
  378. }
/*
 * Tear down what __create_persistent_data_structures() set up: the space map,
 * the transaction manager and finally the block manager.
 */
static void __destroy_persistent_data_structures(struct dm_clone_metadata *cmd)
{
	dm_sm_destroy(cmd->sm);
	dm_tm_destroy(cmd->tm);
	dm_block_manager_destroy(cmd->bm);
}
  385. /*---------------------------------------------------------------------------*/
  386. static size_t bitmap_size(unsigned long nr_bits)
  387. {
  388. return BITS_TO_LONGS(nr_bits) * sizeof(long);
  389. }
  390. static int __dirty_map_init(struct dirty_map *dmap, unsigned long nr_words,
  391. unsigned long nr_regions)
  392. {
  393. dmap->changed = 0;
  394. dmap->dirty_words = kvzalloc(bitmap_size(nr_words), GFP_KERNEL);
  395. if (!dmap->dirty_words)
  396. return -ENOMEM;
  397. dmap->dirty_regions = kvzalloc(bitmap_size(nr_regions), GFP_KERNEL);
  398. if (!dmap->dirty_regions) {
  399. kvfree(dmap->dirty_words);
  400. return -ENOMEM;
  401. }
  402. return 0;
  403. }
  404. static void __dirty_map_exit(struct dirty_map *dmap)
  405. {
  406. kvfree(dmap->dirty_words);
  407. kvfree(dmap->dirty_regions);
  408. }
  409. static int dirty_map_init(struct dm_clone_metadata *cmd)
  410. {
  411. if (__dirty_map_init(&cmd->dmap[0], cmd->nr_words, cmd->nr_regions)) {
  412. DMERR("Failed to allocate dirty bitmap");
  413. return -ENOMEM;
  414. }
  415. if (__dirty_map_init(&cmd->dmap[1], cmd->nr_words, cmd->nr_regions)) {
  416. DMERR("Failed to allocate dirty bitmap");
  417. __dirty_map_exit(&cmd->dmap[0]);
  418. return -ENOMEM;
  419. }
  420. cmd->current_dmap = &cmd->dmap[0];
  421. cmd->committing_dmap = NULL;
  422. return 0;
  423. }
  424. static void dirty_map_exit(struct dm_clone_metadata *cmd)
  425. {
  426. __dirty_map_exit(&cmd->dmap[0]);
  427. __dirty_map_exit(&cmd->dmap[1]);
  428. }
  429. static int __load_bitset_in_core(struct dm_clone_metadata *cmd)
  430. {
  431. int r;
  432. unsigned long i;
  433. struct dm_bitset_cursor c;
  434. /* Flush bitset cache */
  435. r = dm_bitset_flush(&cmd->bitset_info, cmd->bitset_root, &cmd->bitset_root);
  436. if (r)
  437. return r;
  438. r = dm_bitset_cursor_begin(&cmd->bitset_info, cmd->bitset_root, cmd->nr_regions, &c);
  439. if (r)
  440. return r;
  441. for (i = 0; ; i++) {
  442. if (dm_bitset_cursor_get_value(&c))
  443. __set_bit(i, cmd->region_map);
  444. else
  445. __clear_bit(i, cmd->region_map);
  446. if (i >= (cmd->nr_regions - 1))
  447. break;
  448. r = dm_bitset_cursor_next(&c);
  449. if (r)
  450. break;
  451. }
  452. dm_bitset_cursor_end(&c);
  453. return r;
  454. }
  455. struct dm_clone_metadata *dm_clone_metadata_open(struct block_device *bdev,
  456. sector_t target_size,
  457. sector_t region_size)
  458. {
  459. int r;
  460. struct dm_clone_metadata *cmd;
  461. cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
  462. if (!cmd) {
  463. DMERR("Failed to allocate memory for dm-clone metadata");
  464. return ERR_PTR(-ENOMEM);
  465. }
  466. cmd->bdev = bdev;
  467. cmd->target_size = target_size;
  468. cmd->region_size = region_size;
  469. cmd->nr_regions = dm_sector_div_up(cmd->target_size, cmd->region_size);
  470. cmd->nr_words = BITS_TO_LONGS(cmd->nr_regions);
  471. init_rwsem(&cmd->lock);
  472. spin_lock_init(&cmd->bitmap_lock);
  473. cmd->read_only = 0;
  474. cmd->fail_io = false;
  475. cmd->hydration_done = false;
  476. cmd->region_map = kvmalloc(bitmap_size(cmd->nr_regions), GFP_KERNEL);
  477. if (!cmd->region_map) {
  478. DMERR("Failed to allocate memory for region bitmap");
  479. r = -ENOMEM;
  480. goto out_with_md;
  481. }
  482. r = __create_persistent_data_structures(cmd, true);
  483. if (r)
  484. goto out_with_region_map;
  485. r = __load_bitset_in_core(cmd);
  486. if (r) {
  487. DMERR("Failed to load on-disk region map");
  488. goto out_with_pds;
  489. }
  490. r = dirty_map_init(cmd);
  491. if (r)
  492. goto out_with_pds;
  493. if (bitmap_full(cmd->region_map, cmd->nr_regions))
  494. cmd->hydration_done = true;
  495. return cmd;
  496. out_with_pds:
  497. __destroy_persistent_data_structures(cmd);
  498. out_with_region_map:
  499. kvfree(cmd->region_map);
  500. out_with_md:
  501. kfree(cmd);
  502. return ERR_PTR(r);
  503. }
  504. void dm_clone_metadata_close(struct dm_clone_metadata *cmd)
  505. {
  506. if (!cmd->fail_io)
  507. __destroy_persistent_data_structures(cmd);
  508. dirty_map_exit(cmd);
  509. kvfree(cmd->region_map);
  510. kfree(cmd);
  511. }
/*
 * Report whether the hydration_done flag is set. The flag is set once
 * region_map is found to be full, at open time or after a metadata commit.
 */
bool dm_clone_is_hydration_done(struct dm_clone_metadata *cmd)
{
	return cmd->hydration_done;
}
  516. bool dm_clone_is_region_hydrated(struct dm_clone_metadata *cmd, unsigned long region_nr)
  517. {
  518. return dm_clone_is_hydration_done(cmd) || test_bit(region_nr, cmd->region_map);
  519. }
  520. bool dm_clone_is_range_hydrated(struct dm_clone_metadata *cmd,
  521. unsigned long start, unsigned long nr_regions)
  522. {
  523. unsigned long bit;
  524. if (dm_clone_is_hydration_done(cmd))
  525. return true;
  526. bit = find_next_zero_bit(cmd->region_map, cmd->nr_regions, start);
  527. return (bit >= (start + nr_regions));
  528. }
/* Count the bits set in region_map, i.e. the regions marked as hydrated. */
unsigned int dm_clone_nr_of_hydrated_regions(struct dm_clone_metadata *cmd)
{
	return bitmap_weight(cmd->region_map, cmd->nr_regions);
}
/*
 * Find the first region at or after @start whose bit is clear in region_map.
 * Returns the result of find_next_zero_bit() bounded by cmd->nr_regions.
 */
unsigned long dm_clone_find_next_unhydrated_region(struct dm_clone_metadata *cmd,
						   unsigned long start)
{
	return find_next_zero_bit(cmd->region_map, cmd->nr_regions, start);
}
  538. static int __update_metadata_word(struct dm_clone_metadata *cmd,
  539. unsigned long *dirty_regions,
  540. unsigned long word)
  541. {
  542. int r;
  543. unsigned long index = word * BITS_PER_LONG;
  544. unsigned long max_index = min(cmd->nr_regions, (word + 1) * BITS_PER_LONG);
  545. while (index < max_index) {
  546. if (test_bit(index, dirty_regions)) {
  547. r = dm_bitset_set_bit(&cmd->bitset_info, cmd->bitset_root,
  548. index, &cmd->bitset_root);
  549. if (r) {
  550. DMERR("dm_bitset_set_bit failed");
  551. return r;
  552. }
  553. __clear_bit(index, dirty_regions);
  554. }
  555. index++;
  556. }
  557. return 0;
  558. }
  559. static int __metadata_commit(struct dm_clone_metadata *cmd)
  560. {
  561. int r;
  562. struct dm_block *sblock;
  563. struct superblock_disk *sb;
  564. /* Flush bitset cache */
  565. r = dm_bitset_flush(&cmd->bitset_info, cmd->bitset_root, &cmd->bitset_root);
  566. if (r) {
  567. DMERR("dm_bitset_flush failed");
  568. return r;
  569. }
  570. /* Flush to disk all blocks, except the superblock */
  571. r = dm_tm_pre_commit(cmd->tm);
  572. if (r) {
  573. DMERR("dm_tm_pre_commit failed");
  574. return r;
  575. }
  576. /* Save the space map root in cmd->metadata_space_map_root */
  577. r = __copy_sm_root(cmd);
  578. if (r) {
  579. DMERR("__copy_sm_root failed");
  580. return r;
  581. }
  582. /* Lock the superblock */
  583. r = superblock_write_lock_zero(cmd, &sblock);
  584. if (r) {
  585. DMERR("Failed to write_lock superblock");
  586. return r;
  587. }
  588. /* Save the metadata in superblock */
  589. sb = dm_block_data(sblock);
  590. __prepare_superblock(cmd, sb);
  591. /* Unlock superblock and commit it to disk */
  592. r = dm_tm_commit(cmd->tm, sblock);
  593. if (r) {
  594. DMERR("Failed to commit superblock");
  595. return r;
  596. }
  597. /*
  598. * FIXME: Find a more efficient way to check if the hydration is done.
  599. */
  600. if (bitmap_full(cmd->region_map, cmd->nr_regions))
  601. cmd->hydration_done = true;
  602. return 0;
  603. }
  604. static int __flush_dmap(struct dm_clone_metadata *cmd, struct dirty_map *dmap)
  605. {
  606. int r;
  607. unsigned long word;
  608. word = 0;
  609. do {
  610. word = find_next_bit(dmap->dirty_words, cmd->nr_words, word);
  611. if (word == cmd->nr_words)
  612. break;
  613. r = __update_metadata_word(cmd, dmap->dirty_regions, word);
  614. if (r)
  615. return r;
  616. __clear_bit(word, dmap->dirty_words);
  617. word++;
  618. } while (word < cmd->nr_words);
  619. r = __metadata_commit(cmd);
  620. if (r)
  621. return r;
  622. /* Update the changed flag */
  623. spin_lock_irq(&cmd->bitmap_lock);
  624. dmap->changed = 0;
  625. spin_unlock_irq(&cmd->bitmap_lock);
  626. return 0;
  627. }
  628. int dm_clone_metadata_pre_commit(struct dm_clone_metadata *cmd)
  629. {
  630. int r = 0;
  631. struct dirty_map *dmap, *next_dmap;
  632. down_write(&cmd->lock);
  633. if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) {
  634. r = -EPERM;
  635. goto out;
  636. }
  637. /* Get current dirty bitmap */
  638. dmap = cmd->current_dmap;
  639. /* Get next dirty bitmap */
  640. next_dmap = (dmap == &cmd->dmap[0]) ? &cmd->dmap[1] : &cmd->dmap[0];
  641. /*
  642. * The last commit failed, so we don't have a clean dirty-bitmap to
  643. * use.
  644. */
  645. if (WARN_ON(next_dmap->changed || cmd->committing_dmap)) {
  646. r = -EINVAL;
  647. goto out;
  648. }
  649. /* Swap dirty bitmaps */
  650. spin_lock_irq(&cmd->bitmap_lock);
  651. cmd->current_dmap = next_dmap;
  652. spin_unlock_irq(&cmd->bitmap_lock);
  653. /* Set old dirty bitmap as currently committing */
  654. cmd->committing_dmap = dmap;
  655. out:
  656. up_write(&cmd->lock);
  657. return r;
  658. }
  659. int dm_clone_metadata_commit(struct dm_clone_metadata *cmd)
  660. {
  661. int r = -EPERM;
  662. down_write(&cmd->lock);
  663. if (cmd->fail_io || dm_bm_is_read_only(cmd->bm))
  664. goto out;
  665. if (WARN_ON(!cmd->committing_dmap)) {
  666. r = -EINVAL;
  667. goto out;
  668. }
  669. r = __flush_dmap(cmd, cmd->committing_dmap);
  670. if (!r) {
  671. /* Clear committing dmap */
  672. cmd->committing_dmap = NULL;
  673. }
  674. out:
  675. up_write(&cmd->lock);
  676. return r;
  677. }
  678. int dm_clone_set_region_hydrated(struct dm_clone_metadata *cmd, unsigned long region_nr)
  679. {
  680. int r = 0;
  681. struct dirty_map *dmap;
  682. unsigned long word, flags;
  683. if (unlikely(region_nr >= cmd->nr_regions)) {
  684. DMERR("Region %lu out of range (total number of regions %lu)",
  685. region_nr, cmd->nr_regions);
  686. return -ERANGE;
  687. }
  688. word = region_nr / BITS_PER_LONG;
  689. spin_lock_irqsave(&cmd->bitmap_lock, flags);
  690. if (cmd->read_only) {
  691. r = -EPERM;
  692. goto out;
  693. }
  694. dmap = cmd->current_dmap;
  695. __set_bit(word, dmap->dirty_words);
  696. __set_bit(region_nr, dmap->dirty_regions);
  697. __set_bit(region_nr, cmd->region_map);
  698. dmap->changed = 1;
  699. out:
  700. spin_unlock_irqrestore(&cmd->bitmap_lock, flags);
  701. return r;
  702. }
  703. int dm_clone_cond_set_range(struct dm_clone_metadata *cmd, unsigned long start,
  704. unsigned long nr_regions)
  705. {
  706. int r = 0;
  707. struct dirty_map *dmap;
  708. unsigned long word, region_nr;
  709. if (unlikely(start >= cmd->nr_regions || (start + nr_regions) < start ||
  710. (start + nr_regions) > cmd->nr_regions)) {
  711. DMERR("Invalid region range: start %lu, nr_regions %lu (total number of regions %lu)",
  712. start, nr_regions, cmd->nr_regions);
  713. return -ERANGE;
  714. }
  715. spin_lock_irq(&cmd->bitmap_lock);
  716. if (cmd->read_only) {
  717. r = -EPERM;
  718. goto out;
  719. }
  720. dmap = cmd->current_dmap;
  721. for (region_nr = start; region_nr < (start + nr_regions); region_nr++) {
  722. if (!test_bit(region_nr, cmd->region_map)) {
  723. word = region_nr / BITS_PER_LONG;
  724. __set_bit(word, dmap->dirty_words);
  725. __set_bit(region_nr, dmap->dirty_regions);
  726. __set_bit(region_nr, cmd->region_map);
  727. dmap->changed = 1;
  728. }
  729. }
  730. out:
  731. spin_unlock_irq(&cmd->bitmap_lock);
  732. return r;
  733. }
  734. /*
  735. * WARNING: This must not be called concurrently with either
  736. * dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), as it changes
  737. * cmd->region_map without taking the cmd->bitmap_lock spinlock. The only
  738. * exception is after setting the metadata to read-only mode, using
  739. * dm_clone_metadata_set_read_only().
  740. *
  741. * We don't take the spinlock because __load_bitset_in_core() does I/O, so it
  742. * may block.
  743. */
  744. int dm_clone_reload_in_core_bitset(struct dm_clone_metadata *cmd)
  745. {
  746. int r = -EINVAL;
  747. down_write(&cmd->lock);
  748. if (cmd->fail_io)
  749. goto out;
  750. r = __load_bitset_in_core(cmd);
  751. out:
  752. up_write(&cmd->lock);
  753. return r;
  754. }
  755. bool dm_clone_changed_this_transaction(struct dm_clone_metadata *cmd)
  756. {
  757. bool r;
  758. unsigned long flags;
  759. spin_lock_irqsave(&cmd->bitmap_lock, flags);
  760. r = cmd->dmap[0].changed || cmd->dmap[1].changed;
  761. spin_unlock_irqrestore(&cmd->bitmap_lock, flags);
  762. return r;
  763. }
  764. int dm_clone_metadata_abort(struct dm_clone_metadata *cmd)
  765. {
  766. int r = -EPERM;
  767. down_write(&cmd->lock);
  768. if (cmd->fail_io || dm_bm_is_read_only(cmd->bm))
  769. goto out;
  770. __destroy_persistent_data_structures(cmd);
  771. r = __create_persistent_data_structures(cmd, false);
  772. if (r) {
  773. /* If something went wrong we can neither write nor read the metadata */
  774. cmd->fail_io = true;
  775. }
  776. out:
  777. up_write(&cmd->lock);
  778. return r;
  779. }
/*
 * Switch the metadata to read-only mode: set cmd->read_only under bitmap_lock
 * so that the region update functions start returning -EPERM, and, unless the
 * metadata is in the failed state, put the block manager in read-only mode.
 */
void dm_clone_metadata_set_read_only(struct dm_clone_metadata *cmd)
{
	down_write(&cmd->lock);

	/* read_only is protected by bitmap_lock, not by cmd->lock */
	spin_lock_irq(&cmd->bitmap_lock);
	cmd->read_only = 1;
	spin_unlock_irq(&cmd->bitmap_lock);

	if (!cmd->fail_io)
		dm_bm_set_read_only(cmd->bm);

	up_write(&cmd->lock);
}
/*
 * Switch the metadata back to read-write mode: clear cmd->read_only under
 * bitmap_lock and, unless the metadata is in the failed state, put the block
 * manager in read-write mode.
 */
void dm_clone_metadata_set_read_write(struct dm_clone_metadata *cmd)
{
	down_write(&cmd->lock);

	/* read_only is protected by bitmap_lock, not by cmd->lock */
	spin_lock_irq(&cmd->bitmap_lock);
	cmd->read_only = 0;
	spin_unlock_irq(&cmd->bitmap_lock);

	if (!cmd->fail_io)
		dm_bm_set_read_write(cmd->bm);

	up_write(&cmd->lock);
}
  800. int dm_clone_get_free_metadata_block_count(struct dm_clone_metadata *cmd,
  801. dm_block_t *result)
  802. {
  803. int r = -EINVAL;
  804. down_read(&cmd->lock);
  805. if (!cmd->fail_io)
  806. r = dm_sm_get_nr_free(cmd->sm, result);
  807. up_read(&cmd->lock);
  808. return r;
  809. }
  810. int dm_clone_get_metadata_dev_size(struct dm_clone_metadata *cmd,
  811. dm_block_t *result)
  812. {
  813. int r = -EINVAL;
  814. down_read(&cmd->lock);
  815. if (!cmd->fail_io)
  816. r = dm_sm_get_nr_blocks(cmd->sm, result);
  817. up_read(&cmd->lock);
  818. return r;
  819. }