/*
 * Copyright (C) 2011 Red Hat, Inc.
 *
 * This file is released under the GPL.
 */
#include "dm-block-manager.h"
#include "dm-persistent-data-internal.h"

#include <linux/dm-bufio.h>
#include <linux/crc32c.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/rwsem.h>
#include <linux/device-mapper.h>
#include <linux/stacktrace.h>
#include <linux/sched/task.h>

#define DM_MSG_PREFIX "block manager"

/*----------------------------------------------------------------*/

#ifdef CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING

/*
 * This is a read/write semaphore with a couple of differences.
 *
 * i) There is a restriction on the number of concurrent read locks that
 * may be held at once.  This is just an implementation detail.
 *
 * ii) Recursive locking attempts are detected and return EINVAL.  A stack
 * trace is also emitted for the previous lock acquisition.
 *
 * iii) Priority is given to write locks.
 */
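
/*
 * Illustrative usage sketch (not part of the original source): the bl_*
 * helpers below pair up like an ordinary rwsem, except that a recursive
 * acquisition attempt by the holding task fails with -EINVAL rather than
 * deadlocking.  Roughly:
 *
 *	struct block_lock bl;
 *
 *	bl_init(&bl);
 *	if (!bl_down_read(&bl)) {
 *		... read the data protected by bl ...
 *		bl_up_read(&bl);
 *	}
 *
 * A second bl_down_read(&bl) from the same task, made before the
 * bl_up_read(), would return -EINVAL and (with stack tracing enabled)
 * print where the lock was first taken.
 */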

#define MAX_HOLDERS 4
#define MAX_STACK 10

struct stack_store {
	unsigned int nr_entries;
	unsigned long entries[MAX_STACK];
};

struct block_lock {
	spinlock_t lock;
	__s32 count;
	struct list_head waiters;
	struct task_struct *holders[MAX_HOLDERS];

#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	struct stack_store traces[MAX_HOLDERS];
#endif
};

struct waiter {
	struct list_head list;
	struct task_struct *task;
	int wants_write;
};

static unsigned __find_holder(struct block_lock *lock,
			      struct task_struct *task)
{
	unsigned i;

	for (i = 0; i < MAX_HOLDERS; i++)
		if (lock->holders[i] == task)
			break;

	BUG_ON(i == MAX_HOLDERS);
	return i;
}

/* call this *after* you increment lock->count */
static void __add_holder(struct block_lock *lock, struct task_struct *task)
{
	unsigned h = __find_holder(lock, NULL);
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	struct stack_store *t;
#endif

	get_task_struct(task);
	lock->holders[h] = task;

#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	t = lock->traces + h;
	t->nr_entries = stack_trace_save(t->entries, MAX_STACK, 2);
#endif
}

/* call this *before* you decrement lock->count */
static void __del_holder(struct block_lock *lock, struct task_struct *task)
{
	unsigned h = __find_holder(lock, task);

	lock->holders[h] = NULL;
	put_task_struct(task);
}

static int __check_holder(struct block_lock *lock)
{
	unsigned i;

	for (i = 0; i < MAX_HOLDERS; i++) {
		if (lock->holders[i] == current) {
			DMERR("recursive lock detected in metadata");
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
			DMERR("previously held here:");
			stack_trace_print(lock->traces[i].entries,
					  lock->traces[i].nr_entries, 4);

			DMERR("subsequent acquisition attempted here:");
			dump_stack();
#endif
			return -EINVAL;
		}
	}

	return 0;
}

static void __wait(struct waiter *w)
{
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);

		if (!w->task)
			break;

		schedule();
	}

	set_current_state(TASK_RUNNING);
}

static void __wake_waiter(struct waiter *w)
{
	struct task_struct *task;

	list_del(&w->list);
	task = w->task;
	smp_mb();
	w->task = NULL;
	wake_up_process(task);
}

/*
 * We either wake a few readers or a single writer.
 */
static void __wake_many(struct block_lock *lock)
{
	struct waiter *w, *tmp;

	BUG_ON(lock->count < 0);
	list_for_each_entry_safe(w, tmp, &lock->waiters, list) {
		if (lock->count >= MAX_HOLDERS)
			return;

		if (w->wants_write) {
			if (lock->count > 0)
				return; /* still read locked */

			lock->count = -1;
			__add_holder(lock, w->task);
			__wake_waiter(w);
			return;
		}

		lock->count++;
		__add_holder(lock, w->task);
		__wake_waiter(w);
	}
}

static void bl_init(struct block_lock *lock)
{
	int i;

	spin_lock_init(&lock->lock);
	lock->count = 0;
	INIT_LIST_HEAD(&lock->waiters);
	for (i = 0; i < MAX_HOLDERS; i++)
		lock->holders[i] = NULL;
}

static int __available_for_read(struct block_lock *lock)
{
	return lock->count >= 0 &&
		lock->count < MAX_HOLDERS &&
		list_empty(&lock->waiters);
}

static int bl_down_read(struct block_lock *lock)
{
	int r;
	struct waiter w;

	spin_lock(&lock->lock);
	r = __check_holder(lock);
	if (r) {
		spin_unlock(&lock->lock);
		return r;
	}

	if (__available_for_read(lock)) {
		lock->count++;
		__add_holder(lock, current);
		spin_unlock(&lock->lock);
		return 0;
	}

	get_task_struct(current);

	w.task = current;
	w.wants_write = 0;
	list_add_tail(&w.list, &lock->waiters);
	spin_unlock(&lock->lock);

	__wait(&w);
	put_task_struct(current);
	return 0;
}

static int bl_down_read_nonblock(struct block_lock *lock)
{
	int r;

	spin_lock(&lock->lock);
	r = __check_holder(lock);
	if (r)
		goto out;

	if (__available_for_read(lock)) {
		lock->count++;
		__add_holder(lock, current);
		r = 0;
	} else
		r = -EWOULDBLOCK;

out:
	spin_unlock(&lock->lock);
	return r;
}

static void bl_up_read(struct block_lock *lock)
{
	spin_lock(&lock->lock);
	BUG_ON(lock->count <= 0);
	__del_holder(lock, current);
	--lock->count;
	if (!list_empty(&lock->waiters))
		__wake_many(lock);
	spin_unlock(&lock->lock);
}

static int bl_down_write(struct block_lock *lock)
{
	int r;
	struct waiter w;

	spin_lock(&lock->lock);
	r = __check_holder(lock);
	if (r) {
		spin_unlock(&lock->lock);
		return r;
	}

	if (lock->count == 0 && list_empty(&lock->waiters)) {
		lock->count = -1;
		__add_holder(lock, current);
		spin_unlock(&lock->lock);
		return 0;
	}

	get_task_struct(current);
	w.task = current;
	w.wants_write = 1;

	/*
	 * Writers are given priority.  We know there's only one mutator in
	 * the system, so we ignore the ordering reversal.
	 */
	list_add(&w.list, &lock->waiters);
	spin_unlock(&lock->lock);

	__wait(&w);
	put_task_struct(current);

	return 0;
}

static void bl_up_write(struct block_lock *lock)
{
	spin_lock(&lock->lock);
	__del_holder(lock, current);
	lock->count = 0;
	if (!list_empty(&lock->waiters))
		__wake_many(lock);
	spin_unlock(&lock->lock);
}

static void report_recursive_bug(dm_block_t b, int r)
{
	if (r == -EINVAL)
		DMERR("recursive acquisition of block %llu requested.",
		      (unsigned long long) b);
}

#else  /* !CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING */

#define bl_init(x) do { } while (0)
#define bl_down_read(x) 0
#define bl_down_read_nonblock(x) 0
#define bl_up_read(x) do { } while (0)
#define bl_down_write(x) 0
#define bl_up_write(x) do { } while (0)
#define report_recursive_bug(x, y) do { } while (0)

#endif /* CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING */

/*----------------------------------------------------------------*/

/*
 * Block manager is currently implemented using dm-bufio.  struct
 * dm_block_manager and struct dm_block map directly onto a couple of
 * structs in the bufio interface.  I want to retain the freedom to move
 * away from bufio in the future.  So these structs are just cast within
 * this .c file, rather than making it through to the public interface.
 */
static struct dm_buffer *to_buffer(struct dm_block *b)
{
	return (struct dm_buffer *) b;
}

dm_block_t dm_block_location(struct dm_block *b)
{
	return dm_bufio_get_block_number(to_buffer(b));
}
EXPORT_SYMBOL_GPL(dm_block_location);

void *dm_block_data(struct dm_block *b)
{
	return dm_bufio_get_block_data(to_buffer(b));
}
EXPORT_SYMBOL_GPL(dm_block_data);

struct buffer_aux {
	struct dm_block_validator *validator;
	int write_locked;

#ifdef CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING
	struct block_lock lock;
#endif
};

static void dm_block_manager_alloc_callback(struct dm_buffer *buf)
{
	struct buffer_aux *aux = dm_bufio_get_aux_data(buf);

	aux->validator = NULL;
	bl_init(&aux->lock);
}

static void dm_block_manager_write_callback(struct dm_buffer *buf)
{
	struct buffer_aux *aux = dm_bufio_get_aux_data(buf);

	if (aux->validator) {
		aux->validator->prepare_for_write(aux->validator, (struct dm_block *) buf,
			 dm_bufio_get_block_size(dm_bufio_get_client(buf)));
	}
}

/*----------------------------------------------------------------
 * Public interface
 *--------------------------------------------------------------*/
struct dm_block_manager {
	struct dm_bufio_client *bufio;
	bool read_only:1;
};

struct dm_block_manager *dm_block_manager_create(struct block_device *bdev,
						 unsigned block_size,
						 unsigned max_held_per_thread)
{
	int r;
	struct dm_block_manager *bm;

	bm = kmalloc(sizeof(*bm), GFP_KERNEL);
	if (!bm) {
		r = -ENOMEM;
		goto bad;
	}

	bm->bufio = dm_bufio_client_create(bdev, block_size, max_held_per_thread,
					   sizeof(struct buffer_aux),
					   dm_block_manager_alloc_callback,
					   dm_block_manager_write_callback);
	if (IS_ERR(bm->bufio)) {
		r = PTR_ERR(bm->bufio);
		kfree(bm);
		goto bad;
	}

	bm->read_only = false;

	return bm;

bad:
	return ERR_PTR(r);
}
EXPORT_SYMBOL_GPL(dm_block_manager_create);

void dm_block_manager_destroy(struct dm_block_manager *bm)
{
	dm_bufio_client_destroy(bm->bufio);
	kfree(bm);
}
EXPORT_SYMBOL_GPL(dm_block_manager_destroy);
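
/*
 * Illustrative lifecycle sketch (not part of the original source): a client
 * typically creates one manager per metadata device and brackets each block
 * access with a lock/unlock pair.  The 4096 byte block size and the
 * metadata_bdev/blk names below are assumptions for the example only.
 *
 *	struct dm_block_manager *bm;
 *	struct dm_block *blk;
 *	int r;
 *
 *	bm = dm_block_manager_create(metadata_bdev, 4096, 1);
 *	if (IS_ERR(bm))
 *		return PTR_ERR(bm);
 *
 *	r = dm_bm_read_lock(bm, 0, NULL, &blk);
 *	if (!r) {
 *		void *data = dm_block_data(blk);
 *		... inspect data ...
 *		dm_bm_unlock(blk);
 *	}
 *
 *	dm_block_manager_destroy(bm);
 */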

unsigned dm_bm_block_size(struct dm_block_manager *bm)
{
	return dm_bufio_get_block_size(bm->bufio);
}
EXPORT_SYMBOL_GPL(dm_bm_block_size);

dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm)
{
	return dm_bufio_get_device_size(bm->bufio);
}

static int dm_bm_validate_buffer(struct dm_block_manager *bm,
				 struct dm_buffer *buf,
				 struct buffer_aux *aux,
				 struct dm_block_validator *v)
{
	if (unlikely(!aux->validator)) {
		int r;

		if (!v)
			return 0;

		r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(bm->bufio));
		if (unlikely(r)) {
			DMERR_LIMIT("%s validator check failed for block %llu", v->name,
				    (unsigned long long) dm_bufio_get_block_number(buf));
			return r;
		}
		aux->validator = v;
	} else {
		if (unlikely(aux->validator != v)) {
			DMERR_LIMIT("validator mismatch (old=%s vs new=%s) for block %llu",
				    aux->validator->name, v ? v->name : "NULL",
				    (unsigned long long) dm_bufio_get_block_number(buf));
			return -EINVAL;
		}
	}

	return 0;
}
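
/*
 * Illustrative sketch (not part of the original source): a minimal read-only
 * validator shaped to match the way check() is invoked above, i.e. with the
 * validator itself, the block and the block size.  The example_* names and
 * the header layout are hypothetical.
 *
 *	static int example_check(struct dm_block_validator *v,
 *				 struct dm_block *b, size_t block_size)
 *	{
 *		struct example_header *h = dm_block_data(b);
 *
 *		if (le64_to_cpu(h->blocknr) != dm_block_location(b))
 *			return -ENOTBLK;
 *		return 0;
 *	}
 *
 *	static struct dm_block_validator example_validator = {
 *		.name = "example",
 *		.check = example_check,
 *	};
 */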

int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b,
		    struct dm_block_validator *v,
		    struct dm_block **result)
{
	struct buffer_aux *aux;
	void *p;
	int r;

	p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result);
	if (IS_ERR(p))
		return PTR_ERR(p);

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_read(&aux->lock);
	if (unlikely(r)) {
		dm_bufio_release(to_buffer(*result));
		report_recursive_bug(b, r);
		return r;
	}

	aux->write_locked = 0;

	r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
	if (unlikely(r)) {
		bl_up_read(&aux->lock);
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_read_lock);

int dm_bm_write_lock(struct dm_block_manager *bm,
		     dm_block_t b, struct dm_block_validator *v,
		     struct dm_block **result)
{
	struct buffer_aux *aux;
	void *p;
	int r;

	if (dm_bm_is_read_only(bm))
		return -EPERM;

	p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result);
	if (IS_ERR(p))
		return PTR_ERR(p);

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_write(&aux->lock);
	if (r) {
		dm_bufio_release(to_buffer(*result));
		report_recursive_bug(b, r);
		return r;
	}

	aux->write_locked = 1;

	r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
	if (unlikely(r)) {
		bl_up_write(&aux->lock);
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_write_lock);

int dm_bm_read_try_lock(struct dm_block_manager *bm,
			dm_block_t b, struct dm_block_validator *v,
			struct dm_block **result)
{
	struct buffer_aux *aux;
	void *p;
	int r;

	p = dm_bufio_get(bm->bufio, b, (struct dm_buffer **) result);
	if (IS_ERR(p))
		return PTR_ERR(p);
	if (unlikely(!p))
		return -EWOULDBLOCK;

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_read_nonblock(&aux->lock);
	if (r < 0) {
		dm_bufio_release(to_buffer(*result));
		report_recursive_bug(b, r);
		return r;
	}
	aux->write_locked = 0;

	r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
	if (unlikely(r)) {
		bl_up_read(&aux->lock);
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	return 0;
}

int dm_bm_write_lock_zero(struct dm_block_manager *bm,
			  dm_block_t b, struct dm_block_validator *v,
			  struct dm_block **result)
{
	int r;
	struct buffer_aux *aux;
	void *p;

	if (dm_bm_is_read_only(bm))
		return -EPERM;

	p = dm_bufio_new(bm->bufio, b, (struct dm_buffer **) result);
	if (IS_ERR(p))
		return PTR_ERR(p);

	memset(p, 0, dm_bm_block_size(bm));

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_write(&aux->lock);
	if (r) {
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	aux->write_locked = 1;
	aux->validator = v;

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_write_lock_zero);

void dm_bm_unlock(struct dm_block *b)
{
	struct buffer_aux *aux;

	aux = dm_bufio_get_aux_data(to_buffer(b));

	if (aux->write_locked) {
		dm_bufio_mark_buffer_dirty(to_buffer(b));
		bl_up_write(&aux->lock);
	} else
		bl_up_read(&aux->lock);

	dm_bufio_release(to_buffer(b));
}
EXPORT_SYMBOL_GPL(dm_bm_unlock);

int dm_bm_flush(struct dm_block_manager *bm)
{
	if (dm_bm_is_read_only(bm))
		return -EPERM;

	return dm_bufio_write_dirty_buffers(bm->bufio);
}
EXPORT_SYMBOL_GPL(dm_bm_flush);

void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b)
{
	dm_bufio_prefetch(bm->bufio, b, 1);
}

bool dm_bm_is_read_only(struct dm_block_manager *bm)
{
	return bm ? bm->read_only : true;
}
EXPORT_SYMBOL_GPL(dm_bm_is_read_only);

void dm_bm_set_read_only(struct dm_block_manager *bm)
{
	if (bm)
		bm->read_only = true;
}
EXPORT_SYMBOL_GPL(dm_bm_set_read_only);

void dm_bm_set_read_write(struct dm_block_manager *bm)
{
	if (bm)
		bm->read_only = false;
}
EXPORT_SYMBOL_GPL(dm_bm_set_read_write);

u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor)
{
	return crc32c(~(u32) 0, data, len) ^ init_xor;
}
EXPORT_SYMBOL_GPL(dm_bm_checksum);
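
/*
 * Illustrative note (not part of the original source): callers conventionally
 * checksum everything in the block after the csum field itself, seeding the
 * xor with a per-format constant so blocks of different types cannot be
 * confused.  The EXAMPLE_CSUM_XOR value and the header layout (csum first,
 * flags immediately after) are hypothetical.
 *
 *	#define EXAMPLE_CSUM_XOR 160774
 *
 *	h->csum = cpu_to_le32(dm_bm_checksum(&h->flags,
 *					     block_size - sizeof(__le32),
 *					     EXAMPLE_CSUM_XOR));
 */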

/*----------------------------------------------------------------*/

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
MODULE_DESCRIPTION("Immutable metadata library for dm");

/*----------------------------------------------------------------*/