dm-block-manager.c

/*
 * Copyright (C) 2011 Red Hat, Inc.
 *
 * This file is released under the GPL.
 */
#include "dm-block-manager.h"
#include "dm-persistent-data-internal.h"
#include "../dm-bufio.h"

#include <linux/crc32c.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/rwsem.h>
#include <linux/device-mapper.h>
#include <linux/stacktrace.h>
#include <linux/sched/task.h>

#define DM_MSG_PREFIX "block manager"

/*----------------------------------------------------------------*/

#ifdef CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING

/*
 * This is a read/write semaphore with a couple of differences.
 *
 * i) There is a restriction on the number of concurrent read locks that
 * may be held at once.  This is just an implementation detail.
 *
 * ii) Recursive locking attempts are detected and return EINVAL.  A stack
 * trace is also emitted for the previous lock acquisition.
 *
 * iii) Priority is given to write locks.
 */
#define MAX_HOLDERS 4
#define MAX_STACK 10

typedef unsigned long stack_entries[MAX_STACK];

struct block_lock {
	spinlock_t lock;
	__s32 count;
	struct list_head waiters;
	struct task_struct *holders[MAX_HOLDERS];

#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	struct stack_trace traces[MAX_HOLDERS];
	stack_entries entries[MAX_HOLDERS];
#endif
};

struct waiter {
	struct list_head list;
	struct task_struct *task;
	int wants_write;
};

static unsigned __find_holder(struct block_lock *lock,
			      struct task_struct *task)
{
	unsigned i;

	for (i = 0; i < MAX_HOLDERS; i++)
		if (lock->holders[i] == task)
			break;

	BUG_ON(i == MAX_HOLDERS);
	return i;
}

/* call this *after* you increment lock->count */
static void __add_holder(struct block_lock *lock, struct task_struct *task)
{
	unsigned h = __find_holder(lock, NULL);
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	struct stack_trace *t;
#endif

	get_task_struct(task);
	lock->holders[h] = task;

#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	t = lock->traces + h;
	t->nr_entries = 0;
	t->max_entries = MAX_STACK;
	t->entries = lock->entries[h];
	t->skip = 2;
	save_stack_trace(t);
#endif
}

/* call this *before* you decrement lock->count */
static void __del_holder(struct block_lock *lock, struct task_struct *task)
{
	unsigned h = __find_holder(lock, task);
	lock->holders[h] = NULL;
	put_task_struct(task);
}

static int __check_holder(struct block_lock *lock)
{
	unsigned i;

	for (i = 0; i < MAX_HOLDERS; i++) {
		if (lock->holders[i] == current) {
			DMERR("recursive lock detected in metadata");
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
			DMERR("previously held here:");
			print_stack_trace(lock->traces + i, 4);

			DMERR("subsequent acquisition attempted here:");
			dump_stack();
#endif
			return -EINVAL;
		}
	}

	return 0;
}
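
/*
 * A waiter parks itself in __wait() below and is released when the waker
 * clears w->task in __wake_waiter().  The smp_mb() in __wake_waiter()
 * ensures the list removal and the read of w->task are complete before the
 * NULL store, since the struct waiter lives on the waiter's stack and may
 * be reused as soon as __wait() returns.
 */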
static void __wait(struct waiter *w)
{
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);

		if (!w->task)
			break;

		schedule();
	}

	set_current_state(TASK_RUNNING);
}

static void __wake_waiter(struct waiter *w)
{
	struct task_struct *task;

	list_del(&w->list);
	task = w->task;
	smp_mb();
	w->task = NULL;
	wake_up_process(task);
}

/*
 * We either wake a few readers or a single writer.
 */
static void __wake_many(struct block_lock *lock)
{
	struct waiter *w, *tmp;

	BUG_ON(lock->count < 0);
	list_for_each_entry_safe(w, tmp, &lock->waiters, list) {
		if (lock->count >= MAX_HOLDERS)
			return;

		if (w->wants_write) {
			if (lock->count > 0)
				return; /* still read locked */

			lock->count = -1;
			__add_holder(lock, w->task);
			__wake_waiter(w);
			return;
		}

		lock->count++;
		__add_holder(lock, w->task);
		__wake_waiter(w);
	}
}
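
/*
 * For example, if __wake_many() above runs with count == 0 and a waiter
 * queue of [reader, reader, writer, reader], the two leading readers are
 * woken (count becomes 2) and the walk stops at the writer because the
 * lock is now read held; the writer gets its turn from a later
 * __wake_many() call once the readers have dropped the lock again.
 */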
static void bl_init(struct block_lock *lock)
{
	int i;

	spin_lock_init(&lock->lock);
	lock->count = 0;
	INIT_LIST_HEAD(&lock->waiters);
	for (i = 0; i < MAX_HOLDERS; i++)
		lock->holders[i] = NULL;
}

static int __available_for_read(struct block_lock *lock)
{
	return lock->count >= 0 &&
		lock->count < MAX_HOLDERS &&
		list_empty(&lock->waiters);
}

static int bl_down_read(struct block_lock *lock)
{
	int r;
	struct waiter w;

	spin_lock(&lock->lock);
	r = __check_holder(lock);
	if (r) {
		spin_unlock(&lock->lock);
		return r;
	}

	if (__available_for_read(lock)) {
		lock->count++;
		__add_holder(lock, current);
		spin_unlock(&lock->lock);
		return 0;
	}

	get_task_struct(current);

	w.task = current;
	w.wants_write = 0;
	list_add_tail(&w.list, &lock->waiters);
	spin_unlock(&lock->lock);

	__wait(&w);
	put_task_struct(current);
	return 0;
}

static int bl_down_read_nonblock(struct block_lock *lock)
{
	int r;

	spin_lock(&lock->lock);
	r = __check_holder(lock);
	if (r)
		goto out;

	if (__available_for_read(lock)) {
		lock->count++;
		__add_holder(lock, current);
		r = 0;
	} else
		r = -EWOULDBLOCK;

out:
	spin_unlock(&lock->lock);
	return r;
}

static void bl_up_read(struct block_lock *lock)
{
	spin_lock(&lock->lock);
	BUG_ON(lock->count <= 0);
	__del_holder(lock, current);
	--lock->count;
	if (!list_empty(&lock->waiters))
		__wake_many(lock);
	spin_unlock(&lock->lock);
}

static int bl_down_write(struct block_lock *lock)
{
	int r;
	struct waiter w;

	spin_lock(&lock->lock);
	r = __check_holder(lock);
	if (r) {
		spin_unlock(&lock->lock);
		return r;
	}

	if (lock->count == 0 && list_empty(&lock->waiters)) {
		lock->count = -1;
		__add_holder(lock, current);
		spin_unlock(&lock->lock);
		return 0;
	}

	get_task_struct(current);
	w.task = current;
	w.wants_write = 1;

	/*
	 * Writers given priority. We know there's only one mutator in the
	 * system, so ignoring the ordering reversal.
	 */
	list_add(&w.list, &lock->waiters);
	spin_unlock(&lock->lock);

	__wait(&w);
	put_task_struct(current);

	return 0;
}

static void bl_up_write(struct block_lock *lock)
{
	spin_lock(&lock->lock);
	__del_holder(lock, current);
	lock->count = 0;
	if (!list_empty(&lock->waiters))
		__wake_many(lock);
	spin_unlock(&lock->lock);
}

static void report_recursive_bug(dm_block_t b, int r)
{
	if (r == -EINVAL)
		DMERR("recursive acquisition of block %llu requested.",
		      (unsigned long long) b);
}

#else  /* !CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING */

#define bl_init(x) do { } while (0)
#define bl_down_read(x) 0
#define bl_down_read_nonblock(x) 0
#define bl_up_read(x) do { } while (0)
#define bl_down_write(x) 0
#define bl_up_write(x) do { } while (0)
#define report_recursive_bug(x, y) do { } while (0)

#endif /* CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING */

/*----------------------------------------------------------------*/
/*
 * Block manager is currently implemented using dm-bufio.  struct
 * dm_block_manager and struct dm_block map directly onto a couple of
 * structs in the bufio interface.  I want to retain the freedom to move
 * away from bufio in the future.  So these structs are just cast within
 * this .c file, rather than making it through to the public interface.
 */
static struct dm_buffer *to_buffer(struct dm_block *b)
{
	return (struct dm_buffer *) b;
}

dm_block_t dm_block_location(struct dm_block *b)
{
	return dm_bufio_get_block_number(to_buffer(b));
}
EXPORT_SYMBOL_GPL(dm_block_location);

void *dm_block_data(struct dm_block *b)
{
	return dm_bufio_get_block_data(to_buffer(b));
}
EXPORT_SYMBOL_GPL(dm_block_data);

struct buffer_aux {
	struct dm_block_validator *validator;
	int write_locked;

#ifdef CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING
	struct block_lock lock;
#endif
};

static void dm_block_manager_alloc_callback(struct dm_buffer *buf)
{
	struct buffer_aux *aux = dm_bufio_get_aux_data(buf);

	aux->validator = NULL;
	bl_init(&aux->lock);
}

static void dm_block_manager_write_callback(struct dm_buffer *buf)
{
	struct buffer_aux *aux = dm_bufio_get_aux_data(buf);

	if (aux->validator) {
		aux->validator->prepare_for_write(aux->validator, (struct dm_block *) buf,
			 dm_bufio_get_block_size(dm_bufio_get_client(buf)));
	}
}

/*----------------------------------------------------------------
 * Public interface
 *--------------------------------------------------------------*/
struct dm_block_manager {
	struct dm_bufio_client *bufio;
	bool read_only:1;
};

struct dm_block_manager *dm_block_manager_create(struct block_device *bdev,
						 unsigned block_size,
						 unsigned max_held_per_thread)
{
	int r;
	struct dm_block_manager *bm;

	bm = kmalloc(sizeof(*bm), GFP_KERNEL);
	if (!bm) {
		r = -ENOMEM;
		goto bad;
	}

	bm->bufio = dm_bufio_client_create(bdev, block_size, max_held_per_thread,
					   sizeof(struct buffer_aux),
					   dm_block_manager_alloc_callback,
					   dm_block_manager_write_callback);
	if (IS_ERR(bm->bufio)) {
		r = PTR_ERR(bm->bufio);
		kfree(bm);
		goto bad;
	}

	bm->read_only = false;

	return bm;

bad:
	return ERR_PTR(r);
}
EXPORT_SYMBOL_GPL(dm_block_manager_create);

void dm_block_manager_destroy(struct dm_block_manager *bm)
{
	dm_bufio_client_destroy(bm->bufio);
	kfree(bm);
}
EXPORT_SYMBOL_GPL(dm_block_manager_destroy);

unsigned dm_bm_block_size(struct dm_block_manager *bm)
{
	return dm_bufio_get_block_size(bm->bufio);
}
EXPORT_SYMBOL_GPL(dm_bm_block_size);

dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm)
{
	return dm_bufio_get_device_size(bm->bufio);
}
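
/*
 * The first locker to pass a validator has it run against the buffer and,
 * on success, installed in the aux data; subsequent lockers must then ask
 * for that same validator or the lock attempt fails with -EINVAL.
 */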
static int dm_bm_validate_buffer(struct dm_block_manager *bm,
				 struct dm_buffer *buf,
				 struct buffer_aux *aux,
				 struct dm_block_validator *v)
{
	if (unlikely(!aux->validator)) {
		int r;
		if (!v)
			return 0;
		r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(bm->bufio));
		if (unlikely(r)) {
			DMERR_LIMIT("%s validator check failed for block %llu", v->name,
				    (unsigned long long) dm_bufio_get_block_number(buf));
			return r;
		}
		aux->validator = v;
	} else {
		if (unlikely(aux->validator != v)) {
			DMERR_LIMIT("validator mismatch (old=%s vs new=%s) for block %llu",
				    aux->validator->name, v ? v->name : "NULL",
				    (unsigned long long) dm_bufio_get_block_number(buf));
			return -EINVAL;
		}
	}

	return 0;
}

int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b,
		    struct dm_block_validator *v,
		    struct dm_block **result)
{
	struct buffer_aux *aux;
	void *p;
	int r;

	p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result);
	if (unlikely(IS_ERR(p)))
		return PTR_ERR(p);

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_read(&aux->lock);
	if (unlikely(r)) {
		dm_bufio_release(to_buffer(*result));
		report_recursive_bug(b, r);
		return r;
	}

	aux->write_locked = 0;

	r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
	if (unlikely(r)) {
		bl_up_read(&aux->lock);
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_read_lock);

int dm_bm_write_lock(struct dm_block_manager *bm,
		     dm_block_t b, struct dm_block_validator *v,
		     struct dm_block **result)
{
	struct buffer_aux *aux;
	void *p;
	int r;

	if (bm->read_only)
		return -EPERM;

	p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result);
	if (unlikely(IS_ERR(p)))
		return PTR_ERR(p);

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_write(&aux->lock);
	if (r) {
		dm_bufio_release(to_buffer(*result));
		report_recursive_bug(b, r);
		return r;
	}

	aux->write_locked = 1;

	r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
	if (unlikely(r)) {
		bl_up_write(&aux->lock);
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_write_lock);
int dm_bm_read_try_lock(struct dm_block_manager *bm,
			dm_block_t b, struct dm_block_validator *v,
			struct dm_block **result)
{
	struct buffer_aux *aux;
	void *p;
	int r;

	p = dm_bufio_get(bm->bufio, b, (struct dm_buffer **) result);
	if (unlikely(IS_ERR(p)))
		return PTR_ERR(p);
	if (unlikely(!p))
		return -EWOULDBLOCK;

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_read_nonblock(&aux->lock);
	if (r < 0) {
		dm_bufio_release(to_buffer(*result));
		report_recursive_bug(b, r);
		return r;
	}
	aux->write_locked = 0;

	r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
	if (unlikely(r)) {
		bl_up_read(&aux->lock);
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	return 0;
}

int dm_bm_write_lock_zero(struct dm_block_manager *bm,
			  dm_block_t b, struct dm_block_validator *v,
			  struct dm_block **result)
{
	int r;
	struct buffer_aux *aux;
	void *p;

	if (bm->read_only)
		return -EPERM;

	p = dm_bufio_new(bm->bufio, b, (struct dm_buffer **) result);
	if (unlikely(IS_ERR(p)))
		return PTR_ERR(p);

	memset(p, 0, dm_bm_block_size(bm));

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_write(&aux->lock);
	if (r) {
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	aux->write_locked = 1;
	aux->validator = v;

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_write_lock_zero);

void dm_bm_unlock(struct dm_block *b)
{
	struct buffer_aux *aux;
	aux = dm_bufio_get_aux_data(to_buffer(b));

	if (aux->write_locked) {
		dm_bufio_mark_buffer_dirty(to_buffer(b));
		bl_up_write(&aux->lock);
	} else
		bl_up_read(&aux->lock);

	dm_bufio_release(to_buffer(b));
}
EXPORT_SYMBOL_GPL(dm_bm_unlock);
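
/*
 * Illustrative sketch (not part of this interface) of the typical
 * lifecycle a caller goes through, with error handling trimmed; bdev and
 * do_something_with() are placeholders.
 *
 *	struct dm_block_manager *bm;
 *	struct dm_block *blk;
 *
 *	bm = dm_block_manager_create(bdev, 4096, 1);
 *	if (!IS_ERR(bm)) {
 *		if (!dm_bm_read_lock(bm, 0, NULL, &blk)) {
 *			do_something_with(dm_block_data(blk));
 *			dm_bm_unlock(blk);
 *		}
 *		dm_block_manager_destroy(bm);
 *	}
 */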
int dm_bm_flush(struct dm_block_manager *bm)
{
	if (bm->read_only)
		return -EPERM;

	return dm_bufio_write_dirty_buffers(bm->bufio);
}
EXPORT_SYMBOL_GPL(dm_bm_flush);

void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b)
{
	dm_bufio_prefetch(bm->bufio, b, 1);
}

bool dm_bm_is_read_only(struct dm_block_manager *bm)
{
	return bm->read_only;
}
EXPORT_SYMBOL_GPL(dm_bm_is_read_only);

void dm_bm_set_read_only(struct dm_block_manager *bm)
{
	bm->read_only = true;
}
EXPORT_SYMBOL_GPL(dm_bm_set_read_only);

void dm_bm_set_read_write(struct dm_block_manager *bm)
{
	bm->read_only = false;
}
EXPORT_SYMBOL_GPL(dm_bm_set_read_write);

u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor)
{
	return crc32c(~(u32) 0, data, len) ^ init_xor;
}
EXPORT_SYMBOL_GPL(dm_bm_checksum);
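
/*
 * Illustrative sketch of how a block validator typically uses
 * dm_bm_checksum().  The struct hypothetical_header layout and the
 * HYPOTHETICAL_CSUM_XOR value are made up for the example, and the
 * dm_block_validator callback signatures are assumed to be those
 * declared in dm-block-manager.h.
 *
 *	struct hypothetical_header {
 *		__le32 csum;	// covers everything after this field
 *		__le32 flags;
 *		__le64 blocknr;
 *	} __packed;
 *
 *	#define HYPOTHETICAL_CSUM_XOR 12345
 *
 *	static void hyp_prepare_for_write(struct dm_block_validator *v,
 *					  struct dm_block *b, size_t block_size)
 *	{
 *		struct hypothetical_header *h = dm_block_data(b);
 *
 *		h->blocknr = cpu_to_le64(dm_block_location(b));
 *		h->csum = cpu_to_le32(dm_bm_checksum(&h->flags,
 *						     block_size - sizeof(__le32),
 *						     HYPOTHETICAL_CSUM_XOR));
 *	}
 *
 *	static int hyp_check(struct dm_block_validator *v,
 *			     struct dm_block *b, size_t block_size)
 *	{
 *		struct hypothetical_header *h = dm_block_data(b);
 *		__le32 csum = cpu_to_le32(dm_bm_checksum(&h->flags,
 *						block_size - sizeof(__le32),
 *						HYPOTHETICAL_CSUM_XOR));
 *
 *		return csum == h->csum ? 0 : -EILSEQ;
 *	}
 */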
/*----------------------------------------------------------------*/

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
MODULE_DESCRIPTION("Immutable metadata library for dm");

/*----------------------------------------------------------------*/