dm-log-userspace-base.c 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937
  1. /*
  2. * Copyright (C) 2006-2009 Red Hat, Inc.
  3. *
  4. * This file is released under the LGPL.
  5. */
  6. #include <linux/bio.h>
  7. #include <linux/slab.h>
  8. #include <linux/jiffies.h>
  9. #include <linux/dm-dirty-log.h>
  10. #include <linux/device-mapper.h>
  11. #include <linux/dm-log-userspace.h>
  12. #include <linux/module.h>
  13. #include <linux/workqueue.h>
  14. #include "dm-log-userspace-transfer.h"
  15. #define DM_LOG_USERSPACE_VSN "1.3.0"
  16. #define FLUSH_ENTRY_POOL_SIZE 16
  17. struct dm_dirty_log_flush_entry {
  18. int type;
  19. region_t region;
  20. struct list_head list;
  21. };
  22. /*
  23. * This limit on the number of mark and clear request is, to a degree,
  24. * arbitrary. However, there is some basis for the choice in the limits
  25. * imposed on the size of data payload by dm-log-userspace-transfer.c:
  26. * dm_consult_userspace().
  27. */
  28. #define MAX_FLUSH_GROUP_COUNT 32
  29. struct log_c {
  30. struct dm_target *ti;
  31. struct dm_dev *log_dev;
  32. char *usr_argv_str;
  33. uint32_t usr_argc;
  34. uint32_t region_size;
  35. region_t region_count;
  36. uint64_t luid;
  37. char uuid[DM_UUID_LEN];
  38. /*
  39. * Mark and clear requests are held until a flush is issued
  40. * so that we can group, and thereby limit, the amount of
  41. * network traffic between kernel and userspace. The 'flush_lock'
  42. * is used to protect these lists.
  43. */
  44. spinlock_t flush_lock;
  45. struct list_head mark_list;
  46. struct list_head clear_list;
  47. /*
  48. * in_sync_hint gets set when doing is_remote_recovering. It
  49. * represents the first region that needs recovery. IOW, the
  50. * first zero bit of sync_bits. This can be useful for to limit
  51. * traffic for calls like is_remote_recovering and get_resync_work,
  52. * but be take care in its use for anything else.
  53. */
  54. uint64_t in_sync_hint;
  55. /*
  56. * Workqueue for flush of clear region requests.
  57. */
  58. struct workqueue_struct *dmlog_wq;
  59. struct delayed_work flush_log_work;
  60. atomic_t sched_flush;
  61. /*
  62. * Combine userspace flush and mark requests for efficiency.
  63. */
  64. uint32_t integrated_flush;
  65. mempool_t *flush_entry_pool;
  66. };
  67. static struct kmem_cache *_flush_entry_cache;
  68. static int userspace_do_request(struct log_c *lc, const char *uuid,
  69. int request_type, char *data, size_t data_size,
  70. char *rdata, size_t *rdata_size)
  71. {
  72. int r;
  73. /*
  74. * If the server isn't there, -ESRCH is returned,
  75. * and we must keep trying until the server is
  76. * restored.
  77. */
  78. retry:
  79. r = dm_consult_userspace(uuid, lc->luid, request_type, data,
  80. data_size, rdata, rdata_size);
  81. if (r != -ESRCH)
  82. return r;
  83. DMERR(" Userspace log server not found.");
  84. while (1) {
  85. set_current_state(TASK_INTERRUPTIBLE);
  86. schedule_timeout(2*HZ);
  87. DMWARN("Attempting to contact userspace log server...");
  88. r = dm_consult_userspace(uuid, lc->luid, DM_ULOG_CTR,
  89. lc->usr_argv_str,
  90. strlen(lc->usr_argv_str) + 1,
  91. NULL, NULL);
  92. if (!r)
  93. break;
  94. }
  95. DMINFO("Reconnected to userspace log server... DM_ULOG_CTR complete");
  96. r = dm_consult_userspace(uuid, lc->luid, DM_ULOG_RESUME, NULL,
  97. 0, NULL, NULL);
  98. if (!r)
  99. goto retry;
  100. DMERR("Error trying to resume userspace log: %d", r);
  101. return -ESRCH;
  102. }
  103. static int build_constructor_string(struct dm_target *ti,
  104. unsigned argc, char **argv,
  105. char **ctr_str)
  106. {
  107. int i, str_size;
  108. char *str = NULL;
  109. *ctr_str = NULL;
  110. /*
  111. * Determine overall size of the string.
  112. */
  113. for (i = 0, str_size = 0; i < argc; i++)
  114. str_size += strlen(argv[i]) + 1; /* +1 for space between args */
  115. str_size += 20; /* Max number of chars in a printed u64 number */
  116. str = kzalloc(str_size, GFP_KERNEL);
  117. if (!str) {
  118. DMWARN("Unable to allocate memory for constructor string");
  119. return -ENOMEM;
  120. }
  121. str_size = sprintf(str, "%llu", (unsigned long long)ti->len);
  122. for (i = 0; i < argc; i++)
  123. str_size += sprintf(str + str_size, " %s", argv[i]);
  124. *ctr_str = str;
  125. return str_size;
  126. }
  127. static void do_flush(struct work_struct *work)
  128. {
  129. int r;
  130. struct log_c *lc = container_of(work, struct log_c, flush_log_work.work);
  131. atomic_set(&lc->sched_flush, 0);
  132. r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH, NULL, 0, NULL, NULL);
  133. if (r)
  134. dm_table_event(lc->ti->table);
  135. }
  136. /*
  137. * userspace_ctr
  138. *
  139. * argv contains:
  140. * <UUID> [integrated_flush] <other args>
  141. * Where 'other args' are the userspace implementation-specific log
  142. * arguments.
  143. *
  144. * Example:
  145. * <UUID> [integrated_flush] clustered-disk <arg count> <log dev>
  146. * <region_size> [[no]sync]
  147. *
  148. * This module strips off the <UUID> and uses it for identification
  149. * purposes when communicating with userspace about a log.
  150. *
  151. * If integrated_flush is defined, the kernel combines flush
  152. * and mark requests.
  153. *
  154. * The rest of the line, beginning with 'clustered-disk', is passed
  155. * to the userspace ctr function.
  156. */
  157. static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
  158. unsigned argc, char **argv)
  159. {
  160. int r = 0;
  161. int str_size;
  162. char *ctr_str = NULL;
  163. struct log_c *lc = NULL;
  164. uint64_t rdata;
  165. size_t rdata_size = sizeof(rdata);
  166. char *devices_rdata = NULL;
  167. size_t devices_rdata_size = DM_NAME_LEN;
  168. if (argc < 3) {
  169. DMWARN("Too few arguments to userspace dirty log");
  170. return -EINVAL;
  171. }
  172. lc = kzalloc(sizeof(*lc), GFP_KERNEL);
  173. if (!lc) {
  174. DMWARN("Unable to allocate userspace log context.");
  175. return -ENOMEM;
  176. }
  177. /* The ptr value is sufficient for local unique id */
  178. lc->luid = (unsigned long)lc;
  179. lc->ti = ti;
  180. if (strlen(argv[0]) > (DM_UUID_LEN - 1)) {
  181. DMWARN("UUID argument too long.");
  182. kfree(lc);
  183. return -EINVAL;
  184. }
  185. lc->usr_argc = argc;
  186. strncpy(lc->uuid, argv[0], DM_UUID_LEN);
  187. argc--;
  188. argv++;
  189. spin_lock_init(&lc->flush_lock);
  190. INIT_LIST_HEAD(&lc->mark_list);
  191. INIT_LIST_HEAD(&lc->clear_list);
  192. if (!strcasecmp(argv[0], "integrated_flush")) {
  193. lc->integrated_flush = 1;
  194. argc--;
  195. argv++;
  196. }
  197. str_size = build_constructor_string(ti, argc, argv, &ctr_str);
  198. if (str_size < 0) {
  199. kfree(lc);
  200. return str_size;
  201. }
  202. devices_rdata = kzalloc(devices_rdata_size, GFP_KERNEL);
  203. if (!devices_rdata) {
  204. DMERR("Failed to allocate memory for device information");
  205. r = -ENOMEM;
  206. goto out;
  207. }
  208. lc->flush_entry_pool = mempool_create_slab_pool(FLUSH_ENTRY_POOL_SIZE,
  209. _flush_entry_cache);
  210. if (!lc->flush_entry_pool) {
  211. DMERR("Failed to create flush_entry_pool");
  212. r = -ENOMEM;
  213. goto out;
  214. }
  215. /*
  216. * Send table string and get back any opened device.
  217. */
  218. r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_CTR,
  219. ctr_str, str_size,
  220. devices_rdata, &devices_rdata_size);
  221. if (r < 0) {
  222. if (r == -ESRCH)
  223. DMERR("Userspace log server not found");
  224. else
  225. DMERR("Userspace log server failed to create log");
  226. goto out;
  227. }
  228. /* Since the region size does not change, get it now */
  229. rdata_size = sizeof(rdata);
  230. r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_GET_REGION_SIZE,
  231. NULL, 0, (char *)&rdata, &rdata_size);
  232. if (r) {
  233. DMERR("Failed to get region size of dirty log");
  234. goto out;
  235. }
  236. lc->region_size = (uint32_t)rdata;
  237. lc->region_count = dm_sector_div_up(ti->len, lc->region_size);
  238. if (devices_rdata_size) {
  239. if (devices_rdata[devices_rdata_size - 1] != '\0') {
  240. DMERR("DM_ULOG_CTR device return string not properly terminated");
  241. r = -EINVAL;
  242. goto out;
  243. }
  244. r = dm_get_device(ti, devices_rdata,
  245. dm_table_get_mode(ti->table), &lc->log_dev);
  246. if (r)
  247. DMERR("Failed to register %s with device-mapper",
  248. devices_rdata);
  249. }
  250. if (lc->integrated_flush) {
  251. lc->dmlog_wq = alloc_workqueue("dmlogd", WQ_MEM_RECLAIM, 0);
  252. if (!lc->dmlog_wq) {
  253. DMERR("couldn't start dmlogd");
  254. r = -ENOMEM;
  255. goto out;
  256. }
  257. INIT_DELAYED_WORK(&lc->flush_log_work, do_flush);
  258. atomic_set(&lc->sched_flush, 0);
  259. }
  260. out:
  261. kfree(devices_rdata);
  262. if (r) {
  263. if (lc->flush_entry_pool)
  264. mempool_destroy(lc->flush_entry_pool);
  265. kfree(lc);
  266. kfree(ctr_str);
  267. } else {
  268. lc->usr_argv_str = ctr_str;
  269. log->context = lc;
  270. }
  271. return r;
  272. }
  273. static void userspace_dtr(struct dm_dirty_log *log)
  274. {
  275. struct log_c *lc = log->context;
  276. if (lc->integrated_flush) {
  277. /* flush workqueue */
  278. if (atomic_read(&lc->sched_flush))
  279. flush_delayed_work(&lc->flush_log_work);
  280. destroy_workqueue(lc->dmlog_wq);
  281. }
  282. (void) dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_DTR,
  283. NULL, 0, NULL, NULL);
  284. if (lc->log_dev)
  285. dm_put_device(lc->ti, lc->log_dev);
  286. mempool_destroy(lc->flush_entry_pool);
  287. kfree(lc->usr_argv_str);
  288. kfree(lc);
  289. return;
  290. }
  291. static int userspace_presuspend(struct dm_dirty_log *log)
  292. {
  293. int r;
  294. struct log_c *lc = log->context;
  295. r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_PRESUSPEND,
  296. NULL, 0, NULL, NULL);
  297. return r;
  298. }
  299. static int userspace_postsuspend(struct dm_dirty_log *log)
  300. {
  301. int r;
  302. struct log_c *lc = log->context;
  303. /*
  304. * Run planned flush earlier.
  305. */
  306. if (lc->integrated_flush && atomic_read(&lc->sched_flush))
  307. flush_delayed_work(&lc->flush_log_work);
  308. r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_POSTSUSPEND,
  309. NULL, 0, NULL, NULL);
  310. return r;
  311. }
  312. static int userspace_resume(struct dm_dirty_log *log)
  313. {
  314. int r;
  315. struct log_c *lc = log->context;
  316. lc->in_sync_hint = 0;
  317. r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_RESUME,
  318. NULL, 0, NULL, NULL);
  319. return r;
  320. }
  321. static uint32_t userspace_get_region_size(struct dm_dirty_log *log)
  322. {
  323. struct log_c *lc = log->context;
  324. return lc->region_size;
  325. }
  326. /*
  327. * userspace_is_clean
  328. *
  329. * Check whether a region is clean. If there is any sort of
  330. * failure when consulting the server, we return not clean.
  331. *
  332. * Returns: 1 if clean, 0 otherwise
  333. */
  334. static int userspace_is_clean(struct dm_dirty_log *log, region_t region)
  335. {
  336. int r;
  337. uint64_t region64 = (uint64_t)region;
  338. int64_t is_clean;
  339. size_t rdata_size;
  340. struct log_c *lc = log->context;
  341. rdata_size = sizeof(is_clean);
  342. r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_CLEAN,
  343. (char *)&region64, sizeof(region64),
  344. (char *)&is_clean, &rdata_size);
  345. return (r) ? 0 : (int)is_clean;
  346. }
  347. /*
  348. * userspace_in_sync
  349. *
  350. * Check if the region is in-sync. If there is any sort
  351. * of failure when consulting the server, we assume that
  352. * the region is not in sync.
  353. *
  354. * If 'can_block' is set, return immediately
  355. *
  356. * Returns: 1 if in-sync, 0 if not-in-sync, -EWOULDBLOCK
  357. */
  358. static int userspace_in_sync(struct dm_dirty_log *log, region_t region,
  359. int can_block)
  360. {
  361. int r;
  362. uint64_t region64 = region;
  363. int64_t in_sync;
  364. size_t rdata_size;
  365. struct log_c *lc = log->context;
  366. /*
  367. * We can never respond directly - even if in_sync_hint is
  368. * set. This is because another machine could see a device
  369. * failure and mark the region out-of-sync. If we don't go
  370. * to userspace to ask, we might think the region is in-sync
  371. * and allow a read to pick up data that is stale. (This is
  372. * very unlikely if a device actually fails; but it is very
  373. * likely if a connection to one device from one machine fails.)
  374. *
  375. * There still might be a problem if the mirror caches the region
  376. * state as in-sync... but then this call would not be made. So,
  377. * that is a mirror problem.
  378. */
  379. if (!can_block)
  380. return -EWOULDBLOCK;
  381. rdata_size = sizeof(in_sync);
  382. r = userspace_do_request(lc, lc->uuid, DM_ULOG_IN_SYNC,
  383. (char *)&region64, sizeof(region64),
  384. (char *)&in_sync, &rdata_size);
  385. return (r) ? 0 : (int)in_sync;
  386. }
  387. static int flush_one_by_one(struct log_c *lc, struct list_head *flush_list)
  388. {
  389. int r = 0;
  390. struct dm_dirty_log_flush_entry *fe;
  391. list_for_each_entry(fe, flush_list, list) {
  392. r = userspace_do_request(lc, lc->uuid, fe->type,
  393. (char *)&fe->region,
  394. sizeof(fe->region),
  395. NULL, NULL);
  396. if (r)
  397. break;
  398. }
  399. return r;
  400. }
  401. static int flush_by_group(struct log_c *lc, struct list_head *flush_list,
  402. int flush_with_payload)
  403. {
  404. int r = 0;
  405. int count;
  406. uint32_t type = 0;
  407. struct dm_dirty_log_flush_entry *fe, *tmp_fe;
  408. LIST_HEAD(tmp_list);
  409. uint64_t group[MAX_FLUSH_GROUP_COUNT];
  410. /*
  411. * Group process the requests
  412. */
  413. while (!list_empty(flush_list)) {
  414. count = 0;
  415. list_for_each_entry_safe(fe, tmp_fe, flush_list, list) {
  416. group[count] = fe->region;
  417. count++;
  418. list_move(&fe->list, &tmp_list);
  419. type = fe->type;
  420. if (count >= MAX_FLUSH_GROUP_COUNT)
  421. break;
  422. }
  423. if (flush_with_payload) {
  424. r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH,
  425. (char *)(group),
  426. count * sizeof(uint64_t),
  427. NULL, NULL);
  428. /*
  429. * Integrated flush failed.
  430. */
  431. if (r)
  432. break;
  433. } else {
  434. r = userspace_do_request(lc, lc->uuid, type,
  435. (char *)(group),
  436. count * sizeof(uint64_t),
  437. NULL, NULL);
  438. if (r) {
  439. /*
  440. * Group send failed. Attempt one-by-one.
  441. */
  442. list_splice_init(&tmp_list, flush_list);
  443. r = flush_one_by_one(lc, flush_list);
  444. break;
  445. }
  446. }
  447. }
  448. /*
  449. * Must collect flush_entrys that were successfully processed
  450. * as a group so that they will be free'd by the caller.
  451. */
  452. list_splice_init(&tmp_list, flush_list);
  453. return r;
  454. }
  455. /*
  456. * userspace_flush
  457. *
  458. * This function is ok to block.
  459. * The flush happens in two stages. First, it sends all
  460. * clear/mark requests that are on the list. Then it
  461. * tells the server to commit them. This gives the
  462. * server a chance to optimise the commit, instead of
  463. * doing it for every request.
  464. *
  465. * Additionally, we could implement another thread that
  466. * sends the requests up to the server - reducing the
  467. * load on flush. Then the flush would have less in
  468. * the list and be responsible for the finishing commit.
  469. *
  470. * Returns: 0 on success, < 0 on failure
  471. */
  472. static int userspace_flush(struct dm_dirty_log *log)
  473. {
  474. int r = 0;
  475. unsigned long flags;
  476. struct log_c *lc = log->context;
  477. LIST_HEAD(mark_list);
  478. LIST_HEAD(clear_list);
  479. int mark_list_is_empty;
  480. int clear_list_is_empty;
  481. struct dm_dirty_log_flush_entry *fe, *tmp_fe;
  482. mempool_t *flush_entry_pool = lc->flush_entry_pool;
  483. spin_lock_irqsave(&lc->flush_lock, flags);
  484. list_splice_init(&lc->mark_list, &mark_list);
  485. list_splice_init(&lc->clear_list, &clear_list);
  486. spin_unlock_irqrestore(&lc->flush_lock, flags);
  487. mark_list_is_empty = list_empty(&mark_list);
  488. clear_list_is_empty = list_empty(&clear_list);
  489. if (mark_list_is_empty && clear_list_is_empty)
  490. return 0;
  491. r = flush_by_group(lc, &clear_list, 0);
  492. if (r)
  493. goto out;
  494. if (!lc->integrated_flush) {
  495. r = flush_by_group(lc, &mark_list, 0);
  496. if (r)
  497. goto out;
  498. r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH,
  499. NULL, 0, NULL, NULL);
  500. goto out;
  501. }
  502. /*
  503. * Send integrated flush request with mark_list as payload.
  504. */
  505. r = flush_by_group(lc, &mark_list, 1);
  506. if (r)
  507. goto out;
  508. if (mark_list_is_empty && !atomic_read(&lc->sched_flush)) {
  509. /*
  510. * When there are only clear region requests,
  511. * we schedule a flush in the future.
  512. */
  513. queue_delayed_work(lc->dmlog_wq, &lc->flush_log_work, 3 * HZ);
  514. atomic_set(&lc->sched_flush, 1);
  515. } else {
  516. /*
  517. * Cancel pending flush because we
  518. * have already flushed in mark_region.
  519. */
  520. cancel_delayed_work(&lc->flush_log_work);
  521. atomic_set(&lc->sched_flush, 0);
  522. }
  523. out:
  524. /*
  525. * We can safely remove these entries, even after failure.
  526. * Calling code will receive an error and will know that
  527. * the log facility has failed.
  528. */
  529. list_for_each_entry_safe(fe, tmp_fe, &mark_list, list) {
  530. list_del(&fe->list);
  531. mempool_free(fe, flush_entry_pool);
  532. }
  533. list_for_each_entry_safe(fe, tmp_fe, &clear_list, list) {
  534. list_del(&fe->list);
  535. mempool_free(fe, flush_entry_pool);
  536. }
  537. if (r)
  538. dm_table_event(lc->ti->table);
  539. return r;
  540. }
  541. /*
  542. * userspace_mark_region
  543. *
  544. * This function should avoid blocking unless absolutely required.
  545. * (Memory allocation is valid for blocking.)
  546. */
  547. static void userspace_mark_region(struct dm_dirty_log *log, region_t region)
  548. {
  549. unsigned long flags;
  550. struct log_c *lc = log->context;
  551. struct dm_dirty_log_flush_entry *fe;
  552. /* Wait for an allocation, but _never_ fail */
  553. fe = mempool_alloc(lc->flush_entry_pool, GFP_NOIO);
  554. BUG_ON(!fe);
  555. spin_lock_irqsave(&lc->flush_lock, flags);
  556. fe->type = DM_ULOG_MARK_REGION;
  557. fe->region = region;
  558. list_add(&fe->list, &lc->mark_list);
  559. spin_unlock_irqrestore(&lc->flush_lock, flags);
  560. return;
  561. }
  562. /*
  563. * userspace_clear_region
  564. *
  565. * This function must not block.
  566. * So, the alloc can't block. In the worst case, it is ok to
  567. * fail. It would simply mean we can't clear the region.
  568. * Does nothing to current sync context, but does mean
  569. * the region will be re-sync'ed on a reload of the mirror
  570. * even though it is in-sync.
  571. */
  572. static void userspace_clear_region(struct dm_dirty_log *log, region_t region)
  573. {
  574. unsigned long flags;
  575. struct log_c *lc = log->context;
  576. struct dm_dirty_log_flush_entry *fe;
  577. /*
  578. * If we fail to allocate, we skip the clearing of
  579. * the region. This doesn't hurt us in any way, except
  580. * to cause the region to be resync'ed when the
  581. * device is activated next time.
  582. */
  583. fe = mempool_alloc(lc->flush_entry_pool, GFP_ATOMIC);
  584. if (!fe) {
  585. DMERR("Failed to allocate memory to clear region.");
  586. return;
  587. }
  588. spin_lock_irqsave(&lc->flush_lock, flags);
  589. fe->type = DM_ULOG_CLEAR_REGION;
  590. fe->region = region;
  591. list_add(&fe->list, &lc->clear_list);
  592. spin_unlock_irqrestore(&lc->flush_lock, flags);
  593. return;
  594. }
  595. /*
  596. * userspace_get_resync_work
  597. *
  598. * Get a region that needs recovery. It is valid to return
  599. * an error for this function.
  600. *
  601. * Returns: 1 if region filled, 0 if no work, <0 on error
  602. */
  603. static int userspace_get_resync_work(struct dm_dirty_log *log, region_t *region)
  604. {
  605. int r;
  606. size_t rdata_size;
  607. struct log_c *lc = log->context;
  608. struct {
  609. int64_t i; /* 64-bit for mix arch compatibility */
  610. region_t r;
  611. } pkg;
  612. if (lc->in_sync_hint >= lc->region_count)
  613. return 0;
  614. rdata_size = sizeof(pkg);
  615. r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_RESYNC_WORK,
  616. NULL, 0, (char *)&pkg, &rdata_size);
  617. *region = pkg.r;
  618. return (r) ? r : (int)pkg.i;
  619. }
  620. /*
  621. * userspace_set_region_sync
  622. *
  623. * Set the sync status of a given region. This function
  624. * must not fail.
  625. */
  626. static void userspace_set_region_sync(struct dm_dirty_log *log,
  627. region_t region, int in_sync)
  628. {
  629. struct log_c *lc = log->context;
  630. struct {
  631. region_t r;
  632. int64_t i;
  633. } pkg;
  634. pkg.r = region;
  635. pkg.i = (int64_t)in_sync;
  636. (void) userspace_do_request(lc, lc->uuid, DM_ULOG_SET_REGION_SYNC,
  637. (char *)&pkg, sizeof(pkg), NULL, NULL);
  638. /*
  639. * It would be nice to be able to report failures.
  640. * However, it is easy enough to detect and resolve.
  641. */
  642. return;
  643. }
  644. /*
  645. * userspace_get_sync_count
  646. *
  647. * If there is any sort of failure when consulting the server,
  648. * we assume that the sync count is zero.
  649. *
  650. * Returns: sync count on success, 0 on failure
  651. */
  652. static region_t userspace_get_sync_count(struct dm_dirty_log *log)
  653. {
  654. int r;
  655. size_t rdata_size;
  656. uint64_t sync_count;
  657. struct log_c *lc = log->context;
  658. rdata_size = sizeof(sync_count);
  659. r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_SYNC_COUNT,
  660. NULL, 0, (char *)&sync_count, &rdata_size);
  661. if (r)
  662. return 0;
  663. if (sync_count >= lc->region_count)
  664. lc->in_sync_hint = lc->region_count;
  665. return (region_t)sync_count;
  666. }
  667. /*
  668. * userspace_status
  669. *
  670. * Returns: amount of space consumed
  671. */
  672. static int userspace_status(struct dm_dirty_log *log, status_type_t status_type,
  673. char *result, unsigned maxlen)
  674. {
  675. int r = 0;
  676. char *table_args;
  677. size_t sz = (size_t)maxlen;
  678. struct log_c *lc = log->context;
  679. switch (status_type) {
  680. case STATUSTYPE_INFO:
  681. r = userspace_do_request(lc, lc->uuid, DM_ULOG_STATUS_INFO,
  682. NULL, 0, result, &sz);
  683. if (r) {
  684. sz = 0;
  685. DMEMIT("%s 1 COM_FAILURE", log->type->name);
  686. }
  687. break;
  688. case STATUSTYPE_TABLE:
  689. sz = 0;
  690. table_args = strchr(lc->usr_argv_str, ' ');
  691. BUG_ON(!table_args); /* There will always be a ' ' */
  692. table_args++;
  693. DMEMIT("%s %u %s ", log->type->name, lc->usr_argc, lc->uuid);
  694. if (lc->integrated_flush)
  695. DMEMIT("integrated_flush ");
  696. DMEMIT("%s ", table_args);
  697. break;
  698. }
  699. return (r) ? 0 : (int)sz;
  700. }
  701. /*
  702. * userspace_is_remote_recovering
  703. *
  704. * Returns: 1 if region recovering, 0 otherwise
  705. */
  706. static int userspace_is_remote_recovering(struct dm_dirty_log *log,
  707. region_t region)
  708. {
  709. int r;
  710. uint64_t region64 = region;
  711. struct log_c *lc = log->context;
  712. static unsigned long limit;
  713. struct {
  714. int64_t is_recovering;
  715. uint64_t in_sync_hint;
  716. } pkg;
  717. size_t rdata_size = sizeof(pkg);
  718. /*
  719. * Once the mirror has been reported to be in-sync,
  720. * it will never again ask for recovery work. So,
  721. * we can safely say there is not a remote machine
  722. * recovering if the device is in-sync. (in_sync_hint
  723. * must be reset at resume time.)
  724. */
  725. if (region < lc->in_sync_hint)
  726. return 0;
  727. else if (time_after(limit, jiffies))
  728. return 1;
  729. limit = jiffies + (HZ / 4);
  730. r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_REMOTE_RECOVERING,
  731. (char *)&region64, sizeof(region64),
  732. (char *)&pkg, &rdata_size);
  733. if (r)
  734. return 1;
  735. lc->in_sync_hint = pkg.in_sync_hint;
  736. return (int)pkg.is_recovering;
  737. }
  738. static struct dm_dirty_log_type _userspace_type = {
  739. .name = "userspace",
  740. .module = THIS_MODULE,
  741. .ctr = userspace_ctr,
  742. .dtr = userspace_dtr,
  743. .presuspend = userspace_presuspend,
  744. .postsuspend = userspace_postsuspend,
  745. .resume = userspace_resume,
  746. .get_region_size = userspace_get_region_size,
  747. .is_clean = userspace_is_clean,
  748. .in_sync = userspace_in_sync,
  749. .flush = userspace_flush,
  750. .mark_region = userspace_mark_region,
  751. .clear_region = userspace_clear_region,
  752. .get_resync_work = userspace_get_resync_work,
  753. .set_region_sync = userspace_set_region_sync,
  754. .get_sync_count = userspace_get_sync_count,
  755. .status = userspace_status,
  756. .is_remote_recovering = userspace_is_remote_recovering,
  757. };
  758. static int __init userspace_dirty_log_init(void)
  759. {
  760. int r = 0;
  761. _flush_entry_cache = KMEM_CACHE(dm_dirty_log_flush_entry, 0);
  762. if (!_flush_entry_cache) {
  763. DMWARN("Unable to create flush_entry_cache: No memory.");
  764. return -ENOMEM;
  765. }
  766. r = dm_ulog_tfr_init();
  767. if (r) {
  768. DMWARN("Unable to initialize userspace log communications");
  769. kmem_cache_destroy(_flush_entry_cache);
  770. return r;
  771. }
  772. r = dm_dirty_log_type_register(&_userspace_type);
  773. if (r) {
  774. DMWARN("Couldn't register userspace dirty log type");
  775. dm_ulog_tfr_exit();
  776. kmem_cache_destroy(_flush_entry_cache);
  777. return r;
  778. }
  779. DMINFO("version " DM_LOG_USERSPACE_VSN " loaded");
  780. return 0;
  781. }
  782. static void __exit userspace_dirty_log_exit(void)
  783. {
  784. dm_dirty_log_type_unregister(&_userspace_type);
  785. dm_ulog_tfr_exit();
  786. kmem_cache_destroy(_flush_entry_cache);
  787. DMINFO("version " DM_LOG_USERSPACE_VSN " unloaded");
  788. return;
  789. }
/* Module entry and exit points. */
module_init(userspace_dirty_log_init);
module_exit(userspace_dirty_log_exit);

/* Module metadata. */
MODULE_DESCRIPTION(DM_NAME " userspace dirty log link");
MODULE_AUTHOR("Jonathan Brassow <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");