dm-rq.c

/*
 * Copyright (C) 2016 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm-core.h"
#include "dm-rq.h"

#include <linux/elevator.h> /* for rq_end_sector() */
#include <linux/blk-mq.h>

#define DM_MSG_PREFIX "core-rq"

/*
 * One of these is allocated per request.
 */
struct dm_rq_target_io {
        struct mapped_device *md;
        struct dm_target *ti;
        struct request *orig, *clone;
        struct kthread_work work;
        blk_status_t error;
        union map_info info;
        struct dm_stats_aux stats_aux;
        unsigned long duration_jiffies;
        unsigned n_sectors;
        unsigned completed;
};

#define DM_MQ_NR_HW_QUEUES 1
#define DM_MQ_QUEUE_DEPTH 2048

static unsigned dm_mq_nr_hw_queues = DM_MQ_NR_HW_QUEUES;
static unsigned dm_mq_queue_depth = DM_MQ_QUEUE_DEPTH;

/*
 * The number of reserved IOs in request-based DM's mempools, settable by
 * the user.
 */
#define RESERVED_REQUEST_BASED_IOS 256
static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS;

unsigned dm_get_reserved_rq_based_ios(void)
{
        return __dm_get_module_param(&reserved_rq_based_ios,
                                     RESERVED_REQUEST_BASED_IOS, DM_RESERVED_MAX_IOS);
}
EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios);

static unsigned dm_get_blk_mq_nr_hw_queues(void)
{
        return __dm_get_module_param(&dm_mq_nr_hw_queues, 1, 32);
}

static unsigned dm_get_blk_mq_queue_depth(void)
{
        return __dm_get_module_param(&dm_mq_queue_depth,
                                     DM_MQ_QUEUE_DEPTH, BLK_MQ_MAX_DEPTH);
}

int dm_request_based(struct mapped_device *md)
{
        return queue_is_mq(md->queue);
}

void dm_start_queue(struct request_queue *q)
{
        blk_mq_unquiesce_queue(q);
        blk_mq_kick_requeue_list(q);
}

void dm_stop_queue(struct request_queue *q)
{
        blk_mq_quiesce_queue(q);
}

/*
 * Partial completion handling for request-based dm
 */
static void end_clone_bio(struct bio *clone)
{
        struct dm_rq_clone_bio_info *info =
                container_of(clone, struct dm_rq_clone_bio_info, clone);
        struct dm_rq_target_io *tio = info->tio;
        unsigned int nr_bytes = info->orig->bi_iter.bi_size;
        blk_status_t error = clone->bi_status;
        bool is_last = !clone->bi_next;

        bio_put(clone);

        if (tio->error)
                /*
                 * An error has already been detected on the request.
                 * Once an error has occurred, just let clone->end_io()
                 * handle the remainder.
                 */
                return;
        else if (error) {
                /*
                 * Don't report the error to the upper layer yet.
                 * The error handling decision is made by the target driver
                 * when the request is completed.
                 */
                tio->error = error;
                goto exit;
        }

        /*
         * I/O for the bio completed successfully.
         * Report the data completion to the upper layer.
         */
        tio->completed += nr_bytes;

        /*
         * Update the original request.
         * Do not use blk_mq_end_request() here, because it may complete
         * the original request before the clone, and break the ordering.
         */
        if (is_last)
 exit:
                blk_update_request(tio->orig, BLK_STS_OK, tio->completed);
}
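
/*
 * The tio for an original request lives in that request's blk-mq
 * per-request payload (pdu).
 */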
static struct dm_rq_target_io *tio_from_request(struct request *rq)
{
        return blk_mq_rq_to_pdu(rq);
}
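
/*
 * Account the completed I/O in dm-stats; this is a no-op unless statistics
 * collection has been enabled on this mapped device.
 */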
static void rq_end_stats(struct mapped_device *md, struct request *orig)
{
        if (unlikely(dm_stats_used(&md->stats))) {
                struct dm_rq_target_io *tio = tio_from_request(orig);

                tio->duration_jiffies = jiffies - tio->duration_jiffies;
                dm_stats_account_io(&md->stats, rq_data_dir(orig),
                                    blk_rq_pos(orig), tio->n_sectors, true,
                                    tio->duration_jiffies, &tio->stats_aux);
        }
}

/*
 * Don't touch any member of the md after calling this function, because
 * the md may be freed in dm_put() at the end of this function.
 * Alternatively, do dm_get() before calling this function and dm_put()
 * later.
 */
static void rq_completed(struct mapped_device *md)
{
        /* nudge anyone waiting on suspend queue */
        if (unlikely(wq_has_sleeper(&md->wait)))
                wake_up(&md->wait);

        /*
         * dm_put() must be at the end of this function. See the comment above.
         */
        dm_put(md);
}

/*
 * Complete the clone and the original request.
 * Must be called without clone's queue lock held,
 * see end_clone_request() for more details.
 */
static void dm_end_request(struct request *clone, blk_status_t error)
{
        struct dm_rq_target_io *tio = clone->end_io_data;
        struct mapped_device *md = tio->md;
        struct request *rq = tio->orig;

        blk_rq_unprep_clone(clone);
        tio->ti->type->release_clone_rq(clone, NULL);

        rq_end_stats(md, rq);
        blk_mq_end_request(rq, error);
        rq_completed(md);
}

static void __dm_mq_kick_requeue_list(struct request_queue *q, unsigned long msecs)
{
        blk_mq_delay_kick_requeue_list(q, msecs);
}

void dm_mq_kick_requeue_list(struct mapped_device *md)
{
        __dm_mq_kick_requeue_list(dm_get_md_queue(md), 0);
}
EXPORT_SYMBOL(dm_mq_kick_requeue_list);

static void dm_mq_delay_requeue_request(struct request *rq, unsigned long msecs)
{
        blk_mq_requeue_request(rq, false);
        __dm_mq_kick_requeue_list(rq->q, msecs);
}
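
/*
 * Requeue the original request: release any prepared clone, put the
 * original request back on the requeue list (after a 100 ms delay if
 * requested) and drop the md reference taken in dm_start_request().
 */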
static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_requeue)
{
        struct mapped_device *md = tio->md;
        struct request *rq = tio->orig;
        unsigned long delay_ms = delay_requeue ? 100 : 0;

        rq_end_stats(md, rq);
        if (tio->clone) {
                blk_rq_unprep_clone(tio->clone);
                tio->ti->type->release_clone_rq(tio->clone, NULL);
        }

        dm_mq_delay_requeue_request(rq, delay_ms);
        rq_completed(md);
}
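
/*
 * Act on completion of a clone: give the target's rq_end_io() hook a chance
 * to decide the outcome, disable discard/write-same/write-zeroes support if
 * the underlying device rejected such an operation, and then finish, retain
 * or requeue the original request accordingly.
 */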
static void dm_done(struct request *clone, blk_status_t error, bool mapped)
{
        int r = DM_ENDIO_DONE;
        struct dm_rq_target_io *tio = clone->end_io_data;
        dm_request_endio_fn rq_end_io = NULL;

        if (tio->ti) {
                rq_end_io = tio->ti->type->rq_end_io;

                if (mapped && rq_end_io)
                        r = rq_end_io(tio->ti, clone, error, &tio->info);
        }

        if (unlikely(error == BLK_STS_TARGET)) {
                if (req_op(clone) == REQ_OP_DISCARD &&
                    !clone->q->limits.max_discard_sectors)
                        disable_discard(tio->md);
                else if (req_op(clone) == REQ_OP_WRITE_SAME &&
                         !clone->q->limits.max_write_same_sectors)
                        disable_write_same(tio->md);
                else if (req_op(clone) == REQ_OP_WRITE_ZEROES &&
                         !clone->q->limits.max_write_zeroes_sectors)
                        disable_write_zeroes(tio->md);
        }

        switch (r) {
        case DM_ENDIO_DONE:
                /* The target wants to complete the I/O */
                dm_end_request(clone, error);
                break;
        case DM_ENDIO_INCOMPLETE:
                /* The target will handle the I/O */
                return;
        case DM_ENDIO_REQUEUE:
                /* The target wants to requeue the I/O */
                dm_requeue_original_request(tio, false);
                break;
        case DM_ENDIO_DELAY_REQUEUE:
                /* The target wants to requeue the I/O after a delay */
                dm_requeue_original_request(tio, true);
                break;
        default:
                DMWARN("unimplemented target endio return value: %d", r);
                BUG();
        }
}

/*
 * Request completion handler for request-based dm
 */
static void dm_softirq_done(struct request *rq)
{
        bool mapped = true;
        struct dm_rq_target_io *tio = tio_from_request(rq);
        struct request *clone = tio->clone;

        if (!clone) {
                struct mapped_device *md = tio->md;

                rq_end_stats(md, rq);
                blk_mq_end_request(rq, tio->error);
                rq_completed(md);
                return;
        }

        if (rq->rq_flags & RQF_FAILED)
                mapped = false;

        dm_done(clone, tio->error, mapped);
}

/*
 * Complete the clone and the original request with the error status
 * through softirq context.
 */
static void dm_complete_request(struct request *rq, blk_status_t error)
{
        struct dm_rq_target_io *tio = tio_from_request(rq);

        tio->error = error;
        blk_mq_complete_request(rq);
}

/*
 * Complete the not-mapped clone and the original request with the error
 * status through softirq context.
 * The target's rq_end_io() function isn't called.
 * This may be used when the target's clone_and_map_rq() function fails.
 */
static void dm_kill_unmapped_request(struct request *rq, blk_status_t error)
{
        rq->rq_flags |= RQF_FAILED;
        dm_complete_request(rq, error);
}
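
/*
 * The clone's ->end_io handler: propagate the clone's completion status to
 * the original request.
 */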
static void end_clone_request(struct request *clone, blk_status_t error)
{
        struct dm_rq_target_io *tio = clone->end_io_data;

        dm_complete_request(tio->orig, error);
}
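
/*
 * Insert the prepared clone into the underlying device's request queue.
 * BLK_STS_RESOURCE and BLK_STS_DEV_RESOURCE are passed back to the caller
 * so the original request can be requeued; any other error completes the
 * original request with that status.
 */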
static blk_status_t dm_dispatch_clone_request(struct request *clone, struct request *rq)
{
        blk_status_t r;

        if (blk_queue_io_stat(clone->q))
                clone->rq_flags |= RQF_IO_STAT;

        clone->start_time_ns = ktime_get_ns();
        r = blk_insert_cloned_request(clone->q, clone);
        if (r != BLK_STS_OK && r != BLK_STS_RESOURCE && r != BLK_STS_DEV_RESOURCE)
                /* must complete clone in terms of original request */
                dm_complete_request(rq, r);
        return r;
}
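
/*
 * Bio constructor passed to blk_rq_prep_clone(): link each cloned bio to
 * its original bio and tio and route its completion to end_clone_bio().
 */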
static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
                                 void *data)
{
        struct dm_rq_target_io *tio = data;
        struct dm_rq_clone_bio_info *info =
                container_of(bio, struct dm_rq_clone_bio_info, clone);

        info->orig = bio_orig;
        info->tio = tio;
        bio->bi_end_io = end_clone_bio;

        return 0;
}
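
/*
 * Prepare the clone from the original request using the md's bioset and
 * hook up end_clone_request() as its completion handler.
 */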
static int setup_clone(struct request *clone, struct request *rq,
                       struct dm_rq_target_io *tio, gfp_t gfp_mask)
{
        int r;

        r = blk_rq_prep_clone(clone, rq, &tio->md->bs, gfp_mask,
                              dm_rq_bio_constructor, tio);
        if (r)
                return r;

        clone->end_io = end_clone_request;
        clone->end_io_data = tio;

        tio->clone = clone;

        return 0;
}
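
/*
 * Reset the per-request tio fields before the original request is mapped.
 */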
static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
                     struct mapped_device *md)
{
        tio->md = md;
        tio->ti = NULL;
        tio->clone = NULL;
        tio->orig = rq;
        tio->error = 0;
        tio->completed = 0;
        /*
         * Avoid initializing info for blk-mq; it passes
         * target-specific data through info.ptr
         * (see: dm_mq_init_request)
         */
        if (!md->init_tio_pdu)
                memset(&tio->info, 0, sizeof(tio->info));
}

/*
 * Returns:
 * DM_MAPIO_*       : the request has been processed as indicated
 * DM_MAPIO_REQUEUE : the original request needs to be immediately requeued
 * < 0              : the request was completed due to failure
 */
static int map_request(struct dm_rq_target_io *tio)
{
        int r;
        struct dm_target *ti = tio->ti;
        struct mapped_device *md = tio->md;
        struct request *rq = tio->orig;
        struct request *clone = NULL;
        blk_status_t ret;

        r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
        switch (r) {
        case DM_MAPIO_SUBMITTED:
                /* The target has taken the I/O to submit by itself later */
                break;
        case DM_MAPIO_REMAPPED:
                if (setup_clone(clone, rq, tio, GFP_ATOMIC)) {
                        /* -ENOMEM */
                        ti->type->release_clone_rq(clone, &tio->info);
                        return DM_MAPIO_REQUEUE;
                }

                /* The target has remapped the I/O so dispatch it */
                trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)),
                                     blk_rq_pos(rq));
                ret = dm_dispatch_clone_request(clone, rq);
                if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
                        blk_rq_unprep_clone(clone);
                        blk_mq_cleanup_rq(clone);
                        tio->ti->type->release_clone_rq(clone, &tio->info);
                        tio->clone = NULL;
                        return DM_MAPIO_REQUEUE;
                }
                break;
        case DM_MAPIO_REQUEUE:
                /* The target wants to requeue the I/O */
                break;
        case DM_MAPIO_DELAY_REQUEUE:
                /* The target wants to requeue the I/O after a delay */
                dm_requeue_original_request(tio, true);
                break;
        case DM_MAPIO_KILL:
                /* The target wants to complete the I/O */
                dm_kill_unmapped_request(rq, BLK_STS_IOERR);
                break;
        default:
                DMWARN("unimplemented target map return value: %d", r);
                BUG();
        }

        return r;
}

/* DEPRECATED: previously used for request-based merge heuristic in dm_request_fn() */
ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf)
{
        return sprintf(buf, "%u\n", 0);
}

ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md,
                                                     const char *buf, size_t count)
{
        return count;
}
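
/*
 * Mark the original request as started in blk-mq, record dm-stats if
 * enabled, and take a reference on the md for the in-flight I/O.
 */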
static void dm_start_request(struct mapped_device *md, struct request *orig)
{
        blk_mq_start_request(orig);

        if (unlikely(dm_stats_used(&md->stats))) {
                struct dm_rq_target_io *tio = tio_from_request(orig);

                tio->duration_jiffies = jiffies;
                tio->n_sectors = blk_rq_sectors(orig);
                dm_stats_account_io(&md->stats, rq_data_dir(orig),
                                    blk_rq_pos(orig), tio->n_sectors, false, 0,
                                    &tio->stats_aux);
        }

        /*
         * Hold the md reference here for the in-flight I/O.
         * We can't rely on the reference count held by the device opener,
         * because the device may be closed during the request completion
         * when all bios are completed.
         * See the comment in rq_completed() too.
         */
        dm_get(md);
}
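
/*
 * blk-mq .init_request callback: stash the mapped_device in the tio and,
 * when the target uses per-io data, point info.ptr just past the tio.
 */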
static int dm_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
                              unsigned int hctx_idx, unsigned int numa_node)
{
        struct mapped_device *md = set->driver_data;
        struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);

        /*
         * Must initialize md member of tio, otherwise it won't
         * be available in dm_mq_queue_rq.
         */
        tio->md = md;

        if (md->init_tio_pdu) {
                /* target-specific per-io data is immediately after the tio */
                tio->info.ptr = tio + 1;
        }

        return 0;
}
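
/*
 * blk-mq .queue_rq callback: resolve the target (normally the immutable
 * target), back off with BLK_STS_RESOURCE if the target reports busy, and
 * otherwise start the request, initialize the tio and map the request.
 */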
static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
                                   const struct blk_mq_queue_data *bd)
{
        struct request *rq = bd->rq;
        struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
        struct mapped_device *md = tio->md;
        struct dm_target *ti = md->immutable_target;

        if (unlikely(!ti)) {
                int srcu_idx;
                struct dm_table *map = dm_get_live_table(md, &srcu_idx);

                ti = dm_table_find_target(map, 0);
                dm_put_live_table(md, srcu_idx);
        }

        if (ti->type->busy && ti->type->busy(ti))
                return BLK_STS_RESOURCE;

        dm_start_request(md, rq);

        /* Init tio using md established in .init_request */
        init_tio(tio, rq, md);

        /*
         * Establish tio->ti before calling map_request().
         */
        tio->ti = ti;

        /* Direct call is fine since .queue_rq allows allocations */
        if (map_request(tio) == DM_MAPIO_REQUEUE) {
                /* Undo dm_start_request() before requeuing */
                rq_end_stats(md, rq);
                rq_completed(md);
                return BLK_STS_RESOURCE;
        }

        return BLK_STS_OK;
}

static const struct blk_mq_ops dm_mq_ops = {
        .queue_rq = dm_mq_queue_rq,
        .complete = dm_softirq_done,
        .init_request = dm_mq_init_request,
};
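
/*
 * Allocate and configure the blk-mq tag set (sized for the tio plus any
 * target per-io data) and initialize md->queue as a blk-mq request queue.
 */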
int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t)
{
        struct request_queue *q;
        struct dm_target *immutable_tgt;
        int err;

        md->tag_set = kzalloc_node(sizeof(struct blk_mq_tag_set), GFP_KERNEL, md->numa_node_id);
        if (!md->tag_set)
                return -ENOMEM;

        md->tag_set->ops = &dm_mq_ops;
        md->tag_set->queue_depth = dm_get_blk_mq_queue_depth();
        md->tag_set->numa_node = md->numa_node_id;
        md->tag_set->flags = BLK_MQ_F_SHOULD_MERGE;
        md->tag_set->nr_hw_queues = dm_get_blk_mq_nr_hw_queues();
        md->tag_set->driver_data = md;

        md->tag_set->cmd_size = sizeof(struct dm_rq_target_io);
        immutable_tgt = dm_table_get_immutable_target(t);
        if (immutable_tgt && immutable_tgt->per_io_data_size) {
                /* any target-specific per-io data is immediately after the tio */
                md->tag_set->cmd_size += immutable_tgt->per_io_data_size;
                md->init_tio_pdu = true;
        }

        err = blk_mq_alloc_tag_set(md->tag_set);
        if (err)
                goto out_kfree_tag_set;

        q = blk_mq_init_allocated_queue(md->tag_set, md->queue, true);
        if (IS_ERR(q)) {
                err = PTR_ERR(q);
                goto out_tag_set;
        }

        return 0;

out_tag_set:
        blk_mq_free_tag_set(md->tag_set);
out_kfree_tag_set:
        kfree(md->tag_set);
        md->tag_set = NULL;

        return err;
}
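
/*
 * Release the tag set allocated by dm_mq_init_request_queue(), if any.
 */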
void dm_mq_cleanup_mapped_device(struct mapped_device *md)
{
        if (md->tag_set) {
                blk_mq_free_tag_set(md->tag_set);
                kfree(md->tag_set);
                md->tag_set = NULL;
        }
}

module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools");

/* Unused, but preserved for userspace compatibility */
static bool use_blk_mq = true;
module_param(use_blk_mq, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(use_blk_mq, "Use block multiqueue for request-based DM devices");

module_param(dm_mq_nr_hw_queues, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dm_mq_nr_hw_queues, "Number of hardware queues for request-based dm-mq devices");

module_param(dm_mq_queue_depth, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dm_mq_queue_depth, "Queue depth for request-based dm-mq devices");