blk-mq-tag.c

/*
 * Tag allocation using scalable bitmaps. Uses active queue tracking to support
 * fairer distribution of tags between multiple submitters when a shared tag map
 * is used.
 *
 * Copyright (C) 2013-2014 Jens Axboe
 */
#include <linux/kernel.h>
#include <linux/module.h>

#include <linux/blk-mq.h>
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-tag.h"

bool blk_mq_has_free_tags(struct blk_mq_tags *tags)
{
        if (!tags)
                return true;

        return sbitmap_any_bit_clear(&tags->bitmap_tags.sb);
}

/*
 * If a previously inactive queue goes active, bump the active user count.
 */
bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
{
        if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
            !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
                atomic_inc(&hctx->tags->active_queues);

        return true;
}

/*
 * Wake up all tasks potentially sleeping on tags.
 */
void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
{
        sbitmap_queue_wake_all(&tags->bitmap_tags);
        if (include_reserve)
                sbitmap_queue_wake_all(&tags->breserved_tags);
}

/*
 * If a previously busy queue goes inactive, potential waiters could now
 * be allowed to queue. Wake them up and check.
 */
void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
{
        struct blk_mq_tags *tags = hctx->tags;

        if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
                return;

        atomic_dec(&tags->active_queues);

        blk_mq_tag_wakeup_all(tags, false);
}

/*
 * For shared tag users, we track the number of currently active users
 * and attempt to provide a fair share of the tag depth for each of them.
 */
static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
                                  struct sbitmap_queue *bt)
{
        unsigned int depth, users;

        if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_SHARED))
                return true;
        if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
                return true;

        /*
         * Don't try dividing an ant
         */
        if (bt->sb.depth == 1)
                return true;

        users = atomic_read(&hctx->tags->active_queues);
        if (!users)
                return true;

        /*
         * Allow at least some tags
         */
        depth = max((bt->sb.depth + users - 1) / users, 4U);
        return atomic_read(&hctx->nr_active) < depth;
}

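/*
 * Illustrative note (not from the original source): the fair-share check
 * above allows each active queue roughly ceil(depth / users) in-flight
 * requests, clamped to at least 4. For example, with a shared bitmap depth
 * of 256 and 8 active queues, each queue may have up to (256 + 7) / 8 = 32
 * requests outstanding; with 128 active queues the clamp keeps the quota
 * at 4.
 */
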
static int __blk_mq_get_tag(struct blk_mq_alloc_data *data,
                            struct sbitmap_queue *bt)
{
        if (!(data->flags & BLK_MQ_REQ_INTERNAL) &&
            !hctx_may_queue(data->hctx, bt))
                return -1;
        if (data->shallow_depth)
                return __sbitmap_queue_get_shallow(bt, data->shallow_depth);
        else
                return __sbitmap_queue_get(bt);
}

unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
{
        struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
        struct sbitmap_queue *bt;
        struct sbq_wait_state *ws;
        DEFINE_WAIT(wait);
        unsigned int tag_offset;
        bool drop_ctx;
        int tag;

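        /*
         * Reserved and regular tags share one namespace: reserved tags
         * occupy indexes [0, nr_reserved_tags), so regular allocations
         * below are offset by nr_reserved_tags. blk_mq_put_tag() reverses
         * that offset when the tag is freed.
         */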
        if (data->flags & BLK_MQ_REQ_RESERVED) {
                if (unlikely(!tags->nr_reserved_tags)) {
                        WARN_ON_ONCE(1);
                        return BLK_MQ_TAG_FAIL;
                }
                bt = &tags->breserved_tags;
                tag_offset = 0;
        } else {
                bt = &tags->bitmap_tags;
                tag_offset = tags->nr_reserved_tags;
        }

        tag = __blk_mq_get_tag(data, bt);
        if (tag != -1)
                goto found_tag;

        if (data->flags & BLK_MQ_REQ_NOWAIT)
                return BLK_MQ_TAG_FAIL;

        ws = bt_wait_ptr(bt, data->hctx);
        drop_ctx = data->ctx == NULL;
        do {
                prepare_to_wait(&ws->wait, &wait, TASK_UNINTERRUPTIBLE);

                tag = __blk_mq_get_tag(data, bt);
                if (tag != -1)
                        break;

                /*
                 * We're out of tags on this hardware queue, kick any
                 * pending IO submits before going to sleep waiting for
                 * some to complete.
                 */
                blk_mq_run_hw_queue(data->hctx, false);

                /*
                 * Retry tag allocation after running the hardware queue,
                 * as running the queue may also have found completions.
                 */
                tag = __blk_mq_get_tag(data, bt);
                if (tag != -1)
                        break;

                if (data->ctx)
                        blk_mq_put_ctx(data->ctx);

                io_schedule();

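                /*
                 * After sleeping, this task may have been woken on a
                 * different CPU, which can map to a different hardware
                 * queue. Re-resolve the software and hardware context,
                 * the tag set, and the bitmap to wait on before retrying.
                 */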
                data->ctx = blk_mq_get_ctx(data->q);
                data->hctx = blk_mq_map_queue(data->q, data->ctx->cpu);
                tags = blk_mq_tags_from_data(data);
                if (data->flags & BLK_MQ_REQ_RESERVED)
                        bt = &tags->breserved_tags;
                else
                        bt = &tags->bitmap_tags;

                finish_wait(&ws->wait, &wait);
                ws = bt_wait_ptr(bt, data->hctx);
        } while (1);

        if (drop_ctx && data->ctx)
                blk_mq_put_ctx(data->ctx);

        finish_wait(&ws->wait, &wait);

found_tag:
        return tag + tag_offset;
}

void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags,
                    struct blk_mq_ctx *ctx, unsigned int tag)
{
        if (!blk_mq_tag_is_reserved(tags, tag)) {
                const int real_tag = tag - tags->nr_reserved_tags;

                BUG_ON(real_tag >= tags->nr_tags);
                sbitmap_queue_clear(&tags->bitmap_tags, real_tag, ctx->cpu);
        } else {
                BUG_ON(tag >= tags->nr_reserved_tags);
                sbitmap_queue_clear(&tags->breserved_tags, tag, ctx->cpu);
        }
}

struct bt_iter_data {
        struct blk_mq_hw_ctx *hctx;
        busy_iter_fn *fn;
        void *data;
        bool reserved;
};

static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
{
        struct bt_iter_data *iter_data = data;
        struct blk_mq_hw_ctx *hctx = iter_data->hctx;
        struct blk_mq_tags *tags = hctx->tags;
        bool reserved = iter_data->reserved;
        struct request *rq;

        if (!reserved)
                bitnr += tags->nr_reserved_tags;
        rq = tags->rqs[bitnr];

        /*
         * We can hit rq == NULL here, because the tagging functions
         * test and set the bit before assigning ->rqs[].
         */
        if (rq && rq->q == hctx->queue)
                iter_data->fn(hctx, rq, iter_data->data, reserved);
        return true;
}

static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt,
                        busy_iter_fn *fn, void *data, bool reserved)
{
        struct bt_iter_data iter_data = {
                .hctx = hctx,
                .fn = fn,
                .data = data,
                .reserved = reserved,
        };

        sbitmap_for_each_set(&bt->sb, bt_iter, &iter_data);
}

struct bt_tags_iter_data {
        struct blk_mq_tags *tags;
        busy_tag_iter_fn *fn;
        void *data;
        bool reserved;
};

static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
{
        struct bt_tags_iter_data *iter_data = data;
        struct blk_mq_tags *tags = iter_data->tags;
        bool reserved = iter_data->reserved;
        struct request *rq;

        if (!reserved)
                bitnr += tags->nr_reserved_tags;

        /*
         * We can hit rq == NULL here, because the tagging functions
         * test and set the bit before assigning ->rqs[].
         */
        rq = tags->rqs[bitnr];
        if (rq)
                iter_data->fn(rq, iter_data->data, reserved);
        return true;
}

static void bt_tags_for_each(struct blk_mq_tags *tags, struct sbitmap_queue *bt,
                             busy_tag_iter_fn *fn, void *data, bool reserved)
{
        struct bt_tags_iter_data iter_data = {
                .tags = tags,
                .fn = fn,
                .data = data,
                .reserved = reserved,
        };

        if (tags->rqs)
                sbitmap_for_each_set(&bt->sb, bt_tags_iter, &iter_data);
}

static void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags,
                                     busy_tag_iter_fn *fn, void *priv)
{
        if (tags->nr_reserved_tags)
                bt_tags_for_each(tags, &tags->breserved_tags, fn, priv, true);
        bt_tags_for_each(tags, &tags->bitmap_tags, fn, priv, false);
}

void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
                             busy_tag_iter_fn *fn, void *priv)
{
        int i;

        for (i = 0; i < tagset->nr_hw_queues; i++) {
                if (tagset->tags && tagset->tags[i])
                        blk_mq_all_tag_busy_iter(tagset->tags[i], fn, priv);
        }
}
EXPORT_SYMBOL(blk_mq_tagset_busy_iter);

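/*
 * Illustrative sketch (not part of the original file): a driver would
 * typically use blk_mq_tagset_busy_iter() to walk every in-flight request
 * in its tag set, e.g. to fail them during controller teardown. The
 * callback name, the my_driver structure, and the completion policy below
 * are hypothetical:
 *
 *	static void my_cancel_rq(struct request *rq, void *data, bool reserved)
 *	{
 *		blk_mq_end_request(rq, BLK_STS_IOERR);
 *	}
 *
 *	blk_mq_tagset_busy_iter(&my_driver->tagset, my_cancel_rq, NULL);
 */
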
int blk_mq_reinit_tagset(struct blk_mq_tag_set *set,
                         int (reinit_request)(void *, struct request *))
{
        int i, j, ret = 0;

        if (WARN_ON_ONCE(!reinit_request))
                goto out;

        for (i = 0; i < set->nr_hw_queues; i++) {
                struct blk_mq_tags *tags = set->tags[i];

                if (!tags)
                        continue;

                for (j = 0; j < tags->nr_tags; j++) {
                        if (!tags->static_rqs[j])
                                continue;

                        ret = reinit_request(set->driver_data,
                                             tags->static_rqs[j]);
                        if (ret)
                                goto out;
                }
        }

out:
        return ret;
}
EXPORT_SYMBOL_GPL(blk_mq_reinit_tagset);

void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
                                void *priv)
{
        struct blk_mq_hw_ctx *hctx;
        int i;

        /*
         * __blk_mq_update_nr_hw_queues() updates nr_hw_queues and
         * queue_hw_ctx after freezing the queue, so we use q_usage_counter
         * to avoid racing with it.
         */
        if (!percpu_ref_tryget(&q->q_usage_counter))
                return;

        queue_for_each_hw_ctx(q, hctx, i) {
                struct blk_mq_tags *tags = hctx->tags;

                /*
                 * If no software queues are currently mapped to this
                 * hardware queue, there's nothing to check.
                 */
                if (!blk_mq_hw_queue_mapped(hctx))
                        continue;

                if (tags->nr_reserved_tags)
                        bt_for_each(hctx, &tags->breserved_tags, fn, priv, true);
                bt_for_each(hctx, &tags->bitmap_tags, fn, priv, false);
        }

        blk_queue_exit(q);
}

static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth,
                    bool round_robin, int node)
{
        return sbitmap_queue_init_node(bt, depth, -1, round_robin, GFP_KERNEL,
                                       node);
}

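/*
 * Note on the call above: the third argument to sbitmap_queue_init_node()
 * is the per-word shift; passing -1 lets the sbitmap code choose a default
 * based on the depth (my reading of the sbitmap API, not stated in this
 * file).
 */
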
static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
                                                   int node, int alloc_policy)
{
        unsigned int depth = tags->nr_tags - tags->nr_reserved_tags;
        bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR;

        if (bt_alloc(&tags->bitmap_tags, depth, round_robin, node))
                goto free_tags;
        if (bt_alloc(&tags->breserved_tags, tags->nr_reserved_tags, round_robin,
                     node))
                goto free_bitmap_tags;

        return tags;
free_bitmap_tags:
        sbitmap_queue_free(&tags->bitmap_tags);
free_tags:
        kfree(tags);
        return NULL;
}

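/*
 * Note: on failure blk_mq_init_bitmap_tags() frees @tags itself, which is
 * why blk_mq_init_tags() below can simply return its result without a
 * separate error path.
 */
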
struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
                                     unsigned int reserved_tags,
                                     int node, int alloc_policy)
{
        struct blk_mq_tags *tags;

        if (total_tags > BLK_MQ_TAG_MAX) {
                pr_err("blk-mq: tag depth too large\n");
                return NULL;
        }

        tags = kzalloc_node(sizeof(*tags), GFP_KERNEL, node);
        if (!tags)
                return NULL;

        tags->nr_tags = total_tags;
        tags->nr_reserved_tags = reserved_tags;

        return blk_mq_init_bitmap_tags(tags, node, alloc_policy);
}

void blk_mq_free_tags(struct blk_mq_tags *tags)
{
        sbitmap_queue_free(&tags->bitmap_tags);
        sbitmap_queue_free(&tags->breserved_tags);
        kfree(tags);
}

int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
                            struct blk_mq_tags **tagsptr, unsigned int tdepth,
                            bool can_grow)
{
        struct blk_mq_tags *tags = *tagsptr;

        if (tdepth <= tags->nr_reserved_tags)
                return -EINVAL;

        /*
         * If we are allowed to grow beyond the original size, allocate
         * a new set of tags before freeing the old one.
         */
        if (tdepth > tags->nr_tags) {
                struct blk_mq_tag_set *set = hctx->queue->tag_set;
                struct blk_mq_tags *new;
                bool ret;

                if (!can_grow)
                        return -EINVAL;

                /*
                 * We need some sort of upper limit, set it high enough that
                 * no valid use cases should require more.
                 */
                if (tdepth > 16 * BLKDEV_MAX_RQ)
                        return -EINVAL;

                new = blk_mq_alloc_rq_map(set, hctx->queue_num, tdepth,
                                          tags->nr_reserved_tags);
                if (!new)
                        return -ENOMEM;
                ret = blk_mq_alloc_rqs(set, new, hctx->queue_num, tdepth);
                if (ret) {
                        blk_mq_free_rq_map(new);
                        return -ENOMEM;
                }

                blk_mq_free_rqs(set, *tagsptr, hctx->queue_num);
                blk_mq_free_rq_map(*tagsptr);
                *tagsptr = new;
        } else {
                /*
                 * We don't need to (and can't) update reserved tags here;
                 * they remain static and should never need resizing.
                 */
                sbitmap_queue_resize(&tags->bitmap_tags,
                                     tdepth - tags->nr_reserved_tags);
        }

        return 0;
}

/**
 * blk_mq_unique_tag() - return a tag that is unique queue-wide
 * @rq: request for which to compute a unique tag
 *
 * The tag field in struct request is unique per hardware queue but not over
 * all hardware queues. Hence this function, which returns a tag with the
 * hardware context index in the upper bits and the per-hardware-queue tag in
 * the lower bits.
 *
 * Note: When called for a request that is queued on a non-multiqueue request
 * queue, the hardware context index is set to zero.
 */
u32 blk_mq_unique_tag(struct request *rq)
{
        struct request_queue *q = rq->q;
        struct blk_mq_hw_ctx *hctx;
        int hwq = 0;

        if (q->mq_ops) {
                hctx = blk_mq_map_queue(q, rq->mq_ctx->cpu);
                hwq = hctx->queue_num;
        }

        return (hwq << BLK_MQ_UNIQUE_TAG_BITS) |
                (rq->tag & BLK_MQ_UNIQUE_TAG_MASK);
}
EXPORT_SYMBOL(blk_mq_unique_tag);
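
/*
 * Worked example (illustrative, relying on BLK_MQ_UNIQUE_TAG_BITS being 16
 * as defined in blk-mq.h): a request with tag 0x001f on hardware queue 2
 * yields the unique tag (2 << 16) | 0x001f = 0x0002001f. The hardware queue
 * index and the per-queue tag can be recovered with
 * blk_mq_unique_tag_to_hwq() and blk_mq_unique_tag_to_tag().
 */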