blk-ioc.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Functions related to io context handling
  4. */
  5. #include <linux/kernel.h>
  6. #include <linux/module.h>
  7. #include <linux/init.h>
  8. #include <linux/bio.h>
  9. #include <linux/blkdev.h>
  10. #include <linux/slab.h>
  11. #include <linux/sched/task.h>
  12. #include "blk.h"
  13. /*
  14. * For io context allocations
  15. */
  16. static struct kmem_cache *iocontext_cachep;
  17. /**
  18. * get_io_context - increment reference count to io_context
  19. * @ioc: io_context to get
  20. *
  21. * Increment reference count to @ioc.
  22. */
  23. void get_io_context(struct io_context *ioc)
  24. {
  25. BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
  26. atomic_long_inc(&ioc->refcount);
  27. }
  28. EXPORT_SYMBOL(get_io_context);
  29. static void icq_free_icq_rcu(struct rcu_head *head)
  30. {
  31. struct io_cq *icq = container_of(head, struct io_cq, __rcu_head);
  32. kmem_cache_free(icq->__rcu_icq_cache, icq);
  33. }
  34. /*
  35. * Exit an icq. Called with ioc locked for blk-mq, and with both ioc
  36. * and queue locked for legacy.
  37. */
  38. static void ioc_exit_icq(struct io_cq *icq)
  39. {
  40. struct elevator_type *et = icq->q->elevator->type;
  41. if (icq->flags & ICQ_EXITED)
  42. return;
  43. if (et->uses_mq && et->ops.mq.exit_icq)
  44. et->ops.mq.exit_icq(icq);
  45. else if (!et->uses_mq && et->ops.sq.elevator_exit_icq_fn)
  46. et->ops.sq.elevator_exit_icq_fn(icq);
  47. icq->flags |= ICQ_EXITED;
  48. }
  49. /*
  50. * Release an icq. Called with ioc locked for blk-mq, and with both ioc
  51. * and queue locked for legacy.
  52. */
  53. static void ioc_destroy_icq(struct io_cq *icq)
  54. {
  55. struct io_context *ioc = icq->ioc;
  56. struct request_queue *q = icq->q;
  57. struct elevator_type *et = q->elevator->type;
  58. lockdep_assert_held(&ioc->lock);
  59. radix_tree_delete(&ioc->icq_tree, icq->q->id);
  60. hlist_del_init(&icq->ioc_node);
  61. list_del_init(&icq->q_node);
  62. /*
  63. * Both setting lookup hint to and clearing it from @icq are done
  64. * under queue_lock. If it's not pointing to @icq now, it never
  65. * will. Hint assignment itself can race safely.
  66. */
  67. if (rcu_access_pointer(ioc->icq_hint) == icq)
  68. rcu_assign_pointer(ioc->icq_hint, NULL);
  69. ioc_exit_icq(icq);
  70. /*
  71. * @icq->q might have gone away by the time RCU callback runs
  72. * making it impossible to determine icq_cache. Record it in @icq.
  73. */
  74. icq->__rcu_icq_cache = et->icq_cache;
  75. icq->flags |= ICQ_DESTROYED;
  76. call_rcu(&icq->__rcu_head, icq_free_icq_rcu);
  77. }
  78. /*
  79. * Slow path for ioc release in put_io_context(). Performs double-lock
  80. * dancing to unlink all icq's and then frees ioc.
  81. */
  82. static void ioc_release_fn(struct work_struct *work)
  83. {
  84. struct io_context *ioc = container_of(work, struct io_context,
  85. release_work);
  86. unsigned long flags;
  87. /*
  88. * Exiting icq may call into put_io_context() through elevator
  89. * which will trigger lockdep warning. The ioc's are guaranteed to
  90. * be different, use a different locking subclass here. Use
  91. * irqsave variant as there's no spin_lock_irq_nested().
  92. */
  93. spin_lock_irqsave_nested(&ioc->lock, flags, 1);
  94. while (!hlist_empty(&ioc->icq_list)) {
  95. struct io_cq *icq = hlist_entry(ioc->icq_list.first,
  96. struct io_cq, ioc_node);
  97. struct request_queue *q = icq->q;
  98. if (spin_trylock(q->queue_lock)) {
  99. ioc_destroy_icq(icq);
  100. spin_unlock(q->queue_lock);
  101. } else {
  102. spin_unlock_irqrestore(&ioc->lock, flags);
  103. cpu_relax();
  104. spin_lock_irqsave_nested(&ioc->lock, flags, 1);
  105. }
  106. }
  107. spin_unlock_irqrestore(&ioc->lock, flags);
  108. kmem_cache_free(iocontext_cachep, ioc);
  109. }
  110. /**
  111. * put_io_context - put a reference of io_context
  112. * @ioc: io_context to put
  113. *
  114. * Decrement reference count of @ioc and release it if the count reaches
  115. * zero.
  116. */
  117. void put_io_context(struct io_context *ioc)
  118. {
  119. unsigned long flags;
  120. bool free_ioc = false;
  121. if (ioc == NULL)
  122. return;
  123. BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
  124. /*
  125. * Releasing ioc requires reverse order double locking and we may
  126. * already be holding a queue_lock. Do it asynchronously from wq.
  127. */
  128. if (atomic_long_dec_and_test(&ioc->refcount)) {
  129. spin_lock_irqsave(&ioc->lock, flags);
  130. if (!hlist_empty(&ioc->icq_list))
  131. queue_work(system_power_efficient_wq,
  132. &ioc->release_work);
  133. else
  134. free_ioc = true;
  135. spin_unlock_irqrestore(&ioc->lock, flags);
  136. }
  137. if (free_ioc)
  138. kmem_cache_free(iocontext_cachep, ioc);
  139. }
  140. EXPORT_SYMBOL(put_io_context);
  141. /**
  142. * put_io_context_active - put active reference on ioc
  143. * @ioc: ioc of interest
  144. *
  145. * Undo get_io_context_active(). If active reference reaches zero after
  146. * put, @ioc can never issue further IOs and ioscheds are notified.
  147. */
  148. void put_io_context_active(struct io_context *ioc)
  149. {
  150. struct elevator_type *et;
  151. unsigned long flags;
  152. struct io_cq *icq;
  153. if (!atomic_dec_and_test(&ioc->active_ref)) {
  154. put_io_context(ioc);
  155. return;
  156. }
  157. /*
  158. * Need ioc lock to walk icq_list and q lock to exit icq. Perform
  159. * reverse double locking. Read comment in ioc_release_fn() for
  160. * explanation on the nested locking annotation.
  161. */
  162. retry:
  163. spin_lock_irqsave_nested(&ioc->lock, flags, 1);
  164. hlist_for_each_entry(icq, &ioc->icq_list, ioc_node) {
  165. if (icq->flags & ICQ_EXITED)
  166. continue;
  167. et = icq->q->elevator->type;
  168. if (et->uses_mq) {
  169. ioc_exit_icq(icq);
  170. } else {
  171. if (spin_trylock(icq->q->queue_lock)) {
  172. ioc_exit_icq(icq);
  173. spin_unlock(icq->q->queue_lock);
  174. } else {
  175. spin_unlock_irqrestore(&ioc->lock, flags);
  176. cpu_relax();
  177. goto retry;
  178. }
  179. }
  180. }
  181. spin_unlock_irqrestore(&ioc->lock, flags);
  182. put_io_context(ioc);
  183. }
  184. /* Called by the exiting task */
  185. void exit_io_context(struct task_struct *task)
  186. {
  187. struct io_context *ioc;
  188. task_lock(task);
  189. ioc = task->io_context;
  190. task->io_context = NULL;
  191. task_unlock(task);
  192. atomic_dec(&ioc->nr_tasks);
  193. put_io_context_active(ioc);
  194. }
  195. static void __ioc_clear_queue(struct list_head *icq_list)
  196. {
  197. unsigned long flags;
  198. rcu_read_lock();
  199. while (!list_empty(icq_list)) {
  200. struct io_cq *icq = list_entry(icq_list->next,
  201. struct io_cq, q_node);
  202. struct io_context *ioc = icq->ioc;
  203. spin_lock_irqsave(&ioc->lock, flags);
  204. if (icq->flags & ICQ_DESTROYED) {
  205. spin_unlock_irqrestore(&ioc->lock, flags);
  206. continue;
  207. }
  208. ioc_destroy_icq(icq);
  209. spin_unlock_irqrestore(&ioc->lock, flags);
  210. }
  211. rcu_read_unlock();
  212. }
  213. /**
  214. * ioc_clear_queue - break any ioc association with the specified queue
  215. * @q: request_queue being cleared
  216. *
  217. * Walk @q->icq_list and exit all io_cq's.
  218. */
  219. void ioc_clear_queue(struct request_queue *q)
  220. {
  221. LIST_HEAD(icq_list);
  222. spin_lock_irq(q->queue_lock);
  223. list_splice_init(&q->icq_list, &icq_list);
  224. if (q->mq_ops) {
  225. spin_unlock_irq(q->queue_lock);
  226. __ioc_clear_queue(&icq_list);
  227. } else {
  228. __ioc_clear_queue(&icq_list);
  229. spin_unlock_irq(q->queue_lock);
  230. }
  231. }
  232. int create_task_io_context(struct task_struct *task, gfp_t gfp_flags, int node)
  233. {
  234. struct io_context *ioc;
  235. int ret;
  236. ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags | __GFP_ZERO,
  237. node);
  238. if (unlikely(!ioc))
  239. return -ENOMEM;
  240. /* initialize */
  241. atomic_long_set(&ioc->refcount, 1);
  242. atomic_set(&ioc->nr_tasks, 1);
  243. atomic_set(&ioc->active_ref, 1);
  244. spin_lock_init(&ioc->lock);
  245. INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC | __GFP_HIGH);
  246. INIT_HLIST_HEAD(&ioc->icq_list);
  247. INIT_WORK(&ioc->release_work, ioc_release_fn);
  248. /*
  249. * Try to install. ioc shouldn't be installed if someone else
  250. * already did or @task, which isn't %current, is exiting. Note
  251. * that we need to allow ioc creation on exiting %current as exit
  252. * path may issue IOs from e.g. exit_files(). The exit path is
  253. * responsible for not issuing IO after exit_io_context().
  254. */
  255. task_lock(task);
  256. if (!task->io_context &&
  257. (task == current || !(task->flags & PF_EXITING)))
  258. task->io_context = ioc;
  259. else
  260. kmem_cache_free(iocontext_cachep, ioc);
  261. ret = task->io_context ? 0 : -EBUSY;
  262. task_unlock(task);
  263. return ret;
  264. }
  265. /**
  266. * get_task_io_context - get io_context of a task
  267. * @task: task of interest
  268. * @gfp_flags: allocation flags, used if allocation is necessary
  269. * @node: allocation node, used if allocation is necessary
  270. *
  271. * Return io_context of @task. If it doesn't exist, it is created with
  272. * @gfp_flags and @node. The returned io_context has its reference count
  273. * incremented.
  274. *
  275. * This function always goes through task_lock() and it's better to use
  276. * %current->io_context + get_io_context() for %current.
  277. */
  278. struct io_context *get_task_io_context(struct task_struct *task,
  279. gfp_t gfp_flags, int node)
  280. {
  281. struct io_context *ioc;
  282. might_sleep_if(gfpflags_allow_blocking(gfp_flags));
  283. do {
  284. task_lock(task);
  285. ioc = task->io_context;
  286. if (likely(ioc)) {
  287. get_io_context(ioc);
  288. task_unlock(task);
  289. return ioc;
  290. }
  291. task_unlock(task);
  292. } while (!create_task_io_context(task, gfp_flags, node));
  293. return NULL;
  294. }
  295. EXPORT_SYMBOL(get_task_io_context);
  296. /**
  297. * ioc_lookup_icq - lookup io_cq from ioc
  298. * @ioc: the associated io_context
  299. * @q: the associated request_queue
  300. *
  301. * Look up io_cq associated with @ioc - @q pair from @ioc. Must be called
  302. * with @q->queue_lock held.
  303. */
  304. struct io_cq *ioc_lookup_icq(struct io_context *ioc, struct request_queue *q)
  305. {
  306. struct io_cq *icq;
  307. lockdep_assert_held(q->queue_lock);
  308. /*
  309. * icq's are indexed from @ioc using radix tree and hint pointer,
  310. * both of which are protected with RCU. All removals are done
  311. * holding both q and ioc locks, and we're holding q lock - if we
  312. * find a icq which points to us, it's guaranteed to be valid.
  313. */
  314. rcu_read_lock();
  315. icq = rcu_dereference(ioc->icq_hint);
  316. if (icq && icq->q == q)
  317. goto out;
  318. icq = radix_tree_lookup(&ioc->icq_tree, q->id);
  319. if (icq && icq->q == q)
  320. rcu_assign_pointer(ioc->icq_hint, icq); /* allowed to race */
  321. else
  322. icq = NULL;
  323. out:
  324. rcu_read_unlock();
  325. return icq;
  326. }
  327. EXPORT_SYMBOL(ioc_lookup_icq);
  328. /**
  329. * ioc_create_icq - create and link io_cq
  330. * @ioc: io_context of interest
  331. * @q: request_queue of interest
  332. * @gfp_mask: allocation mask
  333. *
  334. * Make sure io_cq linking @ioc and @q exists. If icq doesn't exist, they
  335. * will be created using @gfp_mask.
  336. *
  337. * The caller is responsible for ensuring @ioc won't go away and @q is
  338. * alive and will stay alive until this function returns.
  339. */
  340. struct io_cq *ioc_create_icq(struct io_context *ioc, struct request_queue *q,
  341. gfp_t gfp_mask)
  342. {
  343. struct elevator_type *et = q->elevator->type;
  344. struct io_cq *icq;
  345. /* allocate stuff */
  346. icq = kmem_cache_alloc_node(et->icq_cache, gfp_mask | __GFP_ZERO,
  347. q->node);
  348. if (!icq)
  349. return NULL;
  350. if (radix_tree_maybe_preload(gfp_mask) < 0) {
  351. kmem_cache_free(et->icq_cache, icq);
  352. return NULL;
  353. }
  354. icq->ioc = ioc;
  355. icq->q = q;
  356. INIT_LIST_HEAD(&icq->q_node);
  357. INIT_HLIST_NODE(&icq->ioc_node);
  358. /* lock both q and ioc and try to link @icq */
  359. spin_lock_irq(q->queue_lock);
  360. spin_lock(&ioc->lock);
  361. if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) {
  362. hlist_add_head(&icq->ioc_node, &ioc->icq_list);
  363. list_add(&icq->q_node, &q->icq_list);
  364. if (et->uses_mq && et->ops.mq.init_icq)
  365. et->ops.mq.init_icq(icq);
  366. else if (!et->uses_mq && et->ops.sq.elevator_init_icq_fn)
  367. et->ops.sq.elevator_init_icq_fn(icq);
  368. } else {
  369. kmem_cache_free(et->icq_cache, icq);
  370. icq = ioc_lookup_icq(ioc, q);
  371. if (!icq)
  372. printk(KERN_ERR "cfq: icq link failed!\n");
  373. }
  374. spin_unlock(&ioc->lock);
  375. spin_unlock_irq(q->queue_lock);
  376. radix_tree_preload_end();
  377. return icq;
  378. }
  379. static int __init blk_ioc_init(void)
  380. {
  381. iocontext_cachep = kmem_cache_create("blkdev_ioc",
  382. sizeof(struct io_context), 0, SLAB_PANIC, NULL);
  383. return 0;
  384. }
  385. subsys_initcall(blk_ioc_init);