/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BLK_CGROUP_H
#define _BLK_CGROUP_H
/*
 * Common Block IO controller cgroup interface
 *
 * Based on ideas and code from CFQ, CFS and BFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *                    Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
 *                    Nauman Rafique <nauman@google.com>
 */

#include <linux/cgroup.h>
#include <linux/percpu_counter.h>
#include <linux/seq_file.h>
#include <linux/radix-tree.h>
#include <linux/blkdev.h>
#include <linux/atomic.h>
#include <linux/kthread.h>

/* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
#define BLKG_STAT_CPU_BATCH     (INT_MAX / 2)

/* Max limits for throttle policy */
#define THROTL_IOPS_MAX         UINT_MAX

#ifdef CONFIG_BLK_CGROUP
enum blkg_rwstat_type {
        BLKG_RWSTAT_READ,
        BLKG_RWSTAT_WRITE,
        BLKG_RWSTAT_SYNC,
        BLKG_RWSTAT_ASYNC,
        BLKG_RWSTAT_DISCARD,

        BLKG_RWSTAT_NR,
        BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR,
};

struct blkcg_gq;

struct blkcg {
        struct cgroup_subsys_state css;
        spinlock_t lock;

        struct radix_tree_root blkg_tree;
        struct blkcg_gq __rcu *blkg_hint;
        struct hlist_head blkg_list;

        struct blkcg_policy_data *cpd[BLKCG_MAX_POLS];

        struct list_head all_blkcgs_node;
#ifdef CONFIG_CGROUP_WRITEBACK
        struct list_head cgwb_list;
        refcount_t cgwb_refcnt;
#endif
};
/*
 * blkg_[rw]stat->aux_cnt is excluded for local stats but included for
 * recursive stats. Used to carry stats of dead children, and, for
 * blkg_rwstat, to carry result values from read and sum operations.
 */
struct blkg_stat {
        struct percpu_counter cpu_cnt;
        atomic64_t aux_cnt;
};

struct blkg_rwstat {
        struct percpu_counter cpu_cnt[BLKG_RWSTAT_NR];
        atomic64_t aux_cnt[BLKG_RWSTAT_NR];
};
/*
 * A blkcg_gq (blkg) is an association between a block cgroup (blkcg) and a
 * request_queue (q). This is used by blkcg policies which need to track
 * information per blkcg - q pair.
 *
 * There can be multiple active blkcg policies and each blkg:policy pair is
 * represented by a blkg_policy_data which is allocated and freed by each
 * policy's pd_alloc/free_fn() methods. A policy can allocate a private data
 * area by allocating a larger data structure which embeds blkg_policy_data
 * at the beginning.
 */
struct blkg_policy_data {
        /* the blkg and policy id this per-policy data belongs to */
        struct blkcg_gq *blkg;
        int plid;
};

/*
 * Policies that need to keep per-blkcg data which is independent from any
 * request_queue associated with it should implement cpd_alloc/free_fn()
 * methods. A policy can allocate a private data area by allocating a larger
 * data structure which embeds blkcg_policy_data at the beginning.
 * cpd_init() is invoked to let each policy handle per-blkcg data.
 */
struct blkcg_policy_data {
        /* the blkcg and policy id this per-policy data belongs to */
        struct blkcg *blkcg;
        int plid;
};
/* association between a blk cgroup and a request queue */
struct blkcg_gq {
        /* Pointer to the associated request_queue */
        struct request_queue *q;
        struct list_head q_node;
        struct hlist_node blkcg_node;
        struct blkcg *blkcg;

        /*
         * Each blkg gets congested separately and the congestion state is
         * propagated to the matching bdi_writeback_congested.
         */
        struct bdi_writeback_congested *wb_congested;

        /* all non-root blkcg_gq's are guaranteed to have access to parent */
        struct blkcg_gq *parent;

        /* request allocation list for this blkcg-q pair */
        struct request_list rl;

        /* reference count */
        atomic_t refcnt;

        /* is this blkg online? protected by both blkcg and q locks */
        bool online;

        struct blkg_rwstat stat_bytes;
        struct blkg_rwstat stat_ios;

        struct blkg_policy_data *pd[BLKCG_MAX_POLS];

        struct rcu_head rcu_head;

        atomic_t use_delay;
        atomic64_t delay_nsec;
        atomic64_t delay_start;
        u64 last_delay;
        int last_use;
};
typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd);
typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd);
typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd);
typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp, int node);
typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
typedef size_t (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd, char *buf,
                                      size_t size);

struct blkcg_policy {
        int plid;
        /* cgroup files for the policy */
        struct cftype *dfl_cftypes;
        struct cftype *legacy_cftypes;

        /* operations */
        blkcg_pol_alloc_cpd_fn *cpd_alloc_fn;
        blkcg_pol_init_cpd_fn *cpd_init_fn;
        blkcg_pol_free_cpd_fn *cpd_free_fn;
        blkcg_pol_bind_cpd_fn *cpd_bind_fn;

        blkcg_pol_alloc_pd_fn *pd_alloc_fn;
        blkcg_pol_init_pd_fn *pd_init_fn;
        blkcg_pol_online_pd_fn *pd_online_fn;
        blkcg_pol_offline_pd_fn *pd_offline_fn;
        blkcg_pol_free_pd_fn *pd_free_fn;
        blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn;
        blkcg_pol_stat_pd_fn *pd_stat_fn;
};
extern struct blkcg blkcg_root;
extern struct cgroup_subsys_state * const blkcg_root_css;

struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
                                      struct request_queue *q, bool update_hint);
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
                                    struct request_queue *q);
int blkcg_init_queue(struct request_queue *q);
void blkcg_drain_queue(struct request_queue *q);
void blkcg_exit_queue(struct request_queue *q);

/* Blkio controller policy registration */
int blkcg_policy_register(struct blkcg_policy *pol);
void blkcg_policy_unregister(struct blkcg_policy *pol);
int blkcg_activate_policy(struct request_queue *q,
                          const struct blkcg_policy *pol);
void blkcg_deactivate_policy(struct request_queue *q,
                             const struct blkcg_policy *pol);
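/*
 * Illustrative sketch (not taken from the tree): the registration flow a
 * policy typically follows. The "example_*" identifiers are hypothetical;
 * a real policy would allocate its own larger pd structure and also fill
 * in pd_init_fn, cftypes, etc.
 *
 *      static struct blkg_policy_data *example_pd_alloc(gfp_t gfp, int node)
 *      {
 *              return kzalloc_node(sizeof(struct blkg_policy_data), gfp, node);
 *      }
 *
 *      static void example_pd_free(struct blkg_policy_data *pd)
 *      {
 *              kfree(pd);
 *      }
 *
 *      static struct blkcg_policy example_policy = {
 *              .pd_alloc_fn    = example_pd_alloc,
 *              .pd_free_fn     = example_pd_free,
 *      };
 *
 *      // module init: grabs a free plid and adds the cgroup files
 *      ret = blkcg_policy_register(&example_policy);
 *      // per-queue enable: allocates pd's for the blkgs already on @q
 *      ret = blkcg_activate_policy(q, &example_policy);
 *      ...
 *      blkcg_deactivate_policy(q, &example_policy);
 *      blkcg_policy_unregister(&example_policy);
 */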
const char *blkg_dev_name(struct blkcg_gq *blkg);
void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
                       u64 (*prfill)(struct seq_file *,
                                     struct blkg_policy_data *, int),
                       const struct blkcg_policy *pol, int data,
                       bool show_total);
u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);
u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
                         const struct blkg_rwstat *rwstat);
u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off);
u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
                       int off);
int blkg_print_stat_bytes(struct seq_file *sf, void *v);
int blkg_print_stat_ios(struct seq_file *sf, void *v);
int blkg_print_stat_bytes_recursive(struct seq_file *sf, void *v);
int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v);

u64 blkg_stat_recursive_sum(struct blkcg_gq *blkg,
                            struct blkcg_policy *pol, int off);
struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkcg_gq *blkg,
                                             struct blkcg_policy *pol, int off);

struct blkg_conf_ctx {
        struct gendisk *disk;
        struct blkcg_gq *blkg;
        char *body;
};

int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
                   char *input, struct blkg_conf_ctx *ctx);
void blkg_conf_finish(struct blkg_conf_ctx *ctx);
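/*
 * Illustrative sketch (hypothetical handler, not from the tree): the usual
 * shape of a per-device limit write handler built on blkg_conf_prep() and
 * blkg_conf_finish(). @buf is "MAJ:MIN <policy specific body>"; prep
 * resolves the device, creates the blkg if needed, and leaves the rest of
 * the line in ctx.body.
 *
 *      ssize_t example_limit_write(struct kernfs_open_file *of, char *buf,
 *                                  size_t nbytes, loff_t off)
 *      {
 *              struct blkcg *blkcg = css_to_blkcg(of_css(of));
 *              struct blkg_conf_ctx ctx;
 *              u64 limit;
 *              int ret;
 *
 *              ret = blkg_conf_prep(blkcg, &example_policy, buf, &ctx);
 *              if (ret)
 *                      return ret;
 *
 *              ret = -EINVAL;
 *              if (sscanf(ctx.body, "%llu", &limit) == 1) {
 *                      // ctx.blkg is the blkg for (blkcg, device's queue);
 *                      // apply @limit to blkg_to_pd(ctx.blkg, &example_policy)
 *                      ret = 0;
 *              }
 *
 *              blkg_conf_finish(&ctx); // drops the locks taken by prep
 *              return ret ?: nbytes;
 *      }
 */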
static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
{
        return css ? container_of(css, struct blkcg, css) : NULL;
}

static inline struct blkcg *bio_blkcg(struct bio *bio)
{
        struct cgroup_subsys_state *css;

        if (bio && bio->bi_css)
                return css_to_blkcg(bio->bi_css);
        css = kthread_blkcg();
        if (css)
                return css_to_blkcg(css);
        return css_to_blkcg(task_css(current, io_cgrp_id));
}

static inline bool blk_cgroup_congested(void)
{
        struct cgroup_subsys_state *css;
        bool ret = false;

        rcu_read_lock();
        css = kthread_blkcg();
        if (!css)
                css = task_css(current, io_cgrp_id);
        while (css) {
                if (atomic_read(&css->cgroup->congestion_count)) {
                        ret = true;
                        break;
                }
                css = css->parent;
        }
        rcu_read_unlock();
        return ret;
}
/**
 * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
 * @bio: the bio being issued
 *
 * Returns %true if this bio needs to be submitted with the root blkg context.
 *
 * In order to avoid priority inversions we sometimes need to issue a bio as if
 * it were attached to the root blkg, and then backcharge to the actual owning
 * blkg. The idea is we do bio_blkcg() to look up the actual context for the
 * bio and attach the appropriate blkg to the bio. Then we call this helper and
 * if it returns %true we run with the root blkg for that queue and then do any
 * backcharging to the originating cgroup once the io is complete.
 */
static inline bool bio_issue_as_root_blkg(struct bio *bio)
{
        return (bio->bi_opf & (REQ_META | REQ_SWAP)) != 0;
}
/**
 * blkcg_parent - get the parent of a blkcg
 * @blkcg: blkcg of interest
 *
 * Return the parent blkcg of @blkcg. Can be called anytime.
 */
static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
{
        return css_to_blkcg(blkcg->css.parent);
}
/**
 * __blkg_lookup - internal version of blkg_lookup()
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 * @update_hint: whether to update lookup hint with the result or not
 *
 * This is the internal version and shouldn't be used by policy
 * implementations. Looks up the blkg for the @blkcg - @q pair regardless of
 * @q's bypass state. If @update_hint is %true, the caller should be
 * holding @q->queue_lock and the lookup hint is updated on success.
 */
static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
                                             struct request_queue *q,
                                             bool update_hint)
{
        struct blkcg_gq *blkg;

        if (blkcg == &blkcg_root)
                return q->root_blkg;

        blkg = rcu_dereference(blkcg->blkg_hint);
        if (blkg && blkg->q == q)
                return blkg;

        return blkg_lookup_slowpath(blkcg, q, update_hint);
}

/**
 * blkg_lookup - lookup blkg for the specified blkcg - q pair
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 *
 * Lookup blkg for the @blkcg - @q pair. This function should be called
 * under RCU read lock and is guaranteed to return %NULL if @q is bypassing
 * - see blk_queue_bypass_start() for details.
 */
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
                                           struct request_queue *q)
{
        WARN_ON_ONCE(!rcu_read_lock_held());

        if (unlikely(blk_queue_bypass(q)))
                return NULL;
        return __blkg_lookup(blkcg, q, false);
}
/**
 * blk_queue_root_blkg - return blkg for the (blkcg_root, @q) pair
 * @q: request_queue of interest
 *
 * Lookup blkg for @q at the root level. See also blkg_lookup().
 */
static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
{
        return q->root_blkg;
}

/**
 * blkg_to_pd - get policy private data
 * @blkg: blkg of interest
 * @pol: policy of interest
 *
 * Return pointer to private data associated with the @blkg-@pol pair.
 */
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
                                                  struct blkcg_policy *pol)
{
        return blkg ? blkg->pd[pol->plid] : NULL;
}
static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg,
                                                     struct blkcg_policy *pol)
{
        return blkcg ? blkcg->cpd[pol->plid] : NULL;
}

/**
 * pd_to_blkg - get blkg associated with policy private data
 * @pd: policy private data of interest
 *
 * @pd is policy private data. Determine the blkg it's associated with.
 */
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd)
{
        return pd ? pd->blkg : NULL;
}

static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
{
        return cpd ? cpd->blkcg : NULL;
}
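/*
 * Illustrative sketch (hypothetical names): policies usually embed
 * blkg_policy_data at the start of their own per-blkg structure, as the
 * comment above blkg_policy_data describes, and pair blkg_to_pd() /
 * pd_to_blkg() with container_of() based wrappers.
 *
 *      struct example_pd {
 *              struct blkg_policy_data pd;     // embedded first by convention
 *              u64 bytes_dispatched;
 *      };
 *
 *      static inline struct example_pd *pd_to_example(struct blkg_policy_data *pd)
 *      {
 *              return pd ? container_of(pd, struct example_pd, pd) : NULL;
 *      }
 *
 *      static inline struct example_pd *blkg_to_example(struct blkcg_gq *blkg)
 *      {
 *              return pd_to_example(blkg_to_pd(blkg, &example_policy));
 *      }
 */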
extern void blkcg_destroy_blkgs(struct blkcg *blkcg);

#ifdef CONFIG_CGROUP_WRITEBACK

/**
 * blkcg_cgwb_get - get a reference for blkcg->cgwb_list
 * @blkcg: blkcg of interest
 *
 * This is used to track the number of active wb's related to a blkcg.
 */
static inline void blkcg_cgwb_get(struct blkcg *blkcg)
{
        refcount_inc(&blkcg->cgwb_refcnt);
}

/**
 * blkcg_cgwb_put - put a reference for @blkcg->cgwb_list
 * @blkcg: blkcg of interest
 *
 * This is used to track the number of active wb's related to a blkcg.
 * When this count goes to zero, all active wbs have finished so the
 * blkcg can continue destruction by calling blkcg_destroy_blkgs().
 * This work may occur in cgwb_release_workfn() on the cgwb_release
 * workqueue.
 */
static inline void blkcg_cgwb_put(struct blkcg *blkcg)
{
        if (refcount_dec_and_test(&blkcg->cgwb_refcnt))
                blkcg_destroy_blkgs(blkcg);
}

#else

static inline void blkcg_cgwb_get(struct blkcg *blkcg) { }

static inline void blkcg_cgwb_put(struct blkcg *blkcg)
{
        /* wb isn't being accounted, so trigger destruction right away */
        blkcg_destroy_blkgs(blkcg);
}

#endif
/**
 * blkg_path - format cgroup path of blkg
 * @blkg: blkg of interest
 * @buf: target buffer
 * @buflen: target buffer length
 *
 * Format the path of the cgroup of @blkg into @buf.
 */
static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
{
        return cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
}

/**
 * blkg_get - get a blkg reference
 * @blkg: blkg to get
 *
 * The caller should be holding an existing reference.
 */
static inline void blkg_get(struct blkcg_gq *blkg)
{
        WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
        atomic_inc(&blkg->refcnt);
}

/**
 * blkg_try_get - try to get a blkg reference
 * @blkg: blkg to get
 *
 * This is for use when doing an RCU lookup of the blkg. We may be in the midst
 * of freeing this blkg, so we can only use it if the refcnt is not zero.
 */
static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg)
{
        if (atomic_inc_not_zero(&blkg->refcnt))
                return blkg;
        return NULL;
}

void __blkg_release_rcu(struct rcu_head *rcu);

/**
 * blkg_put - put a blkg reference
 * @blkg: blkg to put
 */
static inline void blkg_put(struct blkcg_gq *blkg)
{
        WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
        if (atomic_dec_and_test(&blkg->refcnt))
                call_rcu(&blkg->rcu_head, __blkg_release_rcu);
}
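/*
 * Illustrative sketch: taking a reference on a blkg found via an RCU
 * lookup. blkg_lookup() alone only pins the blkg for the duration of the
 * RCU read section; blkg_try_get() is what lets it outlive that section.
 *
 *      struct blkcg_gq *blkg;
 *
 *      rcu_read_lock();
 *      blkg = blkg_lookup(blkcg, q);
 *      if (blkg)
 *              blkg = blkg_try_get(blkg);      // NULL if already being freed
 *      rcu_read_unlock();
 *
 *      if (blkg) {
 *              // safe to use @blkg outside the RCU section
 *              blkg_put(blkg);
 *      }
 */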
/**
 * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
 * @d_blkg: loop cursor pointing to the current descendant
 * @pos_css: used for iteration
 * @p_blkg: target blkg to walk descendants of
 *
 * Walk @d_blkg through the descendants of @p_blkg. Must be used with RCU
 * read locked. If called under either blkcg or queue lock, the iteration
 * is guaranteed to include all and only online blkgs. The caller may
 * update @pos_css by calling css_rightmost_descendant() to skip a subtree.
 * @p_blkg is included in the iteration and is the first node to be visited.
 */
#define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg)          \
        css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css)  \
                if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),   \
                                              (p_blkg)->q, false)))

/**
 * blkg_for_each_descendant_post - post-order walk of a blkg's descendants
 * @d_blkg: loop cursor pointing to the current descendant
 * @pos_css: used for iteration
 * @p_blkg: target blkg to walk descendants of
 *
 * Similar to blkg_for_each_descendant_pre() but performs post-order
 * traversal instead. Synchronization rules are the same. @p_blkg is
 * included in the iteration and is the last node to be visited.
 */
#define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg)         \
        css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css) \
                if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),   \
                                              (p_blkg)->q, false)))
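/*
 * Illustrative sketch: walking every blkg below @parent_blkg on its queue,
 * e.g. to propagate a configuration change down the hierarchy. The loop
 * body and helper name are hypothetical.
 *
 *      struct cgroup_subsys_state *pos_css;
 *      struct blkcg_gq *blkg;
 *
 *      rcu_read_lock();
 *      blkg_for_each_descendant_pre(blkg, pos_css, parent_blkg) {
 *              struct blkg_policy_data *pd = blkg_to_pd(blkg, &example_policy);
 *
 *              if (pd)
 *                      example_update_limits(pd);
 *      }
 *      rcu_read_unlock();
 */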
/**
 * blk_get_rl - get request_list to use
 * @q: request_queue of interest
 * @bio: bio which will be attached to the allocated request (may be %NULL)
 *
 * The caller wants to allocate a request from @q to use for @bio. Find
 * the request_list to use and obtain a reference on it. Should be called
 * under queue_lock. This function is guaranteed to return non-%NULL
 * request_list.
 */
static inline struct request_list *blk_get_rl(struct request_queue *q,
                                              struct bio *bio)
{
        struct blkcg *blkcg;
        struct blkcg_gq *blkg;

        rcu_read_lock();

        blkcg = bio_blkcg(bio);

        /* bypass blkg lookup and use @q->root_rl directly for root */
        if (blkcg == &blkcg_root)
                goto root_rl;

        /*
         * Try to use blkg->rl. blkg lookup may fail under memory pressure
         * or if either the blkcg or queue is going away. Fall back to
         * root_rl in such cases.
         */
        blkg = blkg_lookup(blkcg, q);
        if (unlikely(!blkg))
                goto root_rl;

        blkg_get(blkg);
        rcu_read_unlock();
        return &blkg->rl;
root_rl:
        rcu_read_unlock();
        return &q->root_rl;
}
/**
 * blk_put_rl - put request_list
 * @rl: request_list to put
 *
 * Put the reference acquired by blk_get_rl(). Should be called under
 * queue_lock.
 */
static inline void blk_put_rl(struct request_list *rl)
{
        if (rl->blkg->blkcg != &blkcg_root)
                blkg_put(rl->blkg);
}

/**
 * blk_rq_set_rl - associate a request with a request_list
 * @rq: request of interest
 * @rl: target request_list
 *
 * Associate @rq with @rl so that accounting and freeing can know the
 * request_list @rq came from.
 */
static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl)
{
        rq->rl = rl;
}

/**
 * blk_rq_rl - return the request_list a request came from
 * @rq: request of interest
 *
 * Return the request_list @rq is allocated from.
 */
static inline struct request_list *blk_rq_rl(struct request *rq)
{
        return rq->rl;
}

struct request_list *__blk_queue_next_rl(struct request_list *rl,
                                         struct request_queue *q);

/**
 * blk_queue_for_each_rl - iterate through all request_lists of a request_queue
 * @rl: request_list cursor
 * @q: request_queue to iterate over
 *
 * Should be used under queue_lock.
 */
#define blk_queue_for_each_rl(rl, q)    \
        for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q)))
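/*
 * Illustrative sketch: the usual pairing of the request_list helpers in a
 * legacy request allocation path. The block core does the equivalent in
 * blk-core.c; example_alloc_request_from() is hypothetical and stands in
 * for the actual allocation step.
 *
 *      spin_lock_irq(q->queue_lock);
 *      rl = blk_get_rl(q, bio);        // takes a blkg reference if non-root
 *      rq = example_alloc_request_from(rl);
 *      if (rq)
 *              blk_rq_set_rl(rq, rl);  // remembered for freeing/accounting
 *      else
 *              blk_put_rl(rl);
 *      spin_unlock_irq(q->queue_lock);
 *
 *      // later, when the request is freed:
 *      blk_put_rl(blk_rq_rl(rq));
 */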
static inline int blkg_stat_init(struct blkg_stat *stat, gfp_t gfp)
{
        int ret;

        ret = percpu_counter_init(&stat->cpu_cnt, 0, gfp);
        if (ret)
                return ret;
        atomic64_set(&stat->aux_cnt, 0);
        return 0;
}

static inline void blkg_stat_exit(struct blkg_stat *stat)
{
        percpu_counter_destroy(&stat->cpu_cnt);
}

/**
 * blkg_stat_add - add a value to a blkg_stat
 * @stat: target blkg_stat
 * @val: value to add
 *
 * Add @val to @stat. The caller must ensure that IRQs on the same CPU
 * don't re-enter this function for the same counter.
 */
static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val)
{
        percpu_counter_add_batch(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH);
}

/**
 * blkg_stat_read - read the current value of a blkg_stat
 * @stat: blkg_stat to read
 */
static inline uint64_t blkg_stat_read(struct blkg_stat *stat)
{
        return percpu_counter_sum_positive(&stat->cpu_cnt);
}

/**
 * blkg_stat_reset - reset a blkg_stat
 * @stat: blkg_stat to reset
 */
static inline void blkg_stat_reset(struct blkg_stat *stat)
{
        percpu_counter_set(&stat->cpu_cnt, 0);
        atomic64_set(&stat->aux_cnt, 0);
}

/**
 * blkg_stat_add_aux - add a blkg_stat into another's aux count
 * @to: the destination blkg_stat
 * @from: the source
 *
 * Add @from's count including the aux one to @to's aux count.
 */
static inline void blkg_stat_add_aux(struct blkg_stat *to,
                                     struct blkg_stat *from)
{
        atomic64_add(blkg_stat_read(from) + atomic64_read(&from->aux_cnt),
                     &to->aux_cnt);
}
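/*
 * Illustrative sketch: the typical blkg_stat lifecycle inside a policy's
 * pd callbacks (the epd/parent_epd names are hypothetical): init when the
 * pd is allocated, add from the hot path, fold into the parent's aux count
 * when going offline so recursive sums stay correct, destroy on free.
 *
 *      if (blkg_stat_init(&epd->dispatched, gfp))      // pd_alloc_fn
 *              goto err;
 *
 *      blkg_stat_add(&epd->dispatched, 1);             // fast path
 *
 *      blkg_stat_add_aux(&parent_epd->dispatched,      // pd_offline_fn
 *                        &epd->dispatched);
 *
 *      blkg_stat_exit(&epd->dispatched);               // pd_free_fn
 */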
static inline int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp)
{
        int i, ret;

        for (i = 0; i < BLKG_RWSTAT_NR; i++) {
                ret = percpu_counter_init(&rwstat->cpu_cnt[i], 0, gfp);
                if (ret) {
                        while (--i >= 0)
                                percpu_counter_destroy(&rwstat->cpu_cnt[i]);
                        return ret;
                }
                atomic64_set(&rwstat->aux_cnt[i], 0);
        }
        return 0;
}

static inline void blkg_rwstat_exit(struct blkg_rwstat *rwstat)
{
        int i;

        for (i = 0; i < BLKG_RWSTAT_NR; i++)
                percpu_counter_destroy(&rwstat->cpu_cnt[i]);
}

/**
 * blkg_rwstat_add - add a value to a blkg_rwstat
 * @rwstat: target blkg_rwstat
 * @op: REQ_OP and flags
 * @val: value to add
 *
 * Add @val to @rwstat. The counters are chosen according to @op. The
 * caller is responsible for synchronizing calls to this function.
 */
static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
                                   unsigned int op, uint64_t val)
{
        struct percpu_counter *cnt;

        if (op_is_discard(op))
                cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_DISCARD];
        else if (op_is_write(op))
                cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE];
        else
                cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ];

        percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);

        if (op_is_sync(op))
                cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC];
        else
                cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC];

        percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);
}
/**
 * blkg_rwstat_read - read the current values of a blkg_rwstat
 * @rwstat: blkg_rwstat to read
 *
 * Read the current snapshot of @rwstat and return it in the aux counts.
 */
static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat)
{
        struct blkg_rwstat result;
        int i;

        for (i = 0; i < BLKG_RWSTAT_NR; i++)
                atomic64_set(&result.aux_cnt[i],
                             percpu_counter_sum_positive(&rwstat->cpu_cnt[i]));
        return result;
}

/**
 * blkg_rwstat_total - read the total count of a blkg_rwstat
 * @rwstat: blkg_rwstat to read
 *
 * Return the total count of @rwstat regardless of the IO direction. This
 * function can be called without synchronization and takes care of u64
 * atomicity.
 */
static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat)
{
        struct blkg_rwstat tmp = blkg_rwstat_read(rwstat);

        return atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) +
                atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_WRITE]);
}

/**
 * blkg_rwstat_reset - reset a blkg_rwstat
 * @rwstat: blkg_rwstat to reset
 */
static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
{
        int i;

        for (i = 0; i < BLKG_RWSTAT_NR; i++) {
                percpu_counter_set(&rwstat->cpu_cnt[i], 0);
                atomic64_set(&rwstat->aux_cnt[i], 0);
        }
}

/**
 * blkg_rwstat_add_aux - add a blkg_rwstat into another's aux count
 * @to: the destination blkg_rwstat
 * @from: the source
 *
 * Add @from's count including the aux one to @to's aux count.
 */
static inline void blkg_rwstat_add_aux(struct blkg_rwstat *to,
                                       struct blkg_rwstat *from)
{
        u64 sum[BLKG_RWSTAT_NR];
        int i;

        for (i = 0; i < BLKG_RWSTAT_NR; i++)
                sum[i] = percpu_counter_sum_positive(&from->cpu_cnt[i]);

        for (i = 0; i < BLKG_RWSTAT_NR; i++)
                atomic64_add(sum[i] + atomic64_read(&from->aux_cnt[i]),
                             &to->aux_cnt[i]);
}
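/*
 * Illustrative sketch: accounting a bio into a policy-private rwstat and
 * exposing it through the prfill helpers declared above. The epd/serviced
 * and example_* names are hypothetical and follow the embedding sketch
 * near pd_to_blkg().
 *
 *      // dispatch/completion path
 *      blkg_rwstat_add(&epd->serviced, bio->bi_opf, bio->bi_iter.bi_size);
 *
 *      // prfill callback wired up through blkcg_print_blkgs()
 *      static u64 example_prfill(struct seq_file *sf,
 *                                struct blkg_policy_data *pd, int off)
 *      {
 *              struct blkg_rwstat rwstat;
 *
 *              rwstat = blkg_rwstat_read(&pd_to_example(pd)->serviced);
 *              return __blkg_prfill_rwstat(sf, pd, &rwstat);
 *      }
 */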
#ifdef CONFIG_BLK_DEV_THROTTLING
extern bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
                           struct bio *bio);
#else
static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
                                  struct bio *bio) { return false; }
#endif

static inline bool blkcg_bio_issue_check(struct request_queue *q,
                                         struct bio *bio)
{
        struct blkcg *blkcg;
        struct blkcg_gq *blkg;
        bool throtl = false;

        rcu_read_lock();
        blkcg = bio_blkcg(bio);

        /* associate blkcg if bio hasn't attached one */
        bio_associate_blkcg(bio, &blkcg->css);

        blkg = blkg_lookup(blkcg, q);
        if (unlikely(!blkg)) {
                spin_lock_irq(q->queue_lock);
                blkg = blkg_lookup_create(blkcg, q);
                if (IS_ERR(blkg))
                        blkg = NULL;
                spin_unlock_irq(q->queue_lock);
        }

        throtl = blk_throtl_bio(q, blkg, bio);

        if (!throtl) {
                blkg = blkg ?: q->root_blkg;
                /*
                 * If the bio is flagged with BIO_QUEUE_ENTERED it means this
                 * is a split bio and we would have already accounted for the
                 * size of the bio.
                 */
                if (!bio_flagged(bio, BIO_QUEUE_ENTERED))
                        blkg_rwstat_add(&blkg->stat_bytes, bio->bi_opf,
                                        bio->bi_iter.bi_size);
                blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1);
        }

        rcu_read_unlock();
        return !throtl;
}
static inline void blkcg_use_delay(struct blkcg_gq *blkg)
{
        if (atomic_add_return(1, &blkg->use_delay) == 1)
                atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
}

static inline int blkcg_unuse_delay(struct blkcg_gq *blkg)
{
        int old = atomic_read(&blkg->use_delay);

        if (old == 0)
                return 0;

        /*
         * We do this song and dance because we can race with somebody else
         * adding or removing delay. If we just did an atomic_dec we'd end up
         * negative and we'd already be in trouble. We need to subtract 1 and
         * then check to see if we were the last delay so we can drop the
         * congestion count on the cgroup.
         */
        while (old) {
                int cur = atomic_cmpxchg(&blkg->use_delay, old, old - 1);
                if (cur == old)
                        break;
                old = cur;
        }

        if (old == 0)
                return 0;
        if (old == 1)
                atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
        return 1;
}

static inline void blkcg_clear_delay(struct blkcg_gq *blkg)
{
        int old = atomic_read(&blkg->use_delay);

        if (!old)
                return;

        /* We only want 1 person clearing the congestion count for this blkg. */
        while (old) {
                int cur = atomic_cmpxchg(&blkg->use_delay, old, 0);
                if (cur == old) {
                        atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
                        break;
                }
                old = cur;
        }
}

void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta);
void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay);
void blkcg_maybe_throttle_current(void);
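/*
 * Illustrative sketch: how an IO controller might drive the delay
 * machinery above. When a blkg exceeds its target it votes for delay and
 * charges a debt, which tasks pay off via the return-to-userspace hook;
 * example_over_target() and the debt value are hypothetical.
 *
 *      if (example_over_target(blkg)) {
 *              blkcg_use_delay(blkg);                  // bumps congestion_count
 *              blkcg_add_delay(blkg, ktime_get_ns(), NSEC_PER_MSEC);
 *              blkcg_schedule_throttle(blkg->q, false);// throttle current task
 *                                                      // on return to userspace
 *      } else {
 *              blkcg_unuse_delay(blkg);                // drop our delay vote
 *      }
 */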
#else   /* CONFIG_BLK_CGROUP */

struct blkcg {
};

struct blkg_policy_data {
};

struct blkcg_policy_data {
};

struct blkcg_gq {
};

struct blkcg_policy {
};

#define blkcg_root_css  ((struct cgroup_subsys_state *)ERR_PTR(-EINVAL))

static inline void blkcg_maybe_throttle_current(void) { }
static inline bool blk_cgroup_congested(void) { return false; }

#ifdef CONFIG_BLOCK

static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { }

static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
{ return NULL; }
static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
static inline void blkcg_drain_queue(struct request_queue *q) { }
static inline void blkcg_exit_queue(struct request_queue *q) { }
static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { }
static inline int blkcg_activate_policy(struct request_queue *q,
                                        const struct blkcg_policy *pol) { return 0; }
static inline void blkcg_deactivate_policy(struct request_queue *q,
                                           const struct blkcg_policy *pol) { }

static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }

static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
                                                  struct blkcg_policy *pol) { return NULL; }
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
static inline void blkg_get(struct blkcg_gq *blkg) { }
static inline void blkg_put(struct blkcg_gq *blkg) { }

static inline struct request_list *blk_get_rl(struct request_queue *q,
                                              struct bio *bio) { return &q->root_rl; }
static inline void blk_put_rl(struct request_list *rl) { }
static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { }
static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; }

static inline bool blkcg_bio_issue_check(struct request_queue *q,
                                         struct bio *bio) { return true; }

#define blk_queue_for_each_rl(rl, q) \
        for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)

#endif  /* CONFIG_BLOCK */
#endif  /* CONFIG_BLK_CGROUP */
#endif  /* _BLK_CGROUP_H */