bfq-iosched.h 33 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996
  1. /*
  2. * Header file for the BFQ I/O scheduler: data structures and
  3. * prototypes of interface functions among BFQ components.
  4. *
  5. * This program is free software; you can redistribute it and/or
  6. * modify it under the terms of the GNU General Public License as
  7. * published by the Free Software Foundation; either version 2 of the
  8. * License, or (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * General Public License for more details.
  14. */
  15. #ifndef _BFQ_H
  16. #define _BFQ_H
  17. #include <linux/blktrace_api.h>
  18. #include <linux/hrtimer.h>
  19. #include <linux/blk-cgroup.h>
  20. #define BFQ_IOPRIO_CLASSES 3
  21. #define BFQ_CL_IDLE_TIMEOUT (HZ/5)
  22. #define BFQ_MIN_WEIGHT 1
  23. #define BFQ_MAX_WEIGHT 1000
  24. #define BFQ_WEIGHT_CONVERSION_COEFF 10
  25. #define BFQ_DEFAULT_QUEUE_IOPRIO 4
  26. #define BFQ_WEIGHT_LEGACY_DFL 100
  27. #define BFQ_DEFAULT_GRP_IOPRIO 0
  28. #define BFQ_DEFAULT_GRP_CLASS IOPRIO_CLASS_BE
  29. /*
  30. * Soft real-time applications are extremely more latency sensitive
  31. * than interactive ones. Over-raise the weight of the former to
  32. * privilege them against the latter.
  33. */
  34. #define BFQ_SOFTRT_WEIGHT_FACTOR 100
  35. struct bfq_entity;
  36. /**
  37. * struct bfq_service_tree - per ioprio_class service tree.
  38. *
  39. * Each service tree represents a B-WF2Q+ scheduler on its own. Each
  40. * ioprio_class has its own independent scheduler, and so its own
  41. * bfq_service_tree. All the fields are protected by the queue lock
  42. * of the containing bfqd.
  43. */
  44. struct bfq_service_tree {
  45. /* tree for active entities (i.e., those backlogged) */
  46. struct rb_root active;
  47. /* tree for idle entities (i.e., not backlogged, with V < F_i)*/
  48. struct rb_root idle;
  49. /* idle entity with minimum F_i */
  50. struct bfq_entity *first_idle;
  51. /* idle entity with maximum F_i */
  52. struct bfq_entity *last_idle;
  53. /* scheduler virtual time */
  54. u64 vtime;
  55. /* scheduler weight sum; active and idle entities contribute to it */
  56. unsigned long wsum;
  57. };
  58. /**
  59. * struct bfq_sched_data - multi-class scheduler.
  60. *
  61. * bfq_sched_data is the basic scheduler queue. It supports three
  62. * ioprio_classes, and can be used either as a toplevel queue or as an
  63. * intermediate queue in a hierarchical setup.
  64. *
  65. * The supported ioprio_classes are the same as in CFQ, in descending
  66. * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE.
  67. * Requests from higher priority queues are served before all the
  68. * requests from lower priority queues; among requests of the same
  69. * queue requests are served according to B-WF2Q+.
  70. *
  71. * The schedule is implemented by the service trees, plus the field
  72. * @next_in_service, which points to the entity on the active trees
  73. * that will be served next, if 1) no changes in the schedule occurs
  74. * before the current in-service entity is expired, 2) the in-service
  75. * queue becomes idle when it expires, and 3) if the entity pointed by
  76. * in_service_entity is not a queue, then the in-service child entity
  77. * of the entity pointed by in_service_entity becomes idle on
  78. * expiration. This peculiar definition allows for the following
  79. * optimization, not yet exploited: while a given entity is still in
  80. * service, we already know which is the best candidate for next
  81. * service among the other active entitities in the same parent
  82. * entity. We can then quickly compare the timestamps of the
  83. * in-service entity with those of such best candidate.
  84. *
  85. * All fields are protected by the lock of the containing bfqd.
  86. */
  87. struct bfq_sched_data {
  88. /* entity in service */
  89. struct bfq_entity *in_service_entity;
  90. /* head-of-line entity (see comments above) */
  91. struct bfq_entity *next_in_service;
  92. /* array of service trees, one per ioprio_class */
  93. struct bfq_service_tree service_tree[BFQ_IOPRIO_CLASSES];
  94. /* last time CLASS_IDLE was served */
  95. unsigned long bfq_class_idle_last_service;
  96. };
  97. /**
  98. * struct bfq_weight_counter - counter of the number of all active entities
  99. * with a given weight.
  100. */
  101. struct bfq_weight_counter {
  102. unsigned int weight; /* weight of the entities this counter refers to */
  103. unsigned int num_active; /* nr of active entities with this weight */
  104. /*
  105. * Weights tree member (see bfq_data's @queue_weights_tree and
  106. * @group_weights_tree)
  107. */
  108. struct rb_node weights_node;
  109. };
  110. /**
  111. * struct bfq_entity - schedulable entity.
  112. *
  113. * A bfq_entity is used to represent either a bfq_queue (leaf node in the
  114. * cgroup hierarchy) or a bfq_group into the upper level scheduler. Each
  115. * entity belongs to the sched_data of the parent group in the cgroup
  116. * hierarchy. Non-leaf entities have also their own sched_data, stored
  117. * in @my_sched_data.
  118. *
  119. * Each entity stores independently its priority values; this would
  120. * allow different weights on different devices, but this
  121. * functionality is not exported to userspace by now. Priorities and
  122. * weights are updated lazily, first storing the new values into the
  123. * new_* fields, then setting the @prio_changed flag. As soon as
  124. * there is a transition in the entity state that allows the priority
  125. * update to take place the effective and the requested priority
  126. * values are synchronized.
  127. *
  128. * Unless cgroups are used, the weight value is calculated from the
  129. * ioprio to export the same interface as CFQ. When dealing with
  130. * ``well-behaved'' queues (i.e., queues that do not spend too much
  131. * time to consume their budget and have true sequential behavior, and
  132. * when there are no external factors breaking anticipation) the
  133. * relative weights at each level of the cgroups hierarchy should be
  134. * guaranteed. All the fields are protected by the queue lock of the
  135. * containing bfqd.
  136. */
  137. struct bfq_entity {
  138. /* service_tree member */
  139. struct rb_node rb_node;
  140. /* pointer to the weight counter associated with this entity */
  141. struct bfq_weight_counter *weight_counter;
  142. /*
  143. * Flag, true if the entity is on a tree (either the active or
  144. * the idle one of its service_tree) or is in service.
  145. */
  146. bool on_st;
  147. /* B-WF2Q+ start and finish timestamps [sectors/weight] */
  148. u64 start, finish;
  149. /* tree the entity is enqueued into; %NULL if not on a tree */
  150. struct rb_root *tree;
  151. /*
  152. * minimum start time of the (active) subtree rooted at this
  153. * entity; used for O(log N) lookups into active trees
  154. */
  155. u64 min_start;
  156. /* amount of service received during the last service slot */
  157. int service;
  158. /* budget, used also to calculate F_i: F_i = S_i + @budget / @weight */
  159. int budget;
  160. /* weight of the queue */
  161. int weight;
  162. /* next weight if a change is in progress */
  163. int new_weight;
  164. /* original weight, used to implement weight boosting */
  165. int orig_weight;
  166. /* parent entity, for hierarchical scheduling */
  167. struct bfq_entity *parent;
  168. /*
  169. * For non-leaf nodes in the hierarchy, the associated
  170. * scheduler queue, %NULL on leaf nodes.
  171. */
  172. struct bfq_sched_data *my_sched_data;
  173. /* the scheduler queue this entity belongs to */
  174. struct bfq_sched_data *sched_data;
  175. /* flag, set to request a weight, ioprio or ioprio_class change */
  176. int prio_changed;
  177. };
  178. struct bfq_group;
  179. /**
  180. * struct bfq_ttime - per process thinktime stats.
  181. */
  182. struct bfq_ttime {
  183. /* completion time of the last request */
  184. u64 last_end_request;
  185. /* total process thinktime */
  186. u64 ttime_total;
  187. /* number of thinktime samples */
  188. unsigned long ttime_samples;
  189. /* average process thinktime */
  190. u64 ttime_mean;
  191. };
  192. /**
  193. * struct bfq_queue - leaf schedulable entity.
  194. *
  195. * A bfq_queue is a leaf request queue; it can be associated with an
  196. * io_context or more, if it is async or shared between cooperating
  197. * processes. @cgroup holds a reference to the cgroup, to be sure that it
  198. * does not disappear while a bfqq still references it (mostly to avoid
  199. * races between request issuing and task migration followed by cgroup
  200. * destruction).
  201. * All the fields are protected by the queue lock of the containing bfqd.
  202. */
  203. struct bfq_queue {
  204. /* reference counter */
  205. int ref;
  206. /* parent bfq_data */
  207. struct bfq_data *bfqd;
  208. /* current ioprio and ioprio class */
  209. unsigned short ioprio, ioprio_class;
  210. /* next ioprio and ioprio class if a change is in progress */
  211. unsigned short new_ioprio, new_ioprio_class;
  212. /*
  213. * Shared bfq_queue if queue is cooperating with one or more
  214. * other queues.
  215. */
  216. struct bfq_queue *new_bfqq;
  217. /* request-position tree member (see bfq_group's @rq_pos_tree) */
  218. struct rb_node pos_node;
  219. /* request-position tree root (see bfq_group's @rq_pos_tree) */
  220. struct rb_root *pos_root;
  221. /* sorted list of pending requests */
  222. struct rb_root sort_list;
  223. /* if fifo isn't expired, next request to serve */
  224. struct request *next_rq;
  225. /* number of sync and async requests queued */
  226. int queued[2];
  227. /* number of requests currently allocated */
  228. int allocated;
  229. /* number of pending metadata requests */
  230. int meta_pending;
  231. /* fifo list of requests in sort_list */
  232. struct list_head fifo;
  233. /* entity representing this queue in the scheduler */
  234. struct bfq_entity entity;
  235. /* maximum budget allowed from the feedback mechanism */
  236. int max_budget;
  237. /* budget expiration (in jiffies) */
  238. unsigned long budget_timeout;
  239. /* number of requests on the dispatch list or inside driver */
  240. int dispatched;
  241. /* status flags */
  242. unsigned long flags;
  243. /* node for active/idle bfqq list inside parent bfqd */
  244. struct list_head bfqq_list;
  245. /* associated @bfq_ttime struct */
  246. struct bfq_ttime ttime;
  247. /* bit vector: a 1 for each seeky requests in history */
  248. u32 seek_history;
  249. /* node for the device's burst list */
  250. struct hlist_node burst_list_node;
  251. /* position of the last request enqueued */
  252. sector_t last_request_pos;
  253. /* Number of consecutive pairs of request completion and
  254. * arrival, such that the queue becomes idle after the
  255. * completion, but the next request arrives within an idle
  256. * time slice; used only if the queue's IO_bound flag has been
  257. * cleared.
  258. */
  259. unsigned int requests_within_timer;
  260. /* pid of the process owning the queue, used for logging purposes */
  261. pid_t pid;
  262. /*
  263. * Pointer to the bfq_io_cq owning the bfq_queue, set to %NULL
  264. * if the queue is shared.
  265. */
  266. struct bfq_io_cq *bic;
  267. /* current maximum weight-raising time for this queue */
  268. unsigned long wr_cur_max_time;
  269. /*
  270. * Minimum time instant such that, only if a new request is
  271. * enqueued after this time instant in an idle @bfq_queue with
  272. * no outstanding requests, then the task associated with the
  273. * queue it is deemed as soft real-time (see the comments on
  274. * the function bfq_bfqq_softrt_next_start())
  275. */
  276. unsigned long soft_rt_next_start;
  277. /*
  278. * Start time of the current weight-raising period if
  279. * the @bfq-queue is being weight-raised, otherwise
  280. * finish time of the last weight-raising period.
  281. */
  282. unsigned long last_wr_start_finish;
  283. /* factor by which the weight of this queue is multiplied */
  284. unsigned int wr_coeff;
  285. /*
  286. * Time of the last transition of the @bfq_queue from idle to
  287. * backlogged.
  288. */
  289. unsigned long last_idle_bklogged;
  290. /*
  291. * Cumulative service received from the @bfq_queue since the
  292. * last transition from idle to backlogged.
  293. */
  294. unsigned long service_from_backlogged;
  295. /*
  296. * Cumulative service received from the @bfq_queue since its
  297. * last transition to weight-raised state.
  298. */
  299. unsigned long service_from_wr;
  300. /*
  301. * Value of wr start time when switching to soft rt
  302. */
  303. unsigned long wr_start_at_switch_to_srt;
  304. unsigned long split_time; /* time of last split */
  305. unsigned long first_IO_time; /* time of first I/O for this queue */
  306. /* max service rate measured so far */
  307. u32 max_service_rate;
  308. /*
  309. * Ratio between the service received by bfqq while it is in
  310. * service, and the cumulative service (of requests of other
  311. * queues) that may be injected while bfqq is empty but still
  312. * in service. To increase precision, the coefficient is
  313. * measured in tenths of unit. Here are some example of (1)
  314. * ratios, (2) resulting percentages of service injected
  315. * w.r.t. to the total service dispatched while bfqq is in
  316. * service, and (3) corresponding values of the coefficient:
  317. * 1 (50%) -> 10
  318. * 2 (33%) -> 20
  319. * 10 (9%) -> 100
  320. * 9.9 (9%) -> 99
  321. * 1.5 (40%) -> 15
  322. * 0.5 (66%) -> 5
  323. * 0.1 (90%) -> 1
  324. *
  325. * So, if the coefficient is lower than 10, then
  326. * injected service is more than bfqq service.
  327. */
  328. unsigned int inject_coeff;
  329. /* amount of service injected in current service slot */
  330. unsigned int injected_service;
  331. };
  332. /**
  333. * struct bfq_io_cq - per (request_queue, io_context) structure.
  334. */
  335. struct bfq_io_cq {
  336. /* associated io_cq structure */
  337. struct io_cq icq; /* must be the first member */
  338. /* array of two process queues, the sync and the async */
  339. struct bfq_queue *bfqq[2];
  340. /* per (request_queue, blkcg) ioprio */
  341. int ioprio;
  342. #ifdef CONFIG_BFQ_GROUP_IOSCHED
  343. uint64_t blkcg_serial_nr; /* the current blkcg serial */
  344. #endif
  345. /*
  346. * Snapshot of the has_short_time flag before merging; taken
  347. * to remember its value while the queue is merged, so as to
  348. * be able to restore it in case of split.
  349. */
  350. bool saved_has_short_ttime;
  351. /*
  352. * Same purpose as the previous two fields for the I/O bound
  353. * classification of a queue.
  354. */
  355. bool saved_IO_bound;
  356. /*
  357. * Same purpose as the previous fields for the value of the
  358. * field keeping the queue's belonging to a large burst
  359. */
  360. bool saved_in_large_burst;
  361. /*
  362. * True if the queue belonged to a burst list before its merge
  363. * with another cooperating queue.
  364. */
  365. bool was_in_burst_list;
  366. /*
  367. * Similar to previous fields: save wr information.
  368. */
  369. unsigned long saved_wr_coeff;
  370. unsigned long saved_last_wr_start_finish;
  371. unsigned long saved_wr_start_at_switch_to_srt;
  372. unsigned int saved_wr_cur_max_time;
  373. struct bfq_ttime saved_ttime;
  374. };
  375. /**
  376. * struct bfq_data - per-device data structure.
  377. *
  378. * All the fields are protected by @lock.
  379. */
  380. struct bfq_data {
  381. /* device request queue */
  382. struct request_queue *queue;
  383. /* dispatch queue */
  384. struct list_head dispatch;
  385. /* root bfq_group for the device */
  386. struct bfq_group *root_group;
  387. /*
  388. * rbtree of weight counters of @bfq_queues, sorted by
  389. * weight. Used to keep track of whether all @bfq_queues have
  390. * the same weight. The tree contains one counter for each
  391. * distinct weight associated to some active and not
  392. * weight-raised @bfq_queue (see the comments to the functions
  393. * bfq_weights_tree_[add|remove] for further details).
  394. */
  395. struct rb_root queue_weights_tree;
  396. /*
  397. * rbtree of non-queue @bfq_entity weight counters, sorted by
  398. * weight. Used to keep track of whether all @bfq_groups have
  399. * the same weight. The tree contains one counter for each
  400. * distinct weight associated to some active @bfq_group (see
  401. * the comments to the functions bfq_weights_tree_[add|remove]
  402. * for further details).
  403. */
  404. struct rb_root group_weights_tree;
  405. /*
  406. * Number of bfq_queues containing requests (including the
  407. * queue in service, even if it is idling).
  408. */
  409. int busy_queues;
  410. /* number of weight-raised busy @bfq_queues */
  411. int wr_busy_queues;
  412. /* number of queued requests */
  413. int queued;
  414. /* number of requests dispatched and waiting for completion */
  415. int rq_in_driver;
  416. /*
  417. * Maximum number of requests in driver in the last
  418. * @hw_tag_samples completed requests.
  419. */
  420. int max_rq_in_driver;
  421. /* number of samples used to calculate hw_tag */
  422. int hw_tag_samples;
  423. /* flag set to one if the driver is showing a queueing behavior */
  424. int hw_tag;
  425. /* number of budgets assigned */
  426. int budgets_assigned;
  427. /*
  428. * Timer set when idling (waiting) for the next request from
  429. * the queue in service.
  430. */
  431. struct hrtimer idle_slice_timer;
  432. /* bfq_queue in service */
  433. struct bfq_queue *in_service_queue;
  434. /* on-disk position of the last served request */
  435. sector_t last_position;
  436. /* position of the last served request for the in-service queue */
  437. sector_t in_serv_last_pos;
  438. /* time of last request completion (ns) */
  439. u64 last_completion;
  440. /* time of first rq dispatch in current observation interval (ns) */
  441. u64 first_dispatch;
  442. /* time of last rq dispatch in current observation interval (ns) */
  443. u64 last_dispatch;
  444. /* beginning of the last budget */
  445. ktime_t last_budget_start;
  446. /* beginning of the last idle slice */
  447. ktime_t last_idling_start;
  448. /* number of samples in current observation interval */
  449. int peak_rate_samples;
  450. /* num of samples of seq dispatches in current observation interval */
  451. u32 sequential_samples;
  452. /* total num of sectors transferred in current observation interval */
  453. u64 tot_sectors_dispatched;
  454. /* max rq size seen during current observation interval (sectors) */
  455. u32 last_rq_max_size;
  456. /* time elapsed from first dispatch in current observ. interval (us) */
  457. u64 delta_from_first;
  458. /*
  459. * Current estimate of the device peak rate, measured in
  460. * [(sectors/usec) / 2^BFQ_RATE_SHIFT]. The left-shift by
  461. * BFQ_RATE_SHIFT is performed to increase precision in
  462. * fixed-point calculations.
  463. */
  464. u32 peak_rate;
  465. /* maximum budget allotted to a bfq_queue before rescheduling */
  466. int bfq_max_budget;
  467. /* list of all the bfq_queues active on the device */
  468. struct list_head active_list;
  469. /* list of all the bfq_queues idle on the device */
  470. struct list_head idle_list;
  471. /*
  472. * Timeout for async/sync requests; when it fires, requests
  473. * are served in fifo order.
  474. */
  475. u64 bfq_fifo_expire[2];
  476. /* weight of backward seeks wrt forward ones */
  477. unsigned int bfq_back_penalty;
  478. /* maximum allowed backward seek */
  479. unsigned int bfq_back_max;
  480. /* maximum idling time */
  481. u32 bfq_slice_idle;
  482. /* user-configured max budget value (0 for auto-tuning) */
  483. int bfq_user_max_budget;
  484. /*
  485. * Timeout for bfq_queues to consume their budget; used to
  486. * prevent seeky queues from imposing long latencies to
  487. * sequential or quasi-sequential ones (this also implies that
  488. * seeky queues cannot receive guarantees in the service
  489. * domain; after a timeout they are charged for the time they
  490. * have been in service, to preserve fairness among them, but
  491. * without service-domain guarantees).
  492. */
  493. unsigned int bfq_timeout;
  494. /*
  495. * Number of consecutive requests that must be issued within
  496. * the idle time slice to set again idling to a queue which
  497. * was marked as non-I/O-bound (see the definition of the
  498. * IO_bound flag for further details).
  499. */
  500. unsigned int bfq_requests_within_timer;
  501. /*
  502. * Force device idling whenever needed to provide accurate
  503. * service guarantees, without caring about throughput
  504. * issues. CAVEAT: this may even increase latencies, in case
  505. * of useless idling for processes that did stop doing I/O.
  506. */
  507. bool strict_guarantees;
  508. /*
  509. * Last time at which a queue entered the current burst of
  510. * queues being activated shortly after each other; for more
  511. * details about this and the following parameters related to
  512. * a burst of activations, see the comments on the function
  513. * bfq_handle_burst.
  514. */
  515. unsigned long last_ins_in_burst;
  516. /*
  517. * Reference time interval used to decide whether a queue has
  518. * been activated shortly after @last_ins_in_burst.
  519. */
  520. unsigned long bfq_burst_interval;
  521. /* number of queues in the current burst of queue activations */
  522. int burst_size;
  523. /* common parent entity for the queues in the burst */
  524. struct bfq_entity *burst_parent_entity;
  525. /* Maximum burst size above which the current queue-activation
  526. * burst is deemed as 'large'.
  527. */
  528. unsigned long bfq_large_burst_thresh;
  529. /* true if a large queue-activation burst is in progress */
  530. bool large_burst;
  531. /*
  532. * Head of the burst list (as for the above fields, more
  533. * details in the comments on the function bfq_handle_burst).
  534. */
  535. struct hlist_head burst_list;
  536. /* if set to true, low-latency heuristics are enabled */
  537. bool low_latency;
  538. /*
  539. * Maximum factor by which the weight of a weight-raised queue
  540. * is multiplied.
  541. */
  542. unsigned int bfq_wr_coeff;
  543. /* maximum duration of a weight-raising period (jiffies) */
  544. unsigned int bfq_wr_max_time;
  545. /* Maximum weight-raising duration for soft real-time processes */
  546. unsigned int bfq_wr_rt_max_time;
  547. /*
  548. * Minimum idle period after which weight-raising may be
  549. * reactivated for a queue (in jiffies).
  550. */
  551. unsigned int bfq_wr_min_idle_time;
  552. /*
  553. * Minimum period between request arrivals after which
  554. * weight-raising may be reactivated for an already busy async
  555. * queue (in jiffies).
  556. */
  557. unsigned long bfq_wr_min_inter_arr_async;
  558. /* Max service-rate for a soft real-time queue, in sectors/sec */
  559. unsigned int bfq_wr_max_softrt_rate;
  560. /*
  561. * Cached value of the product ref_rate*ref_wr_duration, used
  562. * for computing the maximum duration of weight raising
  563. * automatically.
  564. */
  565. u64 rate_dur_prod;
  566. /* fallback dummy bfqq for extreme OOM conditions */
  567. struct bfq_queue oom_bfqq;
  568. spinlock_t lock;
  569. /*
  570. * bic associated with the task issuing current bio for
  571. * merging. This and the next field are used as a support to
  572. * be able to perform the bic lookup, needed by bio-merge
  573. * functions, before the scheduler lock is taken, and thus
  574. * avoid taking the request-queue lock while the scheduler
  575. * lock is being held.
  576. */
  577. struct bfq_io_cq *bio_bic;
  578. /* bfqq associated with the task issuing current bio for merging */
  579. struct bfq_queue *bio_bfqq;
  580. /*
  581. * Depth limits used in bfq_limit_depth (see comments on the
  582. * function)
  583. */
  584. unsigned int word_depths[2][2];
  585. };
  586. enum bfqq_state_flags {
  587. BFQQF_just_created = 0, /* queue just allocated */
  588. BFQQF_busy, /* has requests or is in service */
  589. BFQQF_wait_request, /* waiting for a request */
  590. BFQQF_non_blocking_wait_rq, /*
  591. * waiting for a request
  592. * without idling the device
  593. */
  594. BFQQF_fifo_expire, /* FIFO checked in this slice */
  595. BFQQF_has_short_ttime, /* queue has a short think time */
  596. BFQQF_sync, /* synchronous queue */
  597. BFQQF_IO_bound, /*
  598. * bfqq has timed-out at least once
  599. * having consumed at most 2/10 of
  600. * its budget
  601. */
  602. BFQQF_in_large_burst, /*
  603. * bfqq activated in a large burst,
  604. * see comments to bfq_handle_burst.
  605. */
  606. BFQQF_softrt_update, /*
  607. * may need softrt-next-start
  608. * update
  609. */
  610. BFQQF_coop, /* bfqq is shared */
  611. BFQQF_split_coop /* shared bfqq will be split */
  612. };
  613. #define BFQ_BFQQ_FNS(name) \
  614. void bfq_mark_bfqq_##name(struct bfq_queue *bfqq); \
  615. void bfq_clear_bfqq_##name(struct bfq_queue *bfqq); \
  616. int bfq_bfqq_##name(const struct bfq_queue *bfqq);
  617. BFQ_BFQQ_FNS(just_created);
  618. BFQ_BFQQ_FNS(busy);
  619. BFQ_BFQQ_FNS(wait_request);
  620. BFQ_BFQQ_FNS(non_blocking_wait_rq);
  621. BFQ_BFQQ_FNS(fifo_expire);
  622. BFQ_BFQQ_FNS(has_short_ttime);
  623. BFQ_BFQQ_FNS(sync);
  624. BFQ_BFQQ_FNS(IO_bound);
  625. BFQ_BFQQ_FNS(in_large_burst);
  626. BFQ_BFQQ_FNS(coop);
  627. BFQ_BFQQ_FNS(split_coop);
  628. BFQ_BFQQ_FNS(softrt_update);
  629. #undef BFQ_BFQQ_FNS
  630. /* Expiration reasons. */
  631. enum bfqq_expiration {
  632. BFQQE_TOO_IDLE = 0, /*
  633. * queue has been idling for
  634. * too long
  635. */
  636. BFQQE_BUDGET_TIMEOUT, /* budget took too long to be used */
  637. BFQQE_BUDGET_EXHAUSTED, /* budget consumed */
  638. BFQQE_NO_MORE_REQUESTS, /* the queue has no more requests */
  639. BFQQE_PREEMPTED /* preemption in progress */
  640. };
  641. struct bfqg_stats {
  642. #if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP)
  643. /* number of ios merged */
  644. struct blkg_rwstat merged;
  645. /* total time spent on device in ns, may not be accurate w/ queueing */
  646. struct blkg_rwstat service_time;
  647. /* total time spent waiting in scheduler queue in ns */
  648. struct blkg_rwstat wait_time;
  649. /* number of IOs queued up */
  650. struct blkg_rwstat queued;
  651. /* total disk time and nr sectors dispatched by this group */
  652. struct blkg_stat time;
  653. /* sum of number of ios queued across all samples */
  654. struct blkg_stat avg_queue_size_sum;
  655. /* count of samples taken for average */
  656. struct blkg_stat avg_queue_size_samples;
  657. /* how many times this group has been removed from service tree */
  658. struct blkg_stat dequeue;
  659. /* total time spent waiting for it to be assigned a timeslice. */
  660. struct blkg_stat group_wait_time;
  661. /* time spent idling for this blkcg_gq */
  662. struct blkg_stat idle_time;
  663. /* total time with empty current active q with other requests queued */
  664. struct blkg_stat empty_time;
  665. /* fields after this shouldn't be cleared on stat reset */
  666. u64 start_group_wait_time;
  667. u64 start_idle_time;
  668. u64 start_empty_time;
  669. uint16_t flags;
  670. #endif /* CONFIG_BFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */
  671. };
  672. #ifdef CONFIG_BFQ_GROUP_IOSCHED
  673. /*
  674. * struct bfq_group_data - per-blkcg storage for the blkio subsystem.
  675. *
  676. * @ps: @blkcg_policy_storage that this structure inherits
  677. * @weight: weight of the bfq_group
  678. */
  679. struct bfq_group_data {
  680. /* must be the first member */
  681. struct blkcg_policy_data pd;
  682. unsigned int weight;
  683. };
  684. /**
  685. * struct bfq_group - per (device, cgroup) data structure.
  686. * @entity: schedulable entity to insert into the parent group sched_data.
  687. * @sched_data: own sched_data, to contain child entities (they may be
  688. * both bfq_queues and bfq_groups).
  689. * @bfqd: the bfq_data for the device this group acts upon.
  690. * @async_bfqq: array of async queues for all the tasks belonging to
  691. * the group, one queue per ioprio value per ioprio_class,
  692. * except for the idle class that has only one queue.
  693. * @async_idle_bfqq: async queue for the idle class (ioprio is ignored).
  694. * @my_entity: pointer to @entity, %NULL for the toplevel group; used
  695. * to avoid too many special cases during group creation/
  696. * migration.
  697. * @stats: stats for this bfqg.
  698. * @active_entities: number of active entities belonging to the group;
  699. * unused for the root group. Used to know whether there
  700. * are groups with more than one active @bfq_entity
  701. * (see the comments to the function
  702. * bfq_bfqq_may_idle()).
  703. * @rq_pos_tree: rbtree sorted by next_request position, used when
  704. * determining if two or more queues have interleaving
  705. * requests (see bfq_find_close_cooperator()).
  706. *
  707. * Each (device, cgroup) pair has its own bfq_group, i.e., for each cgroup
  708. * there is a set of bfq_groups, each one collecting the lower-level
  709. * entities belonging to the group that are acting on the same device.
  710. *
  711. * Locking works as follows:
  712. * o @bfqd is protected by the queue lock, RCU is used to access it
  713. * from the readers.
  714. * o All the other fields are protected by the @bfqd queue lock.
  715. */
  716. struct bfq_group {
  717. /* must be the first member */
  718. struct blkg_policy_data pd;
  719. /* cached path for this blkg (see comments in bfq_bic_update_cgroup) */
  720. char blkg_path[128];
  721. /* reference counter (see comments in bfq_bic_update_cgroup) */
  722. int ref;
  723. struct bfq_entity entity;
  724. struct bfq_sched_data sched_data;
  725. void *bfqd;
  726. struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR];
  727. struct bfq_queue *async_idle_bfqq;
  728. struct bfq_entity *my_entity;
  729. int active_entities;
  730. struct rb_root rq_pos_tree;
  731. struct bfqg_stats stats;
  732. };
  733. #else
  734. struct bfq_group {
  735. struct bfq_sched_data sched_data;
  736. struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR];
  737. struct bfq_queue *async_idle_bfqq;
  738. struct rb_root rq_pos_tree;
  739. };
  740. #endif
  741. struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity);
  742. /* --------------- main algorithm interface ----------------- */
  743. #define BFQ_SERVICE_TREE_INIT ((struct bfq_service_tree) \
  744. { RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 })
  745. extern const int bfq_timeout;
  746. struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync);
  747. void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync);
  748. struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic);
  749. void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq);
  750. void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_entity *entity,
  751. struct rb_root *root);
  752. void __bfq_weights_tree_remove(struct bfq_data *bfqd,
  753. struct bfq_entity *entity,
  754. struct rb_root *root);
  755. void bfq_weights_tree_remove(struct bfq_data *bfqd,
  756. struct bfq_queue *bfqq);
  757. void bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq,
  758. bool compensate, enum bfqq_expiration reason);
  759. void bfq_put_queue(struct bfq_queue *bfqq);
  760. void bfq_end_wr_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
  761. void bfq_schedule_dispatch(struct bfq_data *bfqd);
  762. void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
  763. /* ------------ end of main algorithm interface -------------- */
  764. /* ---------------- cgroups-support interface ---------------- */
  765. void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
  766. unsigned int op);
  767. void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op);
  768. void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op);
  769. void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
  770. u64 io_start_time_ns, unsigned int op);
  771. void bfqg_stats_update_dequeue(struct bfq_group *bfqg);
  772. void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg);
  773. void bfqg_stats_update_idle_time(struct bfq_group *bfqg);
  774. void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg);
  775. void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg);
  776. void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
  777. struct bfq_group *bfqg);
  778. void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg);
  779. void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio);
  780. void bfq_end_wr_async(struct bfq_data *bfqd);
  781. struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
  782. struct blkcg *blkcg);
  783. struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
  784. struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
  785. struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node);
  786. void bfqg_and_blkg_put(struct bfq_group *bfqg);
  787. #ifdef CONFIG_BFQ_GROUP_IOSCHED
  788. extern struct cftype bfq_blkcg_legacy_files[];
  789. extern struct cftype bfq_blkg_files[];
  790. extern struct blkcg_policy blkcg_policy_bfq;
  791. #endif
  792. /* ------------- end of cgroups-support interface ------------- */
  793. /* - interface of the internal hierarchical B-WF2Q+ scheduler - */
  794. #ifdef CONFIG_BFQ_GROUP_IOSCHED
  795. /* both next loops stop at one of the child entities of the root group */
  796. #define for_each_entity(entity) \
  797. for (; entity ; entity = entity->parent)
  798. /*
  799. * For each iteration, compute parent in advance, so as to be safe if
  800. * entity is deallocated during the iteration. Such a deallocation may
  801. * happen as a consequence of a bfq_put_queue that frees the bfq_queue
  802. * containing entity.
  803. */
  804. #define for_each_entity_safe(entity, parent) \
  805. for (; entity && ({ parent = entity->parent; 1; }); entity = parent)
  806. #else /* CONFIG_BFQ_GROUP_IOSCHED */
  807. /*
  808. * Next two macros are fake loops when cgroups support is not
  809. * enabled. I fact, in such a case, there is only one level to go up
  810. * (to reach the root group).
  811. */
  812. #define for_each_entity(entity) \
  813. for (; entity ; entity = NULL)
  814. #define for_each_entity_safe(entity, parent) \
  815. for (parent = NULL; entity ; entity = parent)
  816. #endif /* CONFIG_BFQ_GROUP_IOSCHED */
  817. struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq);
  818. struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity);
  819. struct bfq_service_tree *bfq_entity_service_tree(struct bfq_entity *entity);
  820. struct bfq_entity *bfq_entity_of(struct rb_node *node);
  821. unsigned short bfq_ioprio_to_weight(int ioprio);
  822. void bfq_put_idle_entity(struct bfq_service_tree *st,
  823. struct bfq_entity *entity);
  824. struct bfq_service_tree *
  825. __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
  826. struct bfq_entity *entity,
  827. bool update_class_too);
  828. void bfq_bfqq_served(struct bfq_queue *bfqq, int served);
  829. void bfq_bfqq_charge_time(struct bfq_data *bfqd, struct bfq_queue *bfqq,
  830. unsigned long time_ms);
  831. bool __bfq_deactivate_entity(struct bfq_entity *entity,
  832. bool ins_into_idle_tree);
  833. bool next_queue_may_preempt(struct bfq_data *bfqd);
  834. struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd);
  835. void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd);
  836. void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
  837. bool ins_into_idle_tree, bool expiration);
  838. void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq);
  839. void bfq_requeue_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
  840. bool expiration);
  841. void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq,
  842. bool expiration);
  843. void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq);
  844. /* --------------- end of interface of B-WF2Q+ ---------------- */
  845. /* Logging facilities. */
  846. #ifdef CONFIG_BFQ_GROUP_IOSCHED
  847. struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
  848. #define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \
  849. blk_add_cgroup_trace_msg((bfqd)->queue, \
  850. bfqg_to_blkg(bfqq_group(bfqq))->blkcg, \
  851. "bfq%d%c " fmt, (bfqq)->pid, \
  852. bfq_bfqq_sync((bfqq)) ? 'S' : 'A', ##args); \
  853. } while (0)
  854. #define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do { \
  855. blk_add_cgroup_trace_msg((bfqd)->queue, \
  856. bfqg_to_blkg(bfqg)->blkcg, fmt, ##args); \
  857. } while (0)
  858. #else /* CONFIG_BFQ_GROUP_IOSCHED */
  859. #define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \
  860. blk_add_trace_msg((bfqd)->queue, "bfq%d%c " fmt, (bfqq)->pid, \
  861. bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \
  862. ##args)
  863. #define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do {} while (0)
  864. #endif /* CONFIG_BFQ_GROUP_IOSCHED */
  865. #define bfq_log(bfqd, fmt, args...) \
  866. blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args)
  867. #endif /* _BFQ_H */