/*
 *
 * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU licence.
 *
 * A copy of the licence is included with the program, and can also be obtained
 * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 */

/**
 * @file mali_kbase_js_policy.h
 * Job Scheduler Policy APIs.
 */

#ifndef _KBASE_JS_POLICY_H_
#define _KBASE_JS_POLICY_H_

/**
 * @page page_kbase_js_policy Job Scheduling Policies
 * The Job Scheduling system is described in the following:
 * - @subpage page_kbase_js_policy_overview
 * - @subpage page_kbase_js_policy_operation
 *
 * The API details are as follows:
 * - @ref kbase_jm
 * - @ref kbase_js
 * - @ref kbase_js_policy
 */

/**
 * @page page_kbase_js_policy_overview Overview of the Policy System
 *
 * The Job Scheduler Policy manages:
 * - The assigning of KBase Contexts to GPU Address Spaces (\em ASs)
 * - The choosing of Job Chains (\em Jobs) from a KBase context, to run on the
 * GPU's Job Slots (\em JSs).
 * - The amount of \em time a context is assigned to (<em>scheduled on</em>) an
 * Address Space
 * - The amount of \em time a Job spends running on the GPU
 *
 * The Policy implements this management via 2 components:
 * - A Policy Queue, which manages a set of contexts that are ready to run,
 * but not currently running.
 * - A Policy Run Pool, which manages the currently running contexts (one per
 * Address Space) and the jobs to run on the Job Slots.
 *
 * Each Graphics Process in the system has at least one KBase Context.
 * Therefore, the Policy Queue can be seen as a queue of Processes waiting to
 * run Jobs on the GPU.
 *
 * <!-- The following needs to be all on one line, due to doxygen's parser -->
 * @dotfile policy_overview.dot "Diagram showing a very simplified overview of the Policy System. IRQ handling, soft/hard-stopping, contexts re-entering the system and Policy details are omitted"
 *
 * The main operations on the queue are:
 * - Enqueuing a Context to it
 * - Dequeuing a Context from it, to run it.
 * - Note: requeuing a context is much the same as enqueuing a context, but
 * occurs when a context is scheduled out of the system to allow other
 * contexts to run.
 *
 * These operations have much the same meaning for the Run Pool - Jobs are
 * dequeued to run on a Jobslot, and requeued when they are scheduled out of
 * the GPU.
 *
 * @note This is an over-simplification of the Policy APIs - there are more
 * operations than 'Enqueue'/'Dequeue', and a Dequeue from the Policy Queue
 * takes at least two function calls: one to Dequeue from the Queue, one to
 * add to the Run Pool.
 *
 * As indicated on the diagram, Jobs permanently leave the scheduling system
 * when they are completed, otherwise they get dequeued/requeued until this
 * happens. Similarly, Contexts leave the scheduling system when their jobs
 * have all completed. However, Contexts may later return to the scheduling
 * system (not shown on the diagram) if more Bags of Jobs are submitted to
 * them.
 */

/**
 * @page page_kbase_js_policy_operation Policy Operation
 *
 * We describe the actions that the Job Scheduler Core takes on the Policy in
 * the following cases:
 * - The IRQ Path
 * - The Job Submission Path
 * - The High Priority Job Submission Path
 *
 * This shows how the Policy APIs will be used by the Job Scheduler core.
 *
 * The following diagram shows an example Policy that contains a Low Priority
 * queue, and a Real-time (High Priority) Queue. The RT queue is examined
 * before the LowP one on dequeuing from the head. The Low Priority Queue is
 * ordered by time, and the RT queue is ordered by time weighted by
 * RT-priority. In addition, it shows that the Job Scheduler Core will start a
 * Soft-Stop Timer (SS-Timer) when it dequeues and submits a job. The
 * Soft-Stop time is set by a global configuration value, and must be a value
 * appropriate for the policy. For example, this could include "don't run a
 * soft-stop timer" for a First-Come-First-Served (FCFS) policy.
 *
 * <!-- The following needs to be all on one line, due to doxygen's parser -->
 * @dotfile policy_operation_diagram.dot "Diagram showing the objects managed by an Example Policy, and the operations made upon these objects by the Job Scheduler Core."
 *
 * @section sec_kbase_js_policy_operation_prio Dealing with Priority
 *
 * Priority applies separately to a context as a whole, and to the jobs within
 * a context. The jobs specify a priority in the base_jd_atom::prio member,
 * but it is independent of the context priority. That is, it only affects
 * scheduling of atoms within a context. Refer to @ref base_jd_prio for more
 * details. The meaning of the context's priority value is up to the policy
 * itself, and could be a logarithmic scale instead of a linear scale (e.g.
 * the policy could arrange for an increase/decrease in priority by 1 to
 * result in a 25% increase/decrease in the \em proportion of time spent
 * scheduled in, an effective change in timeslice of 11%).
 *
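 * As an illustrative sketch of such a logarithmic scale (the names and the
 * 25% figure below come from the example above; nothing in this API mandates
 * them), a policy might derive a time-weight from the context priority like
 * this:
 *
 * @code
 * // Fixed-point weight: each priority step scales the weight by 25%, so
 * // the scale is logarithmic in priority (each step multiplies the weight)
 * static u32 example_weight_from_priority(int prio)
 * {
 *     u32 weight = 1024;  // fixed-point 1.0 for a priority of 0
 *     int i;
 *
 *     for (i = 0; i < prio; i++)
 *         weight = (weight * 5) / 4;   // one step up: +25% share
 *     for (i = 0; i > prio; i--)
 *         weight = (weight * 4) / 5;   // one step down: inverse scaling
 *
 *     return weight;
 * }
 * @endcode
 *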
 * It is up to the policy whether a boost in priority boosts the priority of
 * the entire context (e.g. to such an extent where it may pre-empt other
 * running contexts). If it chooses to do this, the Policy must make sure that
 * only jobs from high-priority contexts are run, and that the context is
 * scheduled out once only jobs from low priority contexts remain. This
 * ensures that the low priority contexts do not gain from the priority boost,
 * yet they still get scheduled correctly with respect to other low priority
 * contexts.
 *
 *
 * @section sec_kbase_js_policy_operation_irq IRQ Path
 *
 * The following happens on the IRQ path from the Job Scheduler Core:
 * - Note the slot that completed (for later)
 * - Log the time spent by the job (and implicitly, the time spent by the
 * context)
 * - call kbasep_js_policy_log_job_result() <em>in the context of the irq
 * handler.</em>
 * - This must happen regardless of whether the job completed successfully or
 * not (otherwise the context gets away with DoS'ing the system with faulty
 * jobs)
 * - What was the result of the job?
 * - If Completed: job is just removed from the system
 * - If Hard-stop or failure: job is removed from the system
 * - If Soft-stop: queue the book-keeping work onto a work-queue: have a
 * work-queue call kbasep_js_policy_enqueue_job()
 * - Check the timeslice used by the owning context
 * - call kbasep_js_policy_should_remove_ctx() <em>in the context of the irq
 * handler.</em>
 * - If this returns true, clear the "allowed" flag.
 * - Check the ctx's flags for "allowed", "has jobs to run" and "is running
 * jobs"
 * - And so, should the context stay scheduled in?
 * - If No, push onto a work-queue the work of scheduling out the old context,
 * and getting a new one. That is:
 * - kbasep_js_policy_runpool_remove_ctx() on old_ctx
 * - kbasep_js_policy_enqueue_ctx() on old_ctx
 * - kbasep_js_policy_dequeue_head_ctx() to get new_ctx
 * - kbasep_js_policy_runpool_add_ctx() on new_ctx
 * - (all of this work is deferred on a work-queue to keep the IRQ handler
 * quick)
 * - If there is space in the completed job slot's HEAD/NEXT registers, run
 * the next job:
 * - kbasep_js_policy_dequeue_job() <em>in the context of the irq
 * handler</em> with core_req set to that of the completing slot
 * - if this returned true, submit the job to the completed slot.
 * - This is repeated until kbasep_js_policy_dequeue_job() returns
 * false, or the job slot has a job queued on both the HEAD and NEXT
 * registers.
 * - If kbasep_js_policy_dequeue_job() returned false, submit some work to
 * the work-queue to retry from outside of IRQ context (calling
 * kbasep_js_policy_dequeue_job() from a work-queue).
 *
 * Since the IRQ handler submits new jobs \em and re-checks the IRQ_RAWSTAT,
 * this sequence could loop a large number of times: this could happen if
 * the jobs submitted completed on the GPU very quickly (in a few cycles),
 * such as GPU NULL jobs. Then, the HEAD/NEXT registers will always be free to
 * take more jobs, causing us to loop until we run out of jobs.
 *
 * To mitigate this, we must limit the number of jobs submitted per slot
 * during the IRQ handler - for example, no more than 2 jobs per slot per IRQ
 * should be sufficient (to fill up the HEAD + NEXT registers in normal
 * cases). For Mali-T600 with 3 job slots, this means that up to 6 jobs in
 * total could be submitted per IRQ. Note that IRQ Throttling can make this
 * situation commonplace: 6 jobs could complete but the IRQ for each of them
 * is delayed by the throttling. By the time you get the IRQ, all 6 jobs
 * could've completed, meaning you can submit jobs to fill all 6 HEAD+NEXT
 * registers again. A sketch of such a cap follows after this paragraph.
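 *
 * A minimal sketch of such a cap (the submission helpers here are
 * hypothetical names for Scheduler Core work, not part of this API):
 *
 * @code
 * // IRQ handler fragment: cap submissions at 2 per slot per IRQ
 * enum { EXAMPLE_MAX_JOBS_PER_SLOT_PER_IRQ = 2 };
 *
 * static void example_submit_after_irq(struct kbase_device *kbdev, int js)
 * {
 *     struct kbase_jd_atom *katom;
 *     int submitted = 0;
 *
 *     while (submitted < EXAMPLE_MAX_JOBS_PER_SLOT_PER_IRQ &&
 *            example_slot_has_space(kbdev, js) &&    // HEAD/NEXT not both full
 *            kbasep_js_policy_dequeue_job(kbdev, js, &katom)) {
 *         example_submit_to_slot(kbdev, js, katom);  // write the NEXT registers
 *         submitted++;
 *     }
 *
 *     // If dequeue returned false, defer a retry to a work-queue, as above
 * }
 * @endcode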
 *
 * @note As much work is deferred as possible, which includes the scheduling
 * out of a context and scheduling in a new context. However, we can still
 * make starting a single high-priority context quick despite this:
 * - On the Mali-T600 family, there is one more AS than there are JSs.
 * - This means we can very quickly schedule out one AS, no matter what the
 * situation (because there will always be one AS that's not currently running
 * on the job slot - it can only have a job in the NEXT register).
 * - Even with this scheduling out, fair-share can still be guaranteed e.g. by
 * a timeline-based Completely Fair Scheduler.
 * - When our high-priority context comes in, we can do this quick-scheduling
 * out immediately, and then schedule in the high-priority context without
 * having to block.
 * - This all assumes that the context to schedule out is of lower
 * priority. Otherwise, we will have to block waiting for some other low
 * priority context to finish its jobs. Note that it's likely (but not
 * impossible) that the high-priority context \b is running jobs, by virtue of
 * it being high priority.
 * - Therefore, there is a high likelihood that on Mali-T600 at least one
 * high-priority context can be started very quickly. For the general case, we
 * can guarantee starting (no. ASs) - (no. JSs) high priority contexts
 * quickly. In any case, there is a high likelihood that we're able to start
 * more than one high priority context quickly.
 *
 * In terms of the functions used in the IRQ handler directly, these are the
 * performance considerations:
 * - kbasep_js_policy_log_job_result():
 * - This is just adding to a 64-bit value (possibly even a 32-bit value if we
 * only store the time the job's recently spent - see below on 'priority
 * weighting')
 * - For priority weighting, a divide operation ('div') could happen, but
 * this can happen in a deferred context (outside of IRQ) when scheduling out
 * the ctx; as per our Engineering Specification, the contexts of different
 * priority still stay scheduled in for the same timeslice, but higher
 * priority ones are scheduled back in more often.
 * - That is, the weighted and unweighted times must be stored separately, and
 * the weighted time is only updated \em outside of IRQ context.
 * - Of course, this divide is more likely to be a 'multiply by inverse of the
 * weight', assuming that the weight (priority) doesn't change.
 * - kbasep_js_policy_should_remove_ctx():
 * - This is usually just a comparison of the stored time value against some
 * maximum value. (A sketch of these two operations follows this list.)
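 *
 * The following sketch (hypothetical names; the struct is illustrative, not
 * part of this API) shows the split between the cheap IRQ-context work and
 * the deferred weighting divide:
 *
 * @code
 * #include <linux/math64.h>
 *
 * // Hypothetical per-ctx accounting for a fair-share policy
 * struct example_ctx_timing {
 *     u64 raw_time_us;      // updated in IRQ context: plain addition
 *     u64 weighted_time_us; // updated outside of IRQ context only
 *     u32 weight;           // derived from the ctx priority
 * };
 *
 * // IRQ context: cheap, no divide (cf. kbasep_js_policy_log_job_result())
 * static void example_log_time(struct example_ctx_timing *t, u64 time_us)
 * {
 *     t->raw_time_us += time_us;
 * }
 *
 * // IRQ context: a single comparison
 * // (cf. kbasep_js_policy_should_remove_ctx())
 * static bool example_should_remove(const struct example_ctx_timing *t,
 *                                   u64 timeslice_us)
 * {
 *     return t->raw_time_us >= timeslice_us;
 * }
 *
 * // Deferred context (e.g. when scheduling the ctx out): apply the weight.
 * // A real policy might use a precomputed inverse instead of the divide.
 * static void example_apply_weight(struct example_ctx_timing *t)
 * {
 *     t->weighted_time_us += div64_u64(t->raw_time_us, t->weight);
 *     t->raw_time_us = 0;
 * }
 * @endcode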
 *
 * @note all deferred work can be wrapped up into one call - we usually need
 * to indicate that a job/bag is done outside of IRQ context anyway.
 *
 *
 *
 * @section sec_kbase_js_policy_operation_submit Submission path
 *
 * Start with a Context with no jobs present, and assume equal priority of all
 * contexts in the system. The following work all happens outside of IRQ
 * Context:
 * - As soon as a job is made 'ready to run', it must be registered with the
 * Job Scheduler Policy:
 * - 'Ready to run' means they've satisfied their dependencies in the
 * Kernel-side Job Dispatch system.
 * - Call kbasep_js_policy_enqueue_job()
 * - This indicates that the job should be scheduled (it is ready to run).
 * - As soon as a ctx changes from having 0 jobs 'ready to run' to >0 jobs
 * 'ready to run', we enqueue the context on the policy queue:
 * - Call kbasep_js_policy_enqueue_ctx()
 * - This indicates that the \em ctx should be scheduled (it is ready to run)
 *
 * Next, we need to handle adding a context to the Run Pool - if it's sensible
 * to do so. This can happen due to two reasons:
 * -# A context is enqueued as above, and there are ASs free for it to run on
 * (e.g. it is the first context to be run, in which case it can be added to
 * the Run Pool immediately after enqueuing on the Policy Queue)
 * -# A previous IRQ caused another ctx to be scheduled out, requiring that
 * the context at the head of the queue be scheduled in. Such steps would
 * happen in a work queue (work deferred from the IRQ context).
 *
 * In both cases, we'd handle it as follows (a condensed sketch appears after
 * this list):
 * - Get the context at the Head of the Policy Queue:
 * - Call kbasep_js_policy_dequeue_head_ctx()
 * - Assign the Context an Address Space (Assert that there will be one free,
 * given the above two reasons)
 * - Add this context to the Run Pool:
 * - Call kbasep_js_policy_runpool_add_ctx()
 * - Now see if a job should be run:
 * - Mostly, this will be done in the IRQ handler at the completion of a
 * previous job.
 * - However, there are two cases where this cannot be done: a) The first job
 * enqueued to the system (there is no previous IRQ to act upon) b) When jobs
 * are submitted at a low enough rate to not fill up all Job Slots (or, not to
 * fill both the 'HEAD' and 'NEXT' registers in the job-slots)
 * - Hence, on each ctx <b>and job</b> submission we should try to see if we
 * can run a job:
 * - For each job slot that has free space (in NEXT or HEAD+NEXT registers):
 * - Call kbasep_js_policy_dequeue_job() with core_req set to that of the
 * slot
 * - if we got one, submit it to the job slot.
 * - This is repeated until kbasep_js_policy_dequeue_job() returns
 * false, or the job slot has a job queued on both the HEAD and NEXT
 * registers.
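 *
 * As a condensed sketch of the schedule-in steps above (the AS assignment
 * and submission helpers are hypothetical names for Scheduler Core work, not
 * Policy APIs):
 *
 * @code
 * struct kbase_context *new_ctx;
 *
 * if (kbasep_js_policy_dequeue_head_ctx(js_policy, &new_ctx)) {
 *     // An AS is guaranteed free here, per the two cases above
 *     example_assign_free_address_space(kbdev, new_ctx);
 *     kbasep_js_policy_runpool_add_ctx(js_policy, new_ctx);
 *     // Then attempt to run jobs on any slot with HEAD/NEXT space, using
 *     // the per-slot dequeue/submit loop shown in the IRQ Path section
 *     example_try_run_jobs(kbdev);
 * }
 * @endcode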
 *
 * The above case shows that we should attempt to run jobs in cases where a)
 * a ctx has been added to the Run Pool, and b) new jobs have been added to a
 * context in the Run Pool:
 * - In the latter case, the context is in the runpool because it's got a job
 * ready to run, or is already running a job
 * - We could just wait until the IRQ handler fires, but for certain types of
 * jobs this can take comparatively a long time to complete, e.g. GLES FS jobs
 * generally take much longer to run than GLES CS jobs, which are vertex
 * shader jobs.
 * - Therefore, when a new job appears in the ctx, we must check the job-slots
 * to see if they're free, and run the jobs as before.
 *
 *
 *
 * @section sec_kbase_js_policy_operation_submit_hipri Submission path for High Priority Contexts
 *
 * For High Priority Contexts on Mali-T600, we can make sure that at least 1
 * of them can be scheduled in immediately to start high priority jobs. In
 * general, (no. ASs) - (no. JSs) high priority contexts may be started
 * immediately. The following describes how this happens:
 *
 * Similar to the previous section, consider what happens with a high-priority
 * context (a context with a priority higher than that of any in the Run Pool)
 * that starts out with no jobs:
 * - A job becomes ready to run on the context, and so we enqueue the context
 * on the Policy's Queue.
 * - However, we'd like to schedule in this context immediately, instead of
 * waiting for one of the Run Pool contexts' timeslice to expire
 * - The policy's Enqueue function must detect this (because it is the policy
 * that embodies the concept of priority), and take appropriate action
 * - That is, kbasep_js_policy_enqueue_ctx() should check the Policy's Run
 * Pool to see if a lower priority context should be scheduled out, and then
 * schedule in the High Priority context (a sketch of this check appears
 * after this list).
 * - For Mali-T600, we can always pick a context to schedule out immediately
 * (because there are more ASs than JSs), and so scheduling out a victim
 * context and scheduling in the high priority context can happen immediately.
 * - If a policy implements fair-sharing, then this can still ensure the
 * victim later on gets a fair share of the GPU.
 * - As a note, consider whether the victim can be of equal/higher priority
 * than the incoming context:
 * - Usually, higher priority contexts will be the ones currently running
 * jobs, and so the context with the lowest priority is usually not running
 * jobs.
 * - This makes it likely that the victim context is low priority, but
 * it's not impossible for it to be a high priority one:
 * - Suppose 3 high priority contexts are submitting only FS jobs, and one low
 * priority context submitting CS jobs. Then, the context not running jobs
 * will be one of the high priority contexts (because only 2 FS jobs can be
 * queued/running on the GPU HW for Mali-T600).
 * - The problem can be mitigated by extra action, but it's questionable
 * whether we need to: we already have a high likelihood that there's at least
 * one high priority context - that should be good enough.
 * - And so, this method makes sure that at least one high priority context
 * can be started very quickly, but additional high priority contexts could
 * be delayed (up to one timeslice).
 * - To improve this, use a GPU with a higher number of Address Spaces vs Job
 * Slots.
 * - At this point, let's assume this high priority context has been scheduled
 * in immediately. The next step is to ensure it can start some jobs quickly.
 * - It must do this by Soft-Stopping jobs on any of the Job Slots that it can
 * submit to.
 * - The rest of the logic for starting the jobs is taken care of by the IRQ
 * handler. All the policy needs to do is ensure that
 * kbasep_js_policy_dequeue_job() will return the jobs from the high priority
 * context.
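 *
 * A sketch of the pre-emption check inside a policy's enqueue implementation
 * (all helper names here are hypothetical; how the Run Pool is searched is
 * entirely up to the policy):
 *
 * @code
 * // Inside a policy's kbasep_js_policy_enqueue_ctx() implementation
 * static void example_enqueue_ctx(union kbasep_js_policy *js_policy,
 *                                 struct kbase_context *kctx)
 * {
 *     struct kbase_context *victim =
 *         example_lowest_priority_runpool_ctx(js_policy);
 *
 *     example_add_to_queue(js_policy, kctx);
 *
 *     if (victim &&
 *         example_ctx_priority(kctx) > example_ctx_priority(victim)) {
 *         // Schedule the victim out; the incoming high priority context is
 *         // then scheduled in from the head of the queue, and its jobs are
 *         // started by Soft-Stopping jobs on its eligible Job Slots
 *         example_request_schedule_out(victim);
 *     }
 * }
 * @endcode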
 *
 * @note in SS state, we currently only use 2 job-slots (even for T608, but
 * this might change in future). In this case, it's always possible to
 * schedule out 2 ASs quickly (their jobs won't be in the HEAD registers). At
 * the same time, this maximizes usage of the job-slots (only 2 are in use),
 * because you can guarantee starting the jobs from the High Priority
 * contexts immediately too.
 *
 *
 *
 * @section sec_kbase_js_policy_operation_notes Notes
 *
 * - In this design, a separate 'init' is needed from dequeue/requeue, so that
 * information can be retained between the dequeue/requeue calls. For example,
 * the total time spent for a context/job could be logged between
 * dequeue/requeuing, to implement Fair Sharing. In this case, 'init' just
 * initializes that information to some known state.
 *
 *
 *
 */

/**
 * @addtogroup base_api
 * @{
 */

/**
 * @addtogroup base_kbase_api
 * @{
 */

/**
 * @addtogroup kbase_js_policy Job Scheduler Policy APIs
 * @{
 *
 * <b>Refer to @ref page_kbase_js_policy for an overview and detailed
 * operation of the Job Scheduler Policy and its use from the Job Scheduler
 * Core</b>.
 */

/**
 * @brief Job Scheduler Policy structure
 */
union kbasep_js_policy;

/**
 * @brief Initialize the Job Scheduler Policy
 */
int kbasep_js_policy_init(struct kbase_device *kbdev);

/**
 * @brief Terminate the Job Scheduler Policy
 */
void kbasep_js_policy_term(union kbasep_js_policy *js_policy);
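
/*
 * Typical pairing (a sketch; the exact location of the policy union within
 * struct kbase_device is assumed here, as is the error handling):
 *
 *   int err = kbasep_js_policy_init(kbdev);        // once, at device init
 *   if (err)
 *       return err;
 *   ...
 *   kbasep_js_policy_term(&kbdev->js_data.policy); // once, at device term
 */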

/**
 * @addtogroup kbase_js_policy_ctx Job Scheduler Policy, Context Management API
 * @{
 *
 * <b>Refer to @ref page_kbase_js_policy for an overview and detailed
 * operation of the Job Scheduler Policy and its use from the Job Scheduler
 * Core</b>.
 */

/**
 * @brief Job Scheduler Policy Ctx Info structure
 *
 * This structure is embedded in the struct kbase_context structure. It is
 * used to:
 * - track information needed for the policy to schedule the context (e.g.
 * time used, OS priority etc.)
 * - link together kbase_contexts into a queue, so that a struct kbase_context
 * can be obtained as the container of the policy ctx info. This allows the
 * API to return what "the next context" should be.
 * - obtain other information already stored in the struct kbase_context for
 * scheduling purposes (e.g. process ID to get the priority of the originating
 * process)
 */
union kbasep_js_policy_ctx_info;

/**
 * @brief Initialize a ctx for use with the Job Scheduler Policy
 *
 * This effectively initializes the union kbasep_js_policy_ctx_info structure
 * within the struct kbase_context (itself located within the
 * kctx->jctx.sched_info structure).
 */
int kbasep_js_policy_init_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);

/**
 * @brief Terminate resources associated with using a ctx in the Job Scheduler
 * Policy.
 */
void kbasep_js_policy_term_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);

/**
 * @brief Enqueue a context onto the Job Scheduler Policy Queue
 *
 * If the context enqueued has a priority higher than any in the Run Pool,
 * then it is the Policy's responsibility to decide whether to schedule out a
 * low priority context from the Run Pool to allow the high priority context
 * to be scheduled in.
 *
 * If the context has the privileged flag set, it will always be kept at the
 * head of the queue.
 *
 * The caller will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
 * The caller will be holding kbasep_js_device_data::queue_mutex.
 */
void kbasep_js_policy_enqueue_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);

/**
 * @brief Dequeue a context from the Head of the Job Scheduler Policy Queue
 *
 * The caller will be holding kbasep_js_device_data::queue_mutex.
 *
 * @return true if a context was available, and *kctx_ptr points to
 * the kctx dequeued.
 * @return false if no contexts were available.
 */
bool kbasep_js_policy_dequeue_head_ctx(union kbasep_js_policy *js_policy, struct kbase_context ** const kctx_ptr);

/**
 * @brief Evict a context from the Job Scheduler Policy Queue
 *
 * This is only called as part of destroying a kbase_context.
 *
 * There are many reasons why this might fail during the lifetime of a
 * context. For example, the context is in the process of being scheduled. In
 * that case a thread doing the scheduling might have a pointer to it, but the
 * context is neither in the Policy Queue, nor is it in the Run
 * Pool. Crucially, neither the Policy Queue, Run Pool, nor the Context itself
 * are locked.
 *
 * Hence to find out where in the system the context is, it is important to do
 * more than just check the kbasep_js_kctx_info::ctx::is_scheduled member.
 *
 * The caller will be holding kbasep_js_device_data::queue_mutex.
 *
 * @return true if the context was evicted from the Policy Queue
 * @return false if the context was not found in the Policy Queue
 */
bool kbasep_js_policy_try_evict_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);

/**
 * @brief Call a function on all jobs belonging to a non-queued, non-running
 * context, optionally detaching the jobs from the context as it goes.
 *
 * At the time of the call, the context is guaranteed to be not-currently
 * scheduled on the Run Pool (is_scheduled == false), and not present in
 * the Policy Queue. This is because one of the following functions was used
 * recently on the context:
 * - kbasep_js_policy_try_evict_ctx()
 * - kbasep_js_policy_runpool_remove_ctx()
 *
 * In both cases, no subsequent call was made on the context to any of:
 * - kbasep_js_policy_runpool_add_ctx()
 * - kbasep_js_policy_enqueue_ctx()
 *
 * Due to the locks that might be held at the time of the call, the callback
 * may need to defer work on a workqueue to complete its actions (e.g. when
 * cancelling jobs)
 *
 * \a detach_jobs must only be set when cancelling jobs (which occurs as part
 * of context destruction).
 *
 * The locking conditions on the caller are as follows:
 * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
 */
void kbasep_js_policy_foreach_ctx_job(union kbasep_js_policy *js_policy, struct kbase_context *kctx,
		kbasep_js_policy_ctx_job_cb callback, bool detach_jobs);
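
/*
 * Sketch of a cancellation callback (the exact kbasep_js_policy_ctx_job_cb
 * signature is assumed here to take the device and the atom; check the
 * typedef in the scheduler headers; the deferral helper is hypothetical):
 *
 *   static void example_cancel_job_cb(struct kbase_device *kbdev,
 *                                     struct kbase_jd_atom *katom)
 *   {
 *       // Heavy work must be deferred to a workqueue, per the locking note
 *       example_defer_job_cancel(kbdev, katom);
 *   }
 *
 *   // detach_jobs == true: only valid as part of context destruction
 *   kbasep_js_policy_foreach_ctx_job(js_policy, kctx,
 *                                    example_cancel_job_cb, true);
 */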

/**
 * @brief Add a context to the Job Scheduler Policy's Run Pool
 *
 * If the context enqueued has a priority higher than any in the Run Pool,
 * then it is the Policy's responsibility to decide whether to schedule out
 * low priority jobs that are currently running on the GPU.
 *
 * The number of contexts present in the Run Pool will never be more than the
 * number of Address Spaces.
 *
 * The following guarantees are made about the state of the system when this
 * is called:
 * - kctx->as_nr member is valid
 * - the context has its submit_allowed flag set
 * - kbasep_js_device_data::runpool_irq::per_as_data[kctx->as_nr] is valid
 * - The refcount of the context is guaranteed to be zero.
 * - kbasep_js_kctx_info::ctx::is_scheduled will be true.
 *
 * The locking conditions on the caller are as follows:
 * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
 * - it will be holding kbasep_js_device_data::runpool_mutex.
 * - it will be holding kbasep_js_device_data::runpool_irq::lock (a spinlock)
 *
 * Due to a spinlock being held, this function must not call any APIs that
 * sleep.
 */
void kbasep_js_policy_runpool_add_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);

/**
 * @brief Remove a context from the Job Scheduler Policy's Run Pool
 *
 * The kctx->as_nr member is valid and the context has its submit_allowed flag
 * set when this is called. The state of
 * kbasep_js_device_data::runpool_irq::per_as_data[kctx->as_nr] is also
 * valid. The refcount of the context is guaranteed to be zero.
 *
 * The locking conditions on the caller are as follows:
 * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
 * - it will be holding kbasep_js_device_data::runpool_mutex.
 * - it will be holding kbasep_js_device_data::runpool_irq::lock (a spinlock)
 *
 * Due to a spinlock being held, this function must not call any APIs that
 * sleep.
 */
void kbasep_js_policy_runpool_remove_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);

/**
 * @brief Indicate whether a context should be removed from the Run Pool
 * (should be scheduled out).
 *
 * The kbasep_js_device_data::runpool_irq::lock will be held by the caller.
 *
 * @note This API is called from IRQ context.
 */
bool kbasep_js_policy_should_remove_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);

/**
 * @brief Synchronize with any timers acting upon the runpool
 *
 * The policy should check whether any timers it owns should be running. If
 * they should not, the policy must cancel such timers and ensure they are
 * not re-run by the time this function finishes.
 *
 * In particular, the timers must not be running when there are no more
 * contexts on the runpool, because the GPU could be powered off soon after
 * this call.
 *
 * The locking conditions on the caller are as follows:
 * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
 * - it will be holding kbasep_js_device_data::runpool_mutex.
 */
void kbasep_js_policy_runpool_timers_sync(union kbasep_js_policy *js_policy);
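
/*
 * A minimal sketch of an implementation, assuming the policy keeps an
 * hrtimer and a count of scheduled contexts (member names hypothetical):
 *
 *   if (example_policy->nr_runpool_ctxs == 0) {
 *       // hrtimer_cancel() waits for a running handler to finish, so the
 *       // timer cannot fire again once this returns
 *       hrtimer_cancel(&example_policy->scheduling_timer);
 *   }
 */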

/**
 * @brief Indicate whether a new context has a higher priority than the
 * current context.
 *
 * The caller has the following conditions on locking:
 * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held for \a new_ctx
 *
 * This function must not sleep, because an IRQ spinlock might be held whilst
 * this is called.
 *
 * @note There is nothing to stop the priority of \a current_ctx changing
 * during or immediately after this function is called (because its
 * jsctx_mutex cannot be held). Therefore, this function should only be seen
 * as a heuristic guide as to whether \a new_ctx is higher priority than \a
 * current_ctx
 */
bool kbasep_js_policy_ctx_has_priority(union kbasep_js_policy *js_policy, struct kbase_context *current_ctx, struct kbase_context *new_ctx);

/** @} *//* end group kbase_js_policy_ctx */

/**
 * @addtogroup kbase_js_policy_job Job Scheduler Policy, Job Chain Management API
 * @{
 *
 * <b>Refer to @ref page_kbase_js_policy for an overview and detailed
 * operation of the Job Scheduler Policy and its use from the Job Scheduler
 * Core</b>.
 */

/**
 * @brief Job Scheduler Policy Job Info structure
 *
 * This structure is embedded in the struct kbase_jd_atom structure. It is
 * used to:
 * - track information needed for the policy to schedule the job (e.g. time
 * used, etc.)
 * - link together jobs into a queue/buffer, so that a struct kbase_jd_atom
 * can be obtained as the container of the policy job info. This allows the
 * API to return what "the next job" should be.
 */
union kbasep_js_policy_job_info;

/**
 * @brief Initialize a job for use with the Job Scheduler Policy
 *
 * This function initializes the union kbasep_js_policy_job_info structure
 * within the kbase_jd_atom. It will only initialize/allocate resources that
 * are specific to the job.
 *
 * That is, this function makes \b no attempt to:
 * - initialize any context/policy-wide information
 * - enqueue the job on the policy.
 *
 * At some later point, the following functions must be called on the job, in
 * this order:
 * - kbasep_js_policy_register_job() to register the job and initialize
 * policy/context wide data.
 * - kbasep_js_policy_enqueue_job() to enqueue the job
 *
 * A job must only ever be initialized on the Policy once, and must be
 * terminated on the Policy before the job is freed.
 *
 * The caller will not be holding any locks, and so this function will not
 * modify any information in \a kctx or \a js_policy.
 *
 * @return 0 if initialization was correct.
 */
int kbasep_js_policy_init_job(const union kbasep_js_policy *js_policy, const struct kbase_context *kctx, struct kbase_jd_atom *katom);
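
/*
 * Sketch of the required ordering (error handling simplified; the locking
 * documented on each call below still applies):
 *
 *   if (kbasep_js_policy_init_job(js_policy, kctx, katom))
 *       return -EINVAL;                                  // job-local init failed
 *   ...
 *   kbasep_js_policy_register_job(js_policy, kctx, katom); // ctx/policy-wide data
 *   kbasep_js_policy_enqueue_job(js_policy, katom);        // job is now schedulable
 */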

/**
 * @brief Register context/policy-wide information for a job on the Job
 * Scheduler Policy.
 *
 * Registers the job with the policy. This is used to track the job before it
 * has been enqueued/requeued by kbasep_js_policy_enqueue_job(). Specifically,
 * it is used to update information under a lock that could not be updated at
 * kbasep_js_policy_init_job() time (such as context/policy-wide data).
 *
 * @note This function will not fail, and hence does not allocate any
 * resources. Any failures that could occur on registration will be caught
 * during kbasep_js_policy_init_job() instead.
 *
 * A job must only ever be registered on the Policy once, and must be
 * deregistered on the Policy on completion (whether or not that completion
 * was a success/failure).
 *
 * The caller has the following conditions on locking:
 * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held.
 */
void kbasep_js_policy_register_job(union kbasep_js_policy *js_policy, struct kbase_context *kctx, struct kbase_jd_atom *katom);

/**
 * @brief De-register context/policy-wide information for a job on the Job
 * Scheduler Policy.
 *
 * This must be used before terminating the resources associated with using a
 * job in the Job Scheduler Policy. This function does not itself terminate
 * any resources, at most it just updates information in the policy and
 * context.
 *
 * The caller has the following conditions on locking:
 * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held.
 */
void kbasep_js_policy_deregister_job(union kbasep_js_policy *js_policy, struct kbase_context *kctx, struct kbase_jd_atom *katom);

/**
 * @brief Dequeue a Job for a job slot from the Job Scheduler Policy Run Pool
 *
 * The job returned by the policy will match at least one of the bits in the
 * job slot's core requirements (but it may match more than one, or all @ref
 * base_jd_core_req bits supported by the job slot).
 *
 * In addition, the requirements of the job returned will be a subset of those
 * requested - the job returned will not have requirements that \a
 * job_slot_idx cannot satisfy.
 *
 * The caller will submit the job to the GPU as soon as the GPU's NEXT
 * register for the corresponding slot is empty. Of course, the GPU will then
 * only run this new job when the currently executing job (in the jobslot's
 * HEAD register) has completed.
 *
 * @return true if a job was available, and *katom_ptr points to
 * the job dequeued.
 * @return false if no jobs were available among all ctxs in the Run Pool.
 *
 * @note base_jd_core_req is currently a u8 - beware of type conversion.
 *
 * The caller has the following conditions on locking:
 * - kbasep_js_device_data::runpool_irq::lock will be held.
 * - kbasep_js_device_data::runpool_mutex will be held.
 * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held.
 */
bool kbasep_js_policy_dequeue_job(struct kbase_device *kbdev, int job_slot_idx, struct kbase_jd_atom ** const katom_ptr);

/**
 * @brief Requeue a Job back into the Job Scheduler Policy Run Pool
 *
 * This will be used to enqueue a job after its creation and also to requeue
 * a job into the Run Pool that was previously dequeued (running). It notifies
 * the policy that the job should be run again at some point later.
 *
 * The caller has the following conditions on locking:
 * - kbasep_js_device_data::runpool_irq::lock (a spinlock) will be held.
 * - kbasep_js_device_data::runpool_mutex will be held.
 * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held.
 */
void kbasep_js_policy_enqueue_job(union kbasep_js_policy *js_policy, struct kbase_jd_atom *katom);

/**
 * @brief Log the result of a job: the time spent on a job/context, and
 * whether the job failed or not.
 *
 * Since a struct kbase_jd_atom contains a pointer to the struct kbase_context
 * owning it, this can also be used to log time on either/both the job and the
 * containing context.
 *
 * The completion state of the job can be found by examining \a
 * katom->event.event_code
 *
 * If the Job failed and the policy is implementing fair-sharing, then the
 * policy must penalize the failing job/context:
 * - At the very least, it should penalize the time taken by the amount of
 * time spent processing the IRQ in SW. This is because a job in the NEXT slot
 * waiting to run will be delayed until the failing job has had the IRQ
 * cleared.
 * - \b Optionally, the policy could apply other penalties. For example, based
 * on a threshold of a number of failing jobs, after which a large penalty is
 * applied.
 *
 * The kbasep_js_device_data::runpool_mutex will be held by the caller.
 *
 * @note This API is called from IRQ context.
 *
 * The caller has the following conditions on locking:
 * - kbasep_js_device_data::runpool_irq::lock will be held.
 *
 * @param js_policy job scheduler policy
 * @param katom job dispatch atom
 * @param time_spent_us the time spent by the job, in microseconds (10^-6 seconds).
 */
void kbasep_js_policy_log_job_result(union kbasep_js_policy *js_policy, struct kbase_jd_atom *katom, u64 time_spent_us);
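
/*
 * Illustrative IRQ-path usage (the helper name is hypothetical; the elapsed
 * time would come from timestamps taken around the job's execution):
 *
 *   u64 time_spent_us = example_job_elapsed_us(katom);
 *
 *   // katom->event.event_code already reflects success/failure here, and
 *   // logging must happen even for failed jobs (see the IRQ Path page)
 *   kbasep_js_policy_log_job_result(js_policy, katom, time_spent_us);
 */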

/** @} *//* end group kbase_js_policy_job */

/** @} *//* end group kbase_js_policy */

/** @} *//* end group base_kbase_api */

/** @} *//* end group base_api */

#endif /* _KBASE_JS_POLICY_H_ */