- // SPDX-License-Identifier: GPL-2.0
- /*
- * Per Entity Load Tracking
- *
- * Copyright (C) 2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
- *
- * Interactivity improvements by Mike Galbraith
- * (C) 2007 Mike Galbraith <efault@gmx.de>
- *
- * Various enhancements by Dmitry Adamushko.
- * (C) 2007 Dmitry Adamushko <dmitry.adamushko@gmail.com>
- *
- * Group scheduling enhancements by Srivatsa Vaddagiri
- * Copyright IBM Corporation, 2007
- * Author: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
- *
- * Scaled math optimizations by Thomas Gleixner
- * Copyright (C) 2007, Thomas Gleixner <tglx@linutronix.de>
- *
- * Adaptive scheduling granularity, math enhancements by Peter Zijlstra
- * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
- *
- * Move PELT related code from fair.c into this pelt.c file
- * Author: Vincent Guittot <vincent.guittot@linaro.org>
- */
- #include <linux/sched.h>
- #include "sched.h"
- #include "sched-pelt.h"
- #include "pelt.h"
- /*
- * Approximate:
- * val * y^n, where y^32 ~= 0.5 (~1 scheduling period)
- */
- static u64 decay_load(u64 val, u64 n)
- {
- unsigned int local_n;
- if (unlikely(n > LOAD_AVG_PERIOD * 63))
- return 0;
- /* after bounds checking we can collapse to 32-bit */
- local_n = n;
- /*
- * As y^PERIOD = 1/2, we can combine
- *    y^n = 1/2^(n/PERIOD) * y^(n%PERIOD)
- * with a look-up table covering y^n (n < PERIOD),
- * which gives us a constant-time decay_load().
- */
- if (unlikely(local_n >= LOAD_AVG_PERIOD)) {
- val >>= local_n / LOAD_AVG_PERIOD;
- local_n %= LOAD_AVG_PERIOD;
- }
- val = mul_u64_u32_shr(val, runnable_avg_yN_inv[local_n], 32);
- return val;
- }
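- /*
- * Editorial sketch (illustrative userspace code, not part of the kernel):
- * decay_load() is a constant-time fixed-point version of the plain
- * exponential below; the shift handles whole half-lives and the
- * runnable_avg_yN_inv[] table handles the fractional remainder.
- * For example, decay_load(1024, 32) ~= 512 and decay_load(1024, 16) ~= 724.
- */
- #include <math.h>
- #include <stdint.h>
- static uint64_t decay_load_ref(uint64_t val, unsigned int n)
- {
- 	return (uint64_t)(val * pow(0.5, n / 32.0));	/* y = 0.5^(1/32) */
- }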
- static u32 __accumulate_pelt_segments(u64 periods, u32 d1, u32 d3)
- {
- u32 c1, c2, c3 = d3; /* y^0 == 1 */
- /*
- * c1 = d1 y^p
- */
- c1 = decay_load((u64)d1, periods);
- /*
- *            p-1
- * c2 = 1024 \Sum y^n
- *            n=1
- *
- *              inf        inf
- *    = 1024 ( \Sum y^n - \Sum y^n - y^0 )
- *              n=0        n=p
- */
- c2 = LOAD_AVG_MAX - decay_load(LOAD_AVG_MAX, periods) - 1024;
- return c1 + c2 + c3;
- }
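- /*
- * Editorial sketch (illustrative userspace code): the closed form for
- * c2 above is a telescoped geometric series; LOAD_AVG_MAX is the
- * kernel's fixed-point precomputation of 1024 * \Sum_{n=0}^{inf} y^n
- * (47742). The naive per-term sum below yields the same c1 + c2 + c3,
- * up to rounding.
- */
- #include <math.h>
- #include <stdint.h>
- static uint32_t accumulate_pelt_segments_ref(uint64_t p, uint32_t d1, uint32_t d3)
- {
- 	const double y = pow(0.5, 1.0 / 32.0);
- 	double sum = d1 * pow(y, (double)p) + d3;	/* c1 and c3 (y^0 == 1) */
- 	uint64_t n;
- 	for (n = 1; n < p; n++)				/* c2: the full periods */
- 		sum += 1024.0 * pow(y, (double)n);
- 	return (uint32_t)sum;
- }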
- #define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
- /*
- * Accumulate the three separate parts of the sum; d1 the remainder
- * of the last (incomplete) period, d2 the span of full periods and d3
- * the remainder of the (incomplete) current period.
- *
- *           d1          d2           d3
- *           ^           ^            ^
- *           |           |            |
- *         |<->|<----------------->|<--->|
- * ... |---x---|------| ... |------|-----x (now)
- *
- *                           p-1
- * u' = (u + d1) y^p + 1024 \Sum y^n + d3 y^0
- *                           n=1
- *
- *    = u y^p +                                 (Step 1)
- *
- *                     p-1
- *      d1 y^p + 1024 \Sum y^n + d3 y^0         (Step 2)
- *                     n=1
- */
- static __always_inline u32
- accumulate_sum(u64 delta, int cpu, struct sched_avg *sa,
- unsigned long load, unsigned long runnable, int running)
- {
- unsigned long scale_freq, scale_cpu;
- u32 contrib = (u32)delta; /* p == 0 -> delta < 1024 */
- u64 periods;
- scale_freq = arch_scale_freq_capacity(cpu);
- scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
- delta += sa->period_contrib;
- periods = delta / 1024; /* A period is 1024us (~1ms) */
- /*
- * Step 1: decay old *_sum if we crossed period boundaries.
- */
- if (periods) {
- sa->load_sum = decay_load(sa->load_sum, periods);
- sa->runnable_load_sum =
- decay_load(sa->runnable_load_sum, periods);
- sa->util_sum = decay_load((u64)(sa->util_sum), periods);
- /*
- * Step 2
- */
- delta %= 1024;
- contrib = __accumulate_pelt_segments(periods,
- 1024 - sa->period_contrib, delta);
- }
- sa->period_contrib = delta;
- contrib = cap_scale(contrib, scale_freq);
- if (load)
- sa->load_sum += load * contrib;
- if (runnable)
- sa->runnable_load_sum += runnable * contrib;
- if (running)
- sa->util_sum += contrib * scale_cpu;
- return periods;
- }
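- /*
- * Worked example (editorial, assumed numbers): with
- * sa->period_contrib == 200 and a new delta of 3000us, delta becomes
- * 3200, so p == 3 periods, d1 == 1024 - 200 == 824 completes the old
- * period, and d3 == 3200 % 1024 == 128 opens the new one:
- *
- *   contrib = 824*y^3 + 1024*(y + y^2) + 128 ~= 2882
- *
- * which matches accumulate_pelt_segments_ref(3, 824, 128) from the
- * sketch above, up to rounding, before contrib is frequency-scaled.
- */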
- /*
- * We can represent the historical contribution to runnable average as the
- * coefficients of a geometric series. To do this we sub-divide our runnable
- * history into segments of approximately 1ms (1024us); label the segment that
- * occurred N-ms ago p_N, with p_0 corresponding to the current period, e.g.
- *
- * [<- 1024us ->|<- 1024us ->|<- 1024us ->| ...
- *       p0            p1           p2
- *      (now)       (~1ms ago)  (~2ms ago)
- *
- * Let u_i denote the fraction of p_i that the entity was runnable.
- *
- * We then designate the fractions u_i as our coefficients, yielding the
- * following representation of historical load:
- * u_0 + u_1*y + u_2*y^2 + u_3*y^3 + ...
- *
- * We choose y based on the width of a reasonable scheduling period, fixing:
- * y^32 = 0.5
- *
- * This means that the contribution to load ~32ms ago (u_32) will be weighted
- * approximately half as much as the contribution to load within the last ms
- * (u_0).
- *
- * When a period "rolls over" and we have a new u_0`, multiplying the
- * previous sum again by y is sufficient to update:
- *   load_avg = u_0` + y*(u_0 + u_1*y + u_2*y^2 + ... )
- *            = u_0 + u_1*y + u_2*y^2 + ... [re-labeling u_i --> u_{i+1}]
- */
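- /*
- * Editorial sketch (illustrative userspace code): iterating the
- * "rolls over" rule with a fully runnable task (every u_i == 1024)
- * converges on the kernel's precomputed LOAD_AVG_MAX. The 32.32
- * fixed-point constant below is assumed to equal
- * runnable_avg_yN_inv[1], i.e. y itself.
- */
- #include <stdint.h>
- #include <stdio.h>
- int main(void)
- {
- 	const uint64_t y_fixed = 0xfa83b2da;		/* y * 2^32 */
- 	uint64_t sum = 0;
- 	int ms;
- 	for (ms = 0; ms < 400; ms++)			/* converged by ~345 */
- 		sum = ((sum * y_fixed) >> 32) + 1024;	/* sum = sum*y + u_0 */
- 	printf("%llu\n", (unsigned long long)sum);	/* 47742 == LOAD_AVG_MAX */
- 	return 0;
- }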
- static __always_inline int
- ___update_load_sum(u64 now, int cpu, struct sched_avg *sa,
- unsigned long load, unsigned long runnable, int running)
- {
- u64 delta;
- delta = now - sa->last_update_time;
- /*
- * This should only happen when time goes backwards, which it
- * unfortunately does during sched clock init when we swap over to TSC.
- */
- if ((s64)delta < 0) {
- sa->last_update_time = now;
- return 0;
- }
- /*
- * Use 1024ns as the unit of measurement since it's a reasonable
- * approximation of 1us and fast to compute.
- */
- delta >>= 10;
- if (!delta)
- return 0;
- sa->last_update_time += delta << 10;
- /*
- * running is a subset of runnable (weight), so running can't be set if
- * runnable is clear. But there are corner cases where the current se
- * has already been dequeued while cfs_rq->curr still points to it.
- * This means that weight will be 0 while running is still set, both
- * for a sched_entity and for a cfs_rq if the latter becomes idle.
- * As an example, this happens during idle_balance(), which calls
- * update_blocked_averages().
- */
- if (!load)
- runnable = running = 0;
- /*
- * Now we know we crossed measurement unit boundaries. The *_avg
- * accrues by two steps:
- *
- * Step 1: accumulate *_sum since last_update_time. If we haven't
- * crossed period boundaries, finish.
- */
- if (!accumulate_sum(delta, cpu, sa, load, runnable, running))
- return 0;
- return 1;
- }
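- /*
- * Editorial sketch (illustrative, assumed numbers): the >> 10 treats
- * 1024ns as one "microsecond", and advancing last_update_time by
- * delta << 10 rather than by the raw delta carries the sub-unit
- * remainder into the next update, so no time is ever lost.
- */
- #include <stdint.h>
- static void pelt_unit_example(void)
- {
- 	uint64_t last = 0, now = 2500, delta = now - last;
- 	delta >>= 10;			/* 2 units of 1024ns each */
- 	last += delta << 10;		/* last == 2048; 452ns still pending */
- 	(void)last;
- }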
- static __always_inline void
- ___update_load_avg(struct sched_avg *sa, unsigned long load, unsigned long runnable)
- {
- u32 divider = LOAD_AVG_MAX - 1024 + sa->period_contrib;
- /*
- * Step 2: update *_avg.
- */
- sa->load_avg = div_u64(load * sa->load_sum, divider);
- sa->runnable_load_avg = div_u64(runnable * sa->runnable_load_sum, divider);
- WRITE_ONCE(sa->util_avg, sa->util_sum / divider);
- }
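- /*
- * Editorial note: the divider is the largest value a *_sum can hold at
- * this point in time: the saturated series LOAD_AVG_MAX, minus the
- * current period's eventual 1024, plus the period_contrib accrued so
- * far. Dividing by a plain LOAD_AVG_MAX instead would under-read a
- * saturated average by up to ~2%, as this illustrative userspace
- * sketch shows.
- */
- #include <stdint.h>
- #include <stdio.h>
- int main(void)
- {
- 	const uint64_t max = 47742;		/* LOAD_AVG_MAX */
- 	uint64_t sum = (max - 1024) * 1024;	/* saturated util_sum, period_contrib == 0 */
- 	printf("%llu\n", sum / (max - 1024));	/* 1024 with the correct divider */
- 	printf("%llu\n", sum / max);		/* 1002 with the naive divider */
- 	return 0;
- }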
- /*
- * sched_entity:
- *
- *   task:
- *     se_runnable() == se_weight()
- *
- *   group: [ see update_cfs_group() ]
- *     se_weight()   = tg->weight * grq->load_avg / tg->load_avg
- *     se_runnable() = se_weight(se) * grq->runnable_load_avg / grq->load_avg
- *
- *   load_sum := runnable_sum
- *   load_avg = se_weight(se) * runnable_avg
- *
- *   runnable_load_sum := runnable_sum
- *   runnable_load_avg = se_runnable(se) * runnable_avg
- *
- * XXX collapse load_sum and runnable_load_sum
- *
- * cfs_rq:
- *
- *   load_sum = \Sum se_weight(se) * se->avg.load_sum
- *   load_avg = \Sum se->avg.load_avg
- *
- *   runnable_load_sum = \Sum se_runnable(se) * se->avg.runnable_load_sum
- *   runnable_load_avg = \Sum se->avg.runnable_load_avg
- */
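- /*
- * Worked example (editorial, assumed numbers): a group with
- * tg->weight == 1024 whose load is split evenly across two CPUs
- * (each grq->load_avg == 512, tg->load_avg == 1024) gives each group
- * se a weight of se_weight() == 1024 * 512 / 1024 == 512: the group's
- * weight is distributed in proportion to where its load actually is.
- */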
- int __update_load_avg_blocked_se(u64 now, int cpu, struct sched_entity *se)
- {
- if (entity_is_task(se))
- se->runnable_weight = se->load.weight;
- if (___update_load_sum(now, cpu, &se->avg, 0, 0, 0)) {
- ___update_load_avg(&se->avg, se_weight(se), se_runnable(se));
- return 1;
- }
- return 0;
- }
- int __update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq, struct sched_entity *se)
- {
- if (entity_is_task(se))
- se->runnable_weight = se->load.weight;
- if (___update_load_sum(now, cpu, &se->avg, !!se->on_rq, !!se->on_rq,
- cfs_rq->curr == se)) {
- ___update_load_avg(&se->avg, se_weight(se), se_runnable(se));
- cfs_se_util_change(&se->avg);
- return 1;
- }
- return 0;
- }
- int __update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq)
- {
- if (___update_load_sum(now, cpu, &cfs_rq->avg,
- scale_load_down(cfs_rq->load.weight),
- scale_load_down(cfs_rq->runnable_weight),
- cfs_rq->curr != NULL)) {
- ___update_load_avg(&cfs_rq->avg, 1, 1);
- return 1;
- }
- return 0;
- }
- /*
- * rt_rq:
- *
- * util_sum = \Sum se->avg.util_sum but se->avg.util_sum is not tracked
- * util_sum = cpu_scale * load_sum
- * runnable_load_sum = load_sum
- *
- * load_avg and runnable_load_avg are not supported; they are meaningless.
- *
- */
- int update_rt_rq_load_avg(u64 now, struct rq *rq, int running)
- {
- if (___update_load_sum(now, rq->cpu, &rq->avg_rt,
- running,
- running,
- running)) {
- ___update_load_avg(&rq->avg_rt, 1, 1);
- return 1;
- }
- return 0;
- }
- /*
- * dl_rq:
- *
- * util_sum = \Sum se->avg.util_sum but se->avg.util_sum is not tracked
- * util_sum = cpu_scale * load_sum
- * runnable_load_sum = load_sum
- *
- */
- int update_dl_rq_load_avg(u64 now, struct rq *rq, int running)
- {
- if (___update_load_sum(now, rq->cpu, &rq->avg_dl,
- running,
- running,
- running)) {
- ___update_load_avg(&rq->avg_dl, 1, 1);
- return 1;
- }
- return 0;
- }
- #ifdef CONFIG_HAVE_SCHED_AVG_IRQ
- /*
- * irq:
- *
- * util_sum = \Sum se->avg.util_sum but se->avg.util_sum is not tracked
- * util_sum = cpu_scale * load_sum
- * runnable_load_sum = load_sum
- *
- */
- int update_irq_load_avg(struct rq *rq, u64 running)
- {
- int ret = 0;
- /*
- * We know how much time was consumed by interrupts since the last
- * update, but we don't know when it happened. Be pessimistic and
- * assume the interrupts occurred just before this update, which is
- * not far from reality: an interrupt will most probably wake up a
- * task and trigger an update of the rq clock, during which the
- * metric is updated.
- * We first decay over the normal-context time and then accumulate
- * the interrupt-context time.
- * We can safely subtract running from rq->clock because
- * rq->clock += delta with delta >= running.
- */
- ret = ___update_load_sum(rq->clock - running, rq->cpu, &rq->avg_irq,
- 0,
- 0,
- 0);
- ret += ___update_load_sum(rq->clock, rq->cpu, &rq->avg_irq,
- 1,
- 1,
- 1);
- if (ret)
- ___update_load_avg(&rq->avg_irq, 1, 1);
- return ret;
- }
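- /*
- * Timeline sketch (editorial, assumed numbers): with rq->clock ==
- * 10240ns and running == 2048ns of irq time since the last update,
- * the first call decays avg_irq across [last_update_time, 8192] as
- * idle time (all three flags 0), and the second accumulates
- * [8192, 10240] as fully running. The averages are recomputed only if
- * at least one of the two calls crossed a period boundary (ret != 0).
- */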
- #endif