/*	$OpenBSD: kern_sched.c,v 1.36 2015/03/14 03:38:50 jsg Exp $	*/
/*
 * Copyright (c) 2007, 2008 Artur Grabowski <art@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/sched.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/mutex.h>

#include <uvm/uvm_extern.h>

void sched_kthreads_create(void *);

int sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p);
struct proc *sched_steal_proc(struct cpu_info *);

/*
 * To help choose which cpu should run which process, we keep track
 * of the cpus that are currently idle and the cpus that have
 * processes queued.
 */
struct cpuset sched_idle_cpus;
struct cpuset sched_queued_cpus;
struct cpuset sched_all_cpus;
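
/*
 * For example, the set of cpus that are idle with nothing queued is
 * sched_idle_cpus & ~sched_queued_cpus; sched_choosecpu() and
 * sched_choosecpu_fork() below compute exactly that with
 * cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus).
 */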

/*
 * Some general scheduler counters.
 */
uint64_t sched_nmigrations;	/* Cpu migration counter */
uint64_t sched_nomigrations;	/* Cpu no migration counter */
uint64_t sched_noidle;		/* Times we didn't pick the idle task */
uint64_t sched_stolen;		/* Times we stole proc from other cpus */
uint64_t sched_choose;		/* Times we chose a cpu */
uint64_t sched_wasidle;		/* Times we came out of idle */

/*
 * A few notes about cpu_switchto, which is implemented in MD code.
 *
 * cpu_switchto takes two arguments: the old proc and the proc to
 * switch to. The new proc will never be NULL, so we always have a
 * saved state that we need to switch to. The old proc, however, can
 * be NULL if the process is exiting; NULL for the old proc simply
 * means "don't bother saving the old state".
 *
 * cpu_switchto is supposed to atomically load the new state of the
 * process, including the pcb and pmap, and to set curproc, the p_cpu
 * pointer in the proc, and p_stat to SONPROC. This is atomic only with
 * respect to interrupts; other cpus in the system must not depend on
 * this state being consistent. Therefore no locking is necessary in
 * cpu_switchto other than blocking interrupts during the context
 * switch.
 */
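
/*
 * As an illustrative sketch only (the real implementations are
 * machine-dependent and live outside this file), cpu_switchto is
 * expected to behave roughly like:
 *
 *	cpu_switchto(struct proc *old, struct proc *new)
 *	{
 *		block interrupts;
 *		if (old != NULL)
 *			save register state into old's pcb;
 *		load new's pmap and pcb;
 *		new->p_cpu = curcpu();
 *		new->p_stat = SONPROC;
 *		curproc = new;
 *		restore register state and return on new's stack;
 *	}
 */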

/*
 * sched_init_cpu is called from main() for the boot cpu, then it's the
 * responsibility of the MD code to call it for all other cpus.
 */
void
sched_init_cpu(struct cpu_info *ci)
{
        struct schedstate_percpu *spc = &ci->ci_schedstate;
        int i;

        for (i = 0; i < SCHED_NQS; i++)
                TAILQ_INIT(&spc->spc_qs[i]);

        spc->spc_idleproc = NULL;

        kthread_create_deferred(sched_kthreads_create, ci);

        LIST_INIT(&spc->spc_deadproc);

        /*
         * Slight hack here until the cpuset code handles cpu_info
         * structures.
         */
        cpuset_init_cpu(ci);
        cpuset_add(&sched_all_cpus, ci);
}

void
sched_kthreads_create(void *v)
{
        struct cpu_info *ci = v;
        struct schedstate_percpu *spc = &ci->ci_schedstate;
        static int num;

        if (fork1(&proc0, FORK_SHAREVM|FORK_SHAREFILES|FORK_NOZOMBIE|
            FORK_SYSTEM|FORK_SIGHAND|FORK_IDLE, NULL, 0, sched_idle, ci, NULL,
            &spc->spc_idleproc))
                panic("fork idle");

        /* Name it idle%d, numbered in creation order. */
        snprintf(spc->spc_idleproc->p_comm, sizeof(spc->spc_idleproc->p_comm),
            "idle%d", num);
        num++;
}

void
sched_idle(void *v)
{
        struct schedstate_percpu *spc;
        struct proc *p = curproc;
        struct cpu_info *ci = v;
        int s;

        KERNEL_UNLOCK();

        spc = &ci->ci_schedstate;

        /*
         * First time we enter here, we're not supposed to idle,
         * just go away for a while.
         */
        SCHED_LOCK(s);
        cpuset_add(&sched_idle_cpus, ci);
        p->p_stat = SSLEEP;
        p->p_cpu = ci;
        atomic_setbits_int(&p->p_flag, P_CPUPEG);
        mi_switch();
        cpuset_del(&sched_idle_cpus, ci);
        SCHED_UNLOCK(s);

        KASSERT(ci == curcpu());
        KASSERT(curproc == spc->spc_idleproc);

        while (1) {
                while (!curcpu_is_idle()) {
                        struct proc *dead;

                        SCHED_LOCK(s);
                        p->p_stat = SSLEEP;
                        mi_switch();
                        SCHED_UNLOCK(s);

                        while ((dead = LIST_FIRST(&spc->spc_deadproc))) {
                                LIST_REMOVE(dead, p_hash);
                                exit2(dead);
                        }
                }

                splassert(IPL_NONE);

                cpuset_add(&sched_idle_cpus, ci);
                cpu_idle_enter();
                while (spc->spc_whichqs == 0) {
#ifdef MULTIPROCESSOR
                        if (spc->spc_schedflags & SPCF_SHOULDHALT &&
                            (spc->spc_schedflags & SPCF_HALTED) == 0) {
                                cpuset_del(&sched_idle_cpus, ci);
                                SCHED_LOCK(s);
                                atomic_setbits_int(&spc->spc_schedflags,
                                    spc->spc_whichqs ? 0 : SPCF_HALTED);
                                SCHED_UNLOCK(s);
                                wakeup(spc);
                        }
#endif
                        cpu_idle_cycle();
                }
                cpu_idle_leave();
                cpuset_del(&sched_idle_cpus, ci);
        }
}

/*
 * To free our address space we have to jump through a few hoops.
 * The freeing is done by the reaper, but until we have one reaper
 * per cpu, we have no way of putting this proc on the deadproc list
 * and waking up the reaper without risking having our address space and
 * stack torn from under us before we manage to switch to another proc.
 * Therefore we have a per-cpu list of dead processes where we put this
 * proc; the idle proc cleans up that list and moves the entries to the
 * reaper list.
 * All this will be unnecessary once we can bind the reaper to this cpu
 * and not risk having it switch to another cpu in case it sleeps.
 */

void
sched_exit(struct proc *p)
{
        struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
        struct timespec ts;
        struct proc *idle;
        int s;

        nanouptime(&ts);
        timespecsub(&ts, &spc->spc_runtime, &ts);
        timespecadd(&p->p_rtime, &ts, &p->p_rtime);

        LIST_INSERT_HEAD(&spc->spc_deadproc, p, p_hash);

        /* This process no longer needs to hold the kernel lock. */
        KERNEL_UNLOCK();

        SCHED_LOCK(s);
        idle = spc->spc_idleproc;
        idle->p_stat = SRUN;
        cpu_switchto(NULL, idle);
        panic("cpu_switchto returned");
}

/*
 * Run queue management.
 */
void
sched_init_runqueues(void)
{
}
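
/*
 * Each cpu has SCHED_NQS run queues plus a bitmask, spc_whichqs, with
 * one bit per nonempty queue.  Four priorities share a queue via
 * p_priority >> 2 below; e.g. a proc with p_priority 50 lands on
 * queue 50 >> 2 == 12, setting bit 12 in spc_whichqs.
 */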

void
setrunqueue(struct proc *p)
{
        struct schedstate_percpu *spc;
        int queue = p->p_priority >> 2;

        SCHED_ASSERT_LOCKED();
        spc = &p->p_cpu->ci_schedstate;
        spc->spc_nrun++;

        TAILQ_INSERT_TAIL(&spc->spc_qs[queue], p, p_runq);
        spc->spc_whichqs |= (1 << queue);
        cpuset_add(&sched_queued_cpus, p->p_cpu);

        if (cpuset_isset(&sched_idle_cpus, p->p_cpu))
                cpu_unidle(p->p_cpu);
}

void
remrunqueue(struct proc *p)
{
        struct schedstate_percpu *spc;
        int queue = p->p_priority >> 2;

        SCHED_ASSERT_LOCKED();
        spc = &p->p_cpu->ci_schedstate;
        spc->spc_nrun--;

        TAILQ_REMOVE(&spc->spc_qs[queue], p, p_runq);
        if (TAILQ_EMPTY(&spc->spc_qs[queue])) {
                spc->spc_whichqs &= ~(1 << queue);
                if (spc->spc_whichqs == 0)
                        cpuset_del(&sched_queued_cpus, p->p_cpu);
        }
}

struct proc *
sched_chooseproc(void)
{
        struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
        struct proc *p;
        int queue;

        SCHED_ASSERT_LOCKED();

#ifdef MULTIPROCESSOR
        if (spc->spc_schedflags & SPCF_SHOULDHALT) {
                if (spc->spc_whichqs) {
                        for (queue = 0; queue < SCHED_NQS; queue++) {
                                while ((p = TAILQ_FIRST(&spc->spc_qs[queue]))) {
                                        remrunqueue(p);
                                        p->p_cpu = sched_choosecpu(p);
                                        KASSERT(p->p_cpu != curcpu());
                                        setrunqueue(p);
                                }
                        }
                }
                p = spc->spc_idleproc;
                KASSERT(p);
                KASSERT(p->p_wchan == NULL);
                p->p_stat = SRUN;
                return (p);
        }
#endif

again:
        if (spc->spc_whichqs) {
                queue = ffs(spc->spc_whichqs) - 1;
                p = TAILQ_FIRST(&spc->spc_qs[queue]);
                remrunqueue(p);
                sched_noidle++;
                KASSERT(p->p_stat == SRUN);
        } else if ((p = sched_steal_proc(curcpu())) == NULL) {
                p = spc->spc_idleproc;
                if (p == NULL) {
                        int s;
                        /*
                         * We get here if someone decides to switch during
                         * boot before forking kthreads, bleh.
                         * This is kind of like a stupid idle loop.
                         */
#ifdef MULTIPROCESSOR
                        __mp_unlock(&sched_lock);
#endif
                        spl0();
                        delay(10);
                        SCHED_LOCK(s);
                        goto again;
                }
                KASSERT(p);
                p->p_stat = SRUN;
        }

        KASSERT(p->p_wchan == NULL);
        return (p);
}
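
/*
 * Since ffs() finds the lowest set bit, the lowest-numbered (i.e.
 * best-priority) nonempty queue is served first: with spc_whichqs ==
 * 0x1400 (queues 10 and 12 nonempty), ffs() returns 11 and queue 10
 * is chosen.
 */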

struct cpu_info *
sched_choosecpu_fork(struct proc *parent, int flags)
{
#ifdef MULTIPROCESSOR
        struct cpu_info *choice = NULL;
        fixpt_t load, best_load = ~0;
        int run, best_run = INT_MAX;
        struct cpu_info *ci;
        struct cpuset set;

#if 0
        /*
         * XXX
         * Don't do this until we have a painless way to move the cpu in exec.
         * Preferably when nuking the old pmap and getting a new one on a
         * new cpu.
         */
        /*
         * PPWAIT forks are simple. We know that the parent will not
         * run until we exec and choose another cpu, so we just steal its
         * cpu.
         */
        if (flags & FORK_PPWAIT)
                return (parent->p_cpu);
#endif

        /*
         * Look at all cpus that are currently idle and have nothing queued.
         * If there are none, pick the one with the fewest queued procs
         * first, then the one with the lowest load average.
         */
        cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
        cpuset_intersection(&set, &set, &sched_all_cpus);
        if (cpuset_first(&set) == NULL)
                cpuset_copy(&set, &sched_all_cpus);

        while ((ci = cpuset_first(&set)) != NULL) {
                cpuset_del(&set, ci);

                load = ci->ci_schedstate.spc_ldavg;
                run = ci->ci_schedstate.spc_nrun;

                if (choice == NULL || run < best_run ||
                    (run == best_run && load < best_load)) {
                        choice = ci;
                        best_load = load;
                        best_run = run;
                }
        }

        return (choice);
#else
        return (curcpu());
#endif
}

struct cpu_info *
sched_choosecpu(struct proc *p)
{
#ifdef MULTIPROCESSOR
        struct cpu_info *choice = NULL;
        int last_cost = INT_MAX;
        struct cpu_info *ci;
        struct cpuset set;

        /*
         * If pegged to a cpu, don't allow it to move.
         */
        if (p->p_flag & P_CPUPEG)
                return (p->p_cpu);

        sched_choose++;

        /*
         * Look at all cpus that are currently idle and have nothing queued.
         * If there are none, fall back to all cpus and pick the cheapest.
         * (A cpu that is both idle and queued might be handling an interrupt
         * at this moment and hasn't had time to leave idle yet.)
         */
        cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
        cpuset_intersection(&set, &set, &sched_all_cpus);

        /*
         * First, just check if our current cpu is in that set; if it is,
         * this is simple.
         * Also, our cpu might not be idle, but if it's the current cpu
         * and it has nothing else queued and we're curproc, take it.
         */
        if (cpuset_isset(&set, p->p_cpu) ||
            (p->p_cpu == curcpu() && p->p_cpu->ci_schedstate.spc_nrun == 0 &&
            (p->p_cpu->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) == 0 &&
            curproc == p)) {
                sched_wasidle++;
                return (p->p_cpu);
        }

        if (cpuset_first(&set) == NULL)
                cpuset_copy(&set, &sched_all_cpus);

        while ((ci = cpuset_first(&set)) != NULL) {
                int cost = sched_proc_to_cpu_cost(ci, p);

                if (choice == NULL || cost < last_cost) {
                        choice = ci;
                        last_cost = cost;
                }
                cpuset_del(&set, ci);
        }

        if (p->p_cpu != choice)
                sched_nmigrations++;
        else
                sched_nomigrations++;

        return (choice);
#else
        return (curcpu());
#endif
}

/*
 * Attempt to steal a proc from some cpu.
 */
struct proc *
sched_steal_proc(struct cpu_info *self)
{
        struct proc *best = NULL;
#ifdef MULTIPROCESSOR
        struct schedstate_percpu *spc;
        int bestcost = INT_MAX;
        struct cpu_info *ci;
        struct cpuset set;

        KASSERT((self->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) == 0);

        cpuset_copy(&set, &sched_queued_cpus);

        while ((ci = cpuset_first(&set)) != NULL) {
                struct proc *p;
                int queue;
                int cost;

                cpuset_del(&set, ci);

                spc = &ci->ci_schedstate;

                queue = ffs(spc->spc_whichqs) - 1;
                TAILQ_FOREACH(p, &spc->spc_qs[queue], p_runq) {
                        if (p->p_flag & P_CPUPEG)
                                continue;

                        cost = sched_proc_to_cpu_cost(self, p);

                        if (best == NULL || cost < bestcost) {
                                best = p;
                                bestcost = cost;
                        }
                }
        }
        if (best == NULL)
                return (NULL);

        spc = &best->p_cpu->ci_schedstate;
        remrunqueue(best);
        best->p_cpu = self;

        sched_stolen++;
#endif
        return (best);
}

#ifdef MULTIPROCESSOR
/*
 * Base-2 logarithm of an unsigned int. Returns 0 for 0 (yes, yes,
 * I know).
 */
static int
log2(unsigned int i)
{
        int ret = 0;

        while (i >>= 1)
                ret++;

        return (ret);
}
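
/*
 * For example, log2(1) == 0 and log2(4096) == 12; it is used below to
 * turn a resident page count into a rough logarithmic footprint
 * estimate.
 */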

/*
 * Calculate the cost of moving the proc to this cpu.
 *
 * What we want is some guesstimate of how much "performance" it will
 * cost us to move the proc here. Not just for caches and TLBs and NUMA
 * memory, but also for the proc itself. A highly loaded cpu might not
 * be the best candidate for this proc since it won't get run.
 *
 * Just total guesstimates for now.
 */

int sched_cost_load = 1;
int sched_cost_priority = 1;
int sched_cost_runnable = 3;
int sched_cost_resident = 1;
#endif

int
sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p)
{
        int cost = 0;
#ifdef MULTIPROCESSOR
        struct schedstate_percpu *spc;
        int l2resident = 0;

        spc = &ci->ci_schedstate;

        /*
         * First, account for the priority of the proc we want to move.
         * The lower the priority of the destination cpu's current proc
         * and the higher the priority of the proc we're moving, the more
         * willing we are to move it.
         */
        if (!cpuset_isset(&sched_idle_cpus, ci)) {
                cost += (p->p_priority - spc->spc_curpriority) *
                    sched_cost_priority;
                cost += sched_cost_runnable;
        }
        if (cpuset_isset(&sched_queued_cpus, ci))
                cost += spc->spc_nrun * sched_cost_runnable;

        /*
         * Higher load on the destination means we don't want to go there.
         */
        cost += ((sched_cost_load * spc->spc_ldavg) >> FSHIFT);

        /*
         * If the proc is on this cpu already, lower the cost by how much
         * it has been running and an estimate of its footprint.
         */
        if (p->p_cpu == ci && p->p_slptime == 0) {
                l2resident =
                    log2(pmap_resident_count(p->p_vmspace->vm_map.pmap));
                cost -= l2resident * sched_cost_resident;
        }
#endif
        return (cost);
}
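
/*
 * As a worked example with the default weights above: moving a proc
 * with p_priority 50 to a busy cpu whose spc_curpriority is 30, with
 * two procs queued and a load average of 1.0 (i.e. FSCALE), costs
 * (50 - 30) * 1 + 3 + 2 * 3 + 1 == 30.  Leaving it on its own idle,
 * empty cpu where it has 4096 resident pages costs
 * -log2(4096) * 1 == -12, so it stays put.
 */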

/*
 * Peg a proc to a cpu.
 */
void
sched_peg_curproc(struct cpu_info *ci)
{
        struct proc *p = curproc;
        int s;

        SCHED_LOCK(s);
        p->p_priority = p->p_usrpri;
        p->p_stat = SRUN;
        p->p_cpu = ci;
        atomic_setbits_int(&p->p_flag, P_CPUPEG);
        setrunqueue(p);
        p->p_ru.ru_nvcsw++;
        mi_switch();
        SCHED_UNLOCK(s);
}

#ifdef MULTIPROCESSOR

void
sched_start_secondary_cpus(void)
{
        CPU_INFO_ITERATOR cii;
        struct cpu_info *ci;

        CPU_INFO_FOREACH(cii, ci) {
                struct schedstate_percpu *spc = &ci->ci_schedstate;

                if (CPU_IS_PRIMARY(ci))
                        continue;
                cpuset_add(&sched_all_cpus, ci);
                atomic_clearbits_int(&spc->spc_schedflags,
                    SPCF_SHOULDHALT | SPCF_HALTED);
        }
}

void
sched_stop_secondary_cpus(void)
{
        CPU_INFO_ITERATOR cii;
        struct cpu_info *ci;

        /*
         * Make sure we stop the secondary CPUs.
         */
        CPU_INFO_FOREACH(cii, ci) {
                struct schedstate_percpu *spc = &ci->ci_schedstate;

                if (CPU_IS_PRIMARY(ci))
                        continue;
                cpuset_del(&sched_all_cpus, ci);
                atomic_setbits_int(&spc->spc_schedflags, SPCF_SHOULDHALT);
        }
        CPU_INFO_FOREACH(cii, ci) {
                struct schedstate_percpu *spc = &ci->ci_schedstate;
                struct sleep_state sls;

                if (CPU_IS_PRIMARY(ci))
                        continue;
                while ((spc->spc_schedflags & SPCF_HALTED) == 0) {
                        sleep_setup(&sls, spc, PZERO, "schedstate");
                        sleep_finish(&sls,
                            (spc->spc_schedflags & SPCF_HALTED) == 0);
                }
        }
}
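
/*
 * To summarize the handshake: sched_stop_secondary_cpus() sets
 * SPCF_SHOULDHALT and sleeps on spc; sched_chooseproc() then migrates
 * the halting cpu's queued procs elsewhere and returns its idle proc;
 * the idle loop notices SPCF_SHOULDHALT, sets SPCF_HALTED and calls
 * wakeup(spc) to release the sleeper.
 */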

#endif

/*
 * Functions to manipulate cpu sets.
 */
struct cpu_info *cpuset_infos[MAXCPUS];
static struct cpuset cpuset_all;

void
cpuset_init_cpu(struct cpu_info *ci)
{
        cpuset_add(&cpuset_all, ci);
        cpuset_infos[CPU_INFO_UNIT(ci)] = ci;
}

void
cpuset_clear(struct cpuset *cs)
{
        memset(cs, 0, sizeof(*cs));
}

void
cpuset_add(struct cpuset *cs, struct cpu_info *ci)
{
        unsigned int num = CPU_INFO_UNIT(ci);
        atomic_setbits_int(&cs->cs_set[num/32], (1 << (num % 32)));
}

void
cpuset_del(struct cpuset *cs, struct cpu_info *ci)
{
        unsigned int num = CPU_INFO_UNIT(ci);
        atomic_clearbits_int(&cs->cs_set[num/32], (1 << (num % 32)));
}

int
cpuset_isset(struct cpuset *cs, struct cpu_info *ci)
{
        unsigned int num = CPU_INFO_UNIT(ci);
        return (cs->cs_set[num/32] & (1 << (num % 32)));
}
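
/*
 * The set is a bitmask indexed by CPU_INFO_UNIT; e.g. the cpu with
 * unit number 35 lives in word 35 / 32 == 1 of cs_set, at bit
 * 35 % 32 == 3.
 */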

void
cpuset_add_all(struct cpuset *cs)
{
        cpuset_copy(cs, &cpuset_all);
}

void
cpuset_copy(struct cpuset *to, struct cpuset *from)
{
        memcpy(to, from, sizeof(*to));
}

struct cpu_info *
cpuset_first(struct cpuset *cs)
{
        int i;

        for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
                if (cs->cs_set[i])
                        return (cpuset_infos[i * 32 + ffs(cs->cs_set[i]) - 1]);

        return (NULL);
}

void
cpuset_union(struct cpuset *to, struct cpuset *a, struct cpuset *b)
{
        int i;

        for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
                to->cs_set[i] = a->cs_set[i] | b->cs_set[i];
}

void
cpuset_intersection(struct cpuset *to, struct cpuset *a, struct cpuset *b)
{
        int i;

        for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
                to->cs_set[i] = a->cs_set[i] & b->cs_set[i];
}

void
cpuset_complement(struct cpuset *to, struct cpuset *a, struct cpuset *b)
{
        int i;

        for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
                to->cs_set[i] = b->cs_set[i] & ~a->cs_set[i];
}