123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634 |
- /*-
- * SPDX-License-Identifier: BSD-2-Clause
- *
- * Copyright (c) 2018 Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted providing that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
- * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
- #include <sys/types.h>
- #include <sys/param.h>
- #include <sys/sbuf.h>
- #include <sys/module.h>
- #include <sys/systm.h>
- #include <sys/errno.h>
- #include <sys/param.h>
- #include <sys/kernel.h>
- #include <sys/bus.h>
- #include <sys/cpu.h>
- #include <sys/smp.h>
- #include <sys/proc.h>
- #include <sys/sched.h>
- #include <machine/cpu.h>
- #include <machine/md_var.h>
- #include <machine/cputypes.h>
- #include <machine/specialreg.h>
- #include <contrib/dev/acpica/include/acpi.h>
- #include <dev/acpica/acpivar.h>
- #include <x86/cpufreq/hwpstate_intel_internal.h>
- #include "acpi_if.h"
- #include "cpufreq_if.h"
/*
 * TSC frequency, exported by machine-dependent clock code.
 * NOTE(review): not referenced in the visible portion of this file.
 */
extern uint64_t tsc_freq;

/* device(9) interface implementations. */
static int	intel_hwpstate_probe(device_t dev);
static int	intel_hwpstate_attach(device_t dev);
static int	intel_hwpstate_detach(device_t dev);
static int	intel_hwpstate_suspend(device_t dev);
static int	intel_hwpstate_resume(device_t dev);

/* cpufreq(4) interface implementations. */
static int	intel_hwpstate_get(device_t dev, struct cf_setting *cf);
static int	intel_hwpstate_type(device_t dev, int *type);
/*
 * Method table wiring this driver into newbus (device_*) and into the
 * cpufreq(4) framework (cpufreq_drv_*).  intel_hwpstate_identify is
 * non-static; its prototype comes from hwpstate_intel_internal.h.
 */
static device_method_t intel_hwpstate_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify, intel_hwpstate_identify),
	DEVMETHOD(device_probe, intel_hwpstate_probe),
	DEVMETHOD(device_attach, intel_hwpstate_attach),
	DEVMETHOD(device_detach, intel_hwpstate_detach),
	DEVMETHOD(device_suspend, intel_hwpstate_suspend),
	DEVMETHOD(device_resume, intel_hwpstate_resume),

	/* cpufreq interface */
	DEVMETHOD(cpufreq_drv_get, intel_hwpstate_get),
	DEVMETHOD(cpufreq_drv_type, intel_hwpstate_type),

	DEVMETHOD_END
};
/*
 * Per-device (per-CPU) software state.  The hwp_* feature flags are latched
 * from CPUID leaf 06H (cpu_power_eax/cpu_power_ecx) at attach time; the
 * high/guaranteed/efficient/low levels are the raw 8-bit performance values
 * read from MSR_IA32_HWP_CAPABILITIES in set_autonomous_hwp().
 */
struct hwp_softc {
	device_t	dev;
	bool		hwp_notifications;	/* CPUTPM1_HWP_NOTIFICATION */
	bool		hwp_activity_window;	/* CPUTPM1_HWP_ACTIVITY_WINDOW */
	bool		hwp_pref_ctrl;		/* CPUTPM1_HWP_PERF_PREF (EPP) */
	bool		hwp_pkg_ctrl;		/* CPUTPM1_HWP_PKG */
	bool		hwp_pkg_ctrl_en;	/* pkg ctrl supported AND enabled by tunable */
	bool		hwp_perf_bias;		/* CPUID_PERF_BIAS (ecx) */
	bool		hwp_perf_bias_cached;	/* hwp_energy_perf_bias is valid */

	uint64_t	req; /* Cached copy of HWP_REQUEST */
	uint64_t	hwp_energy_perf_bias;	/* Cache PERF_BIAS */

	uint8_t		high;		/* highest performance level */
	uint8_t		guaranteed;	/* guaranteed level (dynamic) */
	uint8_t		efficient;	/* most-efficient level (dynamic) */
	uint8_t		low;		/* lowest performance level */
};
/* newbus glue: one hwpstate_intel device attaches under each cpu device. */
static driver_t hwpstate_intel_driver = {
	"hwpstate_intel",
	intel_hwpstate_methods,
	sizeof(struct hwp_softc),
};

DRIVER_MODULE(hwpstate_intel, cpu, hwpstate_intel_driver, NULL, NULL);
MODULE_VERSION(hwpstate_intel, 1);
/*
 * machdep.hwpstate_pkg_ctrl: loader tunable (CTLFLAG_RDTUN, read-only at
 * runtime).  When the CPU supports package-level HWP control, this selects
 * whether the driver uses it (default) or per-core control.
 */
static bool hwpstate_pkg_ctrl_enable = true;
SYSCTL_BOOL(_machdep, OID_AUTO, hwpstate_pkg_ctrl, CTLFLAG_RDTUN,
    &hwpstate_pkg_ctrl_enable, 0,
    "Set 1 (default) to enable package-level control, 0 to disable");
- static int
- intel_hwp_dump_sysctl_handler(SYSCTL_HANDLER_ARGS)
- {
- device_t dev;
- struct pcpu *pc;
- struct sbuf *sb;
- struct hwp_softc *sc;
- uint64_t data, data2;
- int ret;
- sc = (struct hwp_softc *)arg1;
- dev = sc->dev;
- pc = cpu_get_pcpu(dev);
- if (pc == NULL)
- return (ENXIO);
- sb = sbuf_new(NULL, NULL, 1024, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
- sbuf_putc(sb, '\n');
- thread_lock(curthread);
- sched_bind(curthread, pc->pc_cpuid);
- thread_unlock(curthread);
- rdmsr_safe(MSR_IA32_PM_ENABLE, &data);
- sbuf_printf(sb, "CPU%d: HWP %sabled\n", pc->pc_cpuid,
- ((data & 1) ? "En" : "Dis"));
- if (data == 0) {
- ret = 0;
- goto out;
- }
- rdmsr_safe(MSR_IA32_HWP_CAPABILITIES, &data);
- sbuf_printf(sb, "\tHighest Performance: %03ju\n", data & 0xff);
- sbuf_printf(sb, "\tGuaranteed Performance: %03ju\n", (data >> 8) & 0xff);
- sbuf_printf(sb, "\tEfficient Performance: %03ju\n", (data >> 16) & 0xff);
- sbuf_printf(sb, "\tLowest Performance: %03ju\n", (data >> 24) & 0xff);
- rdmsr_safe(MSR_IA32_HWP_REQUEST, &data);
- data2 = 0;
- if (sc->hwp_pkg_ctrl && (data & IA32_HWP_REQUEST_PACKAGE_CONTROL))
- rdmsr_safe(MSR_IA32_HWP_REQUEST_PKG, &data2);
- sbuf_putc(sb, '\n');
- #define pkg_print(x, name, offset) do { \
- if (!sc->hwp_pkg_ctrl || (data & x) != 0) \
- sbuf_printf(sb, "\t%s: %03u\n", name, \
- (unsigned)(data >> offset) & 0xff); \
- else \
- sbuf_printf(sb, "\t%s: %03u\n", name, \
- (unsigned)(data2 >> offset) & 0xff); \
- } while (0)
- pkg_print(IA32_HWP_REQUEST_EPP_VALID,
- "Requested Efficiency Performance Preference", 24);
- pkg_print(IA32_HWP_REQUEST_DESIRED_VALID,
- "Requested Desired Performance", 16);
- pkg_print(IA32_HWP_REQUEST_MAXIMUM_VALID,
- "Requested Maximum Performance", 8);
- pkg_print(IA32_HWP_REQUEST_MINIMUM_VALID,
- "Requested Minimum Performance", 0);
- #undef pkg_print
- sbuf_putc(sb, '\n');
- out:
- thread_lock(curthread);
- sched_unbind(curthread);
- thread_unlock(curthread);
- ret = sbuf_finish(sb);
- if (ret == 0)
- ret = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb));
- sbuf_delete(sb);
- return (ret);
- }
/* Convert a percentage in [0, 100] to a raw 8-bit HWP field value. */
static inline int
percent_to_raw(int pct)
{

	MPASS(pct >= 0 && pct <= 100);
	return ((pct * 0xff) / 100);
}
/*
 * Given x * 10 in [0, 1000], round to the integer nearest x.
 *
 * This lets nice human-readable percentages round-trip through the raw
 * representation.  Without it, user-provided values such as 25, 50, and
 * 75 would come back as the slightly ugly 24, 49, and 74.
 */
static inline int
round10(int tenths)
{
	int biased;

	biased = tenths + 5;
	return (biased / 10);
}
/* Convert a raw 8-bit HWP field value back to a percentage in [0, 100]. */
static inline int
raw_to_percent(int raw)
{

	MPASS(raw >= 0 && raw <= 0xff);
	return (round10((raw * 1000) / 0xff));
}
/* Range of MSR_IA32_ENERGY_PERF_BIAS is more limited: 0-0xf. */
static inline int
percent_to_raw_perf_bias(int pct)
{

	/*
	 * Round up so that raw values present as nice round human numbers
	 * and also round-trip to the same raw value.
	 */
	MPASS(pct >= 0 && pct <= 100);
	return ((pct * 0xf + 50) / 100);
}
/* Convert a raw 0-0xf PERF_BIAS hint back to a percentage. */
static inline int
raw_to_percent_perf_bias(int raw)
{
	int twentieths;

	/* Rounding to nice human numbers despite a step interval of 6.67%. */
	MPASS(raw >= 0 && raw <= 0xf);
	twentieths = (raw * 20) / 0xf;
	return (twentieths * 5);
}
/*
 * sysctl handler for the per-device "epp" knob: read or write the
 * energy/performance preference as a percentage (0 = most performant,
 * 100 = most efficient).
 *
 * When the CPU supports EPP (hwp_pref_ctrl), the value lives in bits 31:24
 * of IA32_HWP_REQUEST (cached in sc->req); otherwise it falls back to the
 * policy-hint field of MSR_IA32_ENERGY_PERF_BIAS.
 */
static int
sysctl_epp_select(SYSCTL_HANDLER_ARGS)
{
	struct hwp_softc *sc;
	device_t dev;
	struct pcpu *pc;
	uint64_t epb;
	uint32_t val;
	int ret;

	/* oid_arg1 carries the device_t (see SYSCTL_ADD_PROC in attach). */
	dev = oidp->oid_arg1;
	sc = device_get_softc(dev);
	if (!sc->hwp_pref_ctrl && !sc->hwp_perf_bias)
		return (ENODEV);

	pc = cpu_get_pcpu(dev);
	if (pc == NULL)
		return (ENXIO);

	/* Bind to the target CPU so the MSR accesses hit the right core. */
	thread_lock(curthread);
	sched_bind(curthread, pc->pc_cpuid);
	thread_unlock(curthread);

	if (sc->hwp_pref_ctrl) {
		/* EPP is bits 31:24 of the cached HWP request. */
		val = (sc->req & IA32_HWP_REQUEST_ENERGY_PERFORMANCE_PREFERENCE) >> 24;
		val = raw_to_percent(val);
	} else {
		/*
		 * If cpuid indicates EPP is not supported, the HWP controller
		 * uses MSR_IA32_ENERGY_PERF_BIAS instead (Intel SDM §14.4.4).
		 * This register is per-core (but not HT).
		 */
		if (!sc->hwp_perf_bias_cached) {
			ret = rdmsr_safe(MSR_IA32_ENERGY_PERF_BIAS, &epb);
			if (ret)
				goto out;
			sc->hwp_energy_perf_bias = epb;
			sc->hwp_perf_bias_cached = true;
		}
		val = sc->hwp_energy_perf_bias &
		    IA32_ENERGY_PERF_BIAS_POLICY_HINT_MASK;
		val = raw_to_percent_perf_bias(val);
	}

	MPASS(val >= 0 && val <= 100);

	ret = sysctl_handle_int(oidp, &val, 0, req);
	if (ret || req->newptr == NULL)
		goto out;

	/* Reject out-of-range writes; val is unsigned, so < 0 cannot occur. */
	if (val > 100) {
		ret = EINVAL;
		goto out;
	}

	if (sc->hwp_pref_ctrl) {
		/* Update the cached request, then push it to the hardware. */
		val = percent_to_raw(val);
		sc->req =
		    ((sc->req & ~IA32_HWP_REQUEST_ENERGY_PERFORMANCE_PREFERENCE)
		    | (val << 24u));
		/* Package-level control writes the PKG MSR instead. */
		if (sc->hwp_pkg_ctrl_en)
			ret = wrmsr_safe(MSR_IA32_HWP_REQUEST_PKG, sc->req);
		else
			ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req);
	} else {
		val = percent_to_raw_perf_bias(val);
		MPASS((val & ~IA32_ENERGY_PERF_BIAS_POLICY_HINT_MASK) == 0);
		sc->hwp_energy_perf_bias =
		    ((sc->hwp_energy_perf_bias &
		    ~IA32_ENERGY_PERF_BIAS_POLICY_HINT_MASK) | val);
		ret = wrmsr_safe(MSR_IA32_ENERGY_PERF_BIAS,
		    sc->hwp_energy_perf_bias);
	}

out:
	thread_lock(curthread);
	sched_unbind(curthread);
	thread_unlock(curthread);
	return (ret);
}
- void
- intel_hwpstate_identify(driver_t *driver, device_t parent)
- {
- if (device_find_child(parent, "hwpstate_intel", -1) != NULL)
- return;
- if (cpu_vendor_id != CPU_VENDOR_INTEL)
- return;
- if (resource_disabled("hwpstate_intel", 0))
- return;
- /*
- * Intel SDM 14.4.1 (HWP Programming Interfaces):
- * Availability of HWP baseline resource and capability,
- * CPUID.06H:EAX[bit 7]: If this bit is set, HWP provides several new
- * architectural MSRs: IA32_PM_ENABLE, IA32_HWP_CAPABILITIES,
- * IA32_HWP_REQUEST, IA32_HWP_STATUS.
- */
- if ((cpu_power_eax & CPUTPM1_HWP) == 0)
- return;
- if (BUS_ADD_CHILD(parent, 10, "hwpstate_intel", device_get_unit(parent))
- == NULL)
- device_printf(parent, "hwpstate_intel: add child failed\n");
- }
/*
 * Device probe: the child is only ever added by our identify routine, so
 * just set the description and accept without wildcard matching.
 */
static int
intel_hwpstate_probe(device_t dev)
{

	device_set_desc(dev, "Intel Speed Shift");
	return (BUS_PROBE_NOWILDCARD);
}
/*
 * Enable HWP on this device's CPU and program a fully autonomous request:
 * minimum = lowest capability, maximum = highest capability, with the
 * desired-performance and activity-window fields cleared so the hardware
 * chooses both.  Caches the request word and the raw capability levels in
 * the softc.  Runs bound to the target CPU for the duration.
 */
static int
set_autonomous_hwp(struct hwp_softc *sc)
{
	struct pcpu *pc;
	device_t dev;
	uint64_t caps;
	int ret;

	dev = sc->dev;
	pc = cpu_get_pcpu(dev);
	if (pc == NULL)
		return (ENXIO);

	/* Bind to the target CPU so the MSR accesses hit the right core. */
	thread_lock(curthread);
	sched_bind(curthread, pc->pc_cpuid);
	thread_unlock(curthread);

	/* XXX: Many MSRs aren't readable until feature is enabled */
	ret = wrmsr_safe(MSR_IA32_PM_ENABLE, 1);
	if (ret) {
		/*
		 * This is actually a package-level MSR, and only the first
		 * write is not ignored. So it is harmless to enable it across
		 * all devices, and this allows us not to care especially in
		 * which order cores (and packages) are probed. This error
		 * condition should not happen given we gate on the HWP CPUID
		 * feature flag, if the Intel SDM is correct.
		 */
		device_printf(dev, "Failed to enable HWP for cpu%d (%d)\n",
		    pc->pc_cpuid, ret);
		goto out;
	}

	/* Cache the hardware's current request word as our starting point. */
	ret = rdmsr_safe(MSR_IA32_HWP_REQUEST, &sc->req);
	if (ret) {
		device_printf(dev,
		    "Failed to read HWP request MSR for cpu%d (%d)\n",
		    pc->pc_cpuid, ret);
		goto out;
	}

	ret = rdmsr_safe(MSR_IA32_HWP_CAPABILITIES, &caps);
	if (ret) {
		device_printf(dev,
		    "Failed to read HWP capabilities MSR for cpu%d (%d)\n",
		    pc->pc_cpuid, ret);
		goto out;
	}

	/*
	 * High and low are static; "guaranteed" is dynamic; and efficient is
	 * also dynamic.
	 */
	sc->high = IA32_HWP_CAPABILITIES_HIGHEST_PERFORMANCE(caps);
	sc->guaranteed = IA32_HWP_CAPABILITIES_GUARANTEED_PERFORMANCE(caps);
	sc->efficient = IA32_HWP_CAPABILITIES_EFFICIENT_PERFORMANCE(caps);
	sc->low = IA32_HWP_CAPABILITIES_LOWEST_PERFORMANCE(caps);

	/* hardware autonomous selection determines the performance target */
	sc->req &= ~IA32_HWP_DESIRED_PERFORMANCE;

	/* enable HW dynamic selection of window size */
	sc->req &= ~IA32_HWP_ACTIVITY_WINDOW;

	/* IA32_HWP_REQUEST.Minimum_Performance = IA32_HWP_CAPABILITIES.Lowest_Performance */
	sc->req &= ~IA32_HWP_MINIMUM_PERFORMANCE;
	sc->req |= sc->low;

	/* IA32_HWP_REQUEST.Maximum_Performance = IA32_HWP_CAPABILITIES.Highest_Performance. */
	sc->req &= ~IA32_HWP_REQUEST_MAXIMUM_PERFORMANCE;
	sc->req |= sc->high << 8;

	/* If supported, request package-level control for this CPU. */
	if (sc->hwp_pkg_ctrl_en)
		ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req |
		    IA32_HWP_REQUEST_PACKAGE_CONTROL);
	else
		ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req);
	if (ret) {
		device_printf(dev,
		    "Failed to setup%s autonomous HWP for cpu%d\n",
		    sc->hwp_pkg_ctrl_en ? " PKG" : "", pc->pc_cpuid);
		goto out;
	}

	/* If supported, write the PKG-wide control MSR. */
	if (sc->hwp_pkg_ctrl_en) {
		/*
		 * "The structure of the IA32_HWP_REQUEST_PKG MSR
		 * (package-level) is identical to the IA32_HWP_REQUEST MSR
		 * with the exception of the Package Control field, which does
		 * not exist." (Intel SDM §14.4.4)
		 */
		ret = wrmsr_safe(MSR_IA32_HWP_REQUEST_PKG, sc->req);
		if (ret) {
			device_printf(dev,
			    "Failed to set autonomous HWP for package\n");
		}
	}

out:
	thread_lock(curthread);
	sched_unbind(curthread);
	thread_unlock(curthread);
	return (ret);
}
/*
 * Device attach: latch the HWP-related CPUID feature bits into the softc,
 * program autonomous HWP, then register the sysctl knobs and join the
 * cpufreq(4) framework.
 */
static int
intel_hwpstate_attach(device_t dev)
{
	struct hwp_softc *sc;
	int ret;

	sc = device_get_softc(dev);
	sc->dev = dev;

	/* eax */
	if (cpu_power_eax & CPUTPM1_HWP_NOTIFICATION)
		sc->hwp_notifications = true;
	if (cpu_power_eax & CPUTPM1_HWP_ACTIVITY_WINDOW)
		sc->hwp_activity_window = true;
	if (cpu_power_eax & CPUTPM1_HWP_PERF_PREF)
		sc->hwp_pref_ctrl = true;
	if (cpu_power_eax & CPUTPM1_HWP_PKG)
		sc->hwp_pkg_ctrl = true;

	/* Allow administrators to disable pkg-level control. */
	sc->hwp_pkg_ctrl_en = (sc->hwp_pkg_ctrl && hwpstate_pkg_ctrl_enable);

	/* ecx */
	if (cpu_power_ecx & CPUID_PERF_BIAS)
		sc->hwp_perf_bias = true;

	ret = set_autonomous_hwp(sc);
	if (ret)
		return (ret);

	/* debug.<nameunit>: hidden raw dump of the HWP MSR state. */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_STATIC_CHILDREN(_debug), OID_AUTO, device_get_nameunit(dev),
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_MPSAFE,
	    sc, 0, intel_hwp_dump_sysctl_handler, "A", "");

	/* dev.hwpstate_intel.N.epp: energy/performance preference knob. */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
	    "epp", CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, dev, 0,
	    sysctl_epp_select, "I",
	    "Efficiency/Performance Preference "
	    "(range from 0, most performant, through 100, most efficient)");

	return (cpufreq_register(dev));
}
/* Device detach: deregister from the cpufreq(4) framework. */
static int
intel_hwpstate_detach(device_t dev)
{

	return (cpufreq_unregister(dev));
}
/*
 * cpufreq_drv_get: report the current operating point.  HWP exposes no
 * fixed P-state table, so the frequency is estimated from the measured
 * clock rate; volts/power/latency are reported as unknown.
 */
static int
intel_hwpstate_get(device_t dev, struct cf_setting *set)
{
	struct pcpu *pc;
	uint64_t rate;
	int ret;

	if (set == NULL)
		return (EINVAL);

	pc = cpu_get_pcpu(dev);
	if (pc == NULL)
		return (ENXIO);

	/*
	 * NOTE(review): assumes CPUFREQ_VAL_UNKNOWN is -1, so byte-wise
	 * memset yields -1 in each integer field — confirm in cpufreq.h.
	 */
	memset(set, CPUFREQ_VAL_UNKNOWN, sizeof(*set));
	set->dev = dev;

	/* Best-effort frequency estimate; leave UNKNOWN on failure. */
	ret = cpu_est_clockrate(pc->pc_cpuid, &rate);
	if (ret == 0)
		set->freq = rate / 1000000;	/* Hz -> MHz */

	set->volts = CPUFREQ_VAL_UNKNOWN;
	set->power = CPUFREQ_VAL_UNKNOWN;
	set->lat = CPUFREQ_VAL_UNKNOWN;

	return (0);
}
/*
 * cpufreq_drv_type: absolute frequencies, informational only (no settable
 * levels), and values must not be cached by the framework.
 */
static int
intel_hwpstate_type(device_t dev, int *type)
{

	if (type == NULL)
		return (EINVAL);

	*type = CPUFREQ_TYPE_ABSOLUTE | CPUFREQ_FLAG_INFO_ONLY | CPUFREQ_FLAG_UNCACHED;
	return (0);
}
/* Nothing to save at suspend; state is reprogrammed on resume. */
static int
intel_hwpstate_suspend(device_t dev)
{

	return (0);
}
/*
 * Redo a subset of set_autonomous_hwp on resume; untested. Without this,
 * testers observed that on resume MSR_IA32_HWP_REQUEST was bogus.
 *
 * Re-enables HWP, rewrites the cached request (per-CPU and, if enabled,
 * package-wide), and restores the cached energy/perf bias when that MSR is
 * in use instead of EPP.  Runs bound to the target CPU.
 */
static int
intel_hwpstate_resume(device_t dev)
{
	struct hwp_softc *sc;
	struct pcpu *pc;
	int ret;

	sc = device_get_softc(dev);
	pc = cpu_get_pcpu(dev);
	if (pc == NULL)
		return (ENXIO);

	/* Bind to the target CPU so the MSR writes hit the right core. */
	thread_lock(curthread);
	sched_bind(curthread, pc->pc_cpuid);
	thread_unlock(curthread);

	ret = wrmsr_safe(MSR_IA32_PM_ENABLE, 1);
	if (ret) {
		device_printf(dev,
		    "Failed to enable HWP for cpu%d after suspend (%d)\n",
		    pc->pc_cpuid, ret);
		goto out;
	}

	/* Restore the cached request; mirrors set_autonomous_hwp(). */
	if (sc->hwp_pkg_ctrl_en)
		ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req |
		    IA32_HWP_REQUEST_PACKAGE_CONTROL);
	else
		ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req);
	if (ret) {
		device_printf(dev,
		    "Failed to set%s autonomous HWP for cpu%d after suspend\n",
		    sc->hwp_pkg_ctrl_en ? " PKG" : "", pc->pc_cpuid);
		goto out;
	}
	if (sc->hwp_pkg_ctrl_en) {
		ret = wrmsr_safe(MSR_IA32_HWP_REQUEST_PKG, sc->req);
		if (ret) {
			device_printf(dev,
			    "Failed to set autonomous HWP for package after "
			    "suspend\n");
			goto out;
		}
	}
	/* Restore PERF_BIAS only if we were using it and had cached it. */
	if (!sc->hwp_pref_ctrl && sc->hwp_perf_bias_cached) {
		ret = wrmsr_safe(MSR_IA32_ENERGY_PERF_BIAS,
		    sc->hwp_energy_perf_bias);
		if (ret) {
			device_printf(dev,
			    "Failed to set energy perf bias for cpu%d after "
			    "suspend\n", pc->pc_cpuid);
		}
	}

out:
	thread_lock(curthread);
	sched_unbind(curthread);
	thread_unlock(curthread);
	return (ret);
}
|