/* x86_pkg_temp_thermal.c */
  1. /*
  2. * x86_pkg_temp_thermal driver
  3. * Copyright (c) 2013, Intel Corporation.
  4. *
  5. * This program is free software; you can redistribute it and/or modify it
  6. * under the terms and conditions of the GNU General Public License,
  7. * version 2, as published by the Free Software Foundation.
  8. *
  9. * This program is distributed in the hope it will be useful, but WITHOUT
  10. * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11. * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  12. * more details.
  13. *
  14. * You should have received a copy of the GNU General Public License along with
  15. * this program; if not, write to the Free Software Foundation, Inc.
  16. *
  17. */
  18. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  19. #include <linux/module.h>
  20. #include <linux/init.h>
  21. #include <linux/err.h>
  22. #include <linux/param.h>
  23. #include <linux/device.h>
  24. #include <linux/platform_device.h>
  25. #include <linux/cpu.h>
  26. #include <linux/smp.h>
  27. #include <linux/slab.h>
  28. #include <linux/pm.h>
  29. #include <linux/thermal.h>
  30. #include <linux/debugfs.h>
  31. #include <asm/cpu_device_id.h>
  32. #include <asm/mce.h>
/*
 * Rate control delay: The idea is to introduce a debounce effect.
 * This should be long enough to avoid repeated events when a
 * threshold is set to a temperature which is constantly
 * violated, but short enough to take any action. The action
 * can be to remove the threshold or change it to the next
 * interesting setting. Based on experiments, around every
 * 5 seconds under load there will be a significant
 * temperature change.
 */
#define PKG_TEMP_THERMAL_NOTIFY_DELAY	5000
static int notify_delay_ms = PKG_TEMP_THERMAL_NOTIFY_DELAY;
module_param(notify_delay_ms, int, 0644);
MODULE_PARM_DESC(notify_delay_ms,
	"User space notification delay in milli seconds.");

/* Number of trip points in the thermal zone. Currently it can't
 * be more than 2. The MSR can allow setting and getting notifications
 * for only 2 thresholds. This define enforces this, if there
 * are some wrong values returned by cpuid for the number of thresholds.
 */
#define MAX_NUMBER_OF_TRIPS	2
/* Per-package state; one instance per physical package. */
struct pkg_device {
	int				cpu;		/* CPU used for MSR access and work */
	bool				work_scheduled;	/* threshold work pending; protected by pkg_temp_lock */
	u32				tj_max;		/* TJ max in millidegrees Celsius */
	u32				msr_pkg_therm_low;	/* saved THERM_INTERRUPT MSR (lo), restored on removal */
	u32				msr_pkg_therm_high;	/* saved THERM_INTERRUPT MSR (hi) */
	struct delayed_work		work;		/* debounced threshold notification work */
	struct thermal_zone_device	*tzone;		/* registered thermal zone; NULLed before unregister */
	struct cpumask			cpumask;	/* online CPUs belonging to this package */
};
static struct thermal_zone_params pkg_temp_tz_params = {
	.no_hwmon	= true,	/* do not create a duplicate hwmon device */
};

/* Keep track of how many package pointers we allocated in init() */
static int max_packages __read_mostly;
/* Array of package pointers, indexed by logical package id */
static struct pkg_device **packages;
/* Serializes interrupt notification, work and hotplug */
static DEFINE_SPINLOCK(pkg_temp_lock);
/* Protects zone operation in the work function against hotplug removal */
static DEFINE_MUTEX(thermal_zone_mutex);

/* The dynamically assigned cpu hotplug state for module_exit() */
static enum cpuhp_state pkg_thermal_hp_state __read_mostly;

/* Debug counters to show using debugfs */
static struct dentry *debugfs;
static unsigned int pkg_interrupt_cnt;	/* threshold interrupts received */
static unsigned int pkg_work_cnt;	/* work function invocations */
  81. static int pkg_temp_debugfs_init(void)
  82. {
  83. struct dentry *d;
  84. debugfs = debugfs_create_dir("pkg_temp_thermal", NULL);
  85. if (!debugfs)
  86. return -ENOENT;
  87. d = debugfs_create_u32("pkg_thres_interrupt", S_IRUGO, debugfs,
  88. (u32 *)&pkg_interrupt_cnt);
  89. if (!d)
  90. goto err_out;
  91. d = debugfs_create_u32("pkg_thres_work", S_IRUGO, debugfs,
  92. (u32 *)&pkg_work_cnt);
  93. if (!d)
  94. goto err_out;
  95. return 0;
  96. err_out:
  97. debugfs_remove_recursive(debugfs);
  98. return -ENOENT;
  99. }
  100. /*
  101. * Protection:
  102. *
  103. * - cpu hotplug: Read serialized by cpu hotplug lock
  104. * Write must hold pkg_temp_lock
  105. *
  106. * - Other callsites: Must hold pkg_temp_lock
  107. */
  108. static struct pkg_device *pkg_temp_thermal_get_dev(unsigned int cpu)
  109. {
  110. int pkgid = topology_logical_package_id(cpu);
  111. if (pkgid >= 0 && pkgid < max_packages)
  112. return packages[pkgid];
  113. return NULL;
  114. }
  115. /*
  116. * tj-max is is interesting because threshold is set relative to this
  117. * temperature.
  118. */
  119. static int get_tj_max(int cpu, u32 *tj_max)
  120. {
  121. u32 eax, edx, val;
  122. int err;
  123. err = rdmsr_safe_on_cpu(cpu, MSR_IA32_TEMPERATURE_TARGET, &eax, &edx);
  124. if (err)
  125. return err;
  126. val = (eax >> 16) & 0xff;
  127. *tj_max = val * 1000;
  128. return val ? 0 : -EINVAL;
  129. }
  130. static int sys_get_curr_temp(struct thermal_zone_device *tzd, int *temp)
  131. {
  132. struct pkg_device *pkgdev = tzd->devdata;
  133. u32 eax, edx;
  134. rdmsr_on_cpu(pkgdev->cpu, MSR_IA32_PACKAGE_THERM_STATUS, &eax, &edx);
  135. if (eax & 0x80000000) {
  136. *temp = pkgdev->tj_max - ((eax >> 16) & 0x7f) * 1000;
  137. pr_debug("sys_get_curr_temp %d\n", *temp);
  138. return 0;
  139. }
  140. return -EINVAL;
  141. }
  142. static int sys_get_trip_temp(struct thermal_zone_device *tzd,
  143. int trip, int *temp)
  144. {
  145. struct pkg_device *pkgdev = tzd->devdata;
  146. unsigned long thres_reg_value;
  147. u32 mask, shift, eax, edx;
  148. int ret;
  149. if (trip >= MAX_NUMBER_OF_TRIPS)
  150. return -EINVAL;
  151. if (trip) {
  152. mask = THERM_MASK_THRESHOLD1;
  153. shift = THERM_SHIFT_THRESHOLD1;
  154. } else {
  155. mask = THERM_MASK_THRESHOLD0;
  156. shift = THERM_SHIFT_THRESHOLD0;
  157. }
  158. ret = rdmsr_on_cpu(pkgdev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
  159. &eax, &edx);
  160. if (ret < 0)
  161. return ret;
  162. thres_reg_value = (eax & mask) >> shift;
  163. if (thres_reg_value)
  164. *temp = pkgdev->tj_max - thres_reg_value * 1000;
  165. else
  166. *temp = 0;
  167. pr_debug("sys_get_trip_temp %d\n", *temp);
  168. return 0;
  169. }
  170. static int
  171. sys_set_trip_temp(struct thermal_zone_device *tzd, int trip, int temp)
  172. {
  173. struct pkg_device *pkgdev = tzd->devdata;
  174. u32 l, h, mask, shift, intr;
  175. int ret;
  176. if (trip >= MAX_NUMBER_OF_TRIPS || temp >= pkgdev->tj_max)
  177. return -EINVAL;
  178. ret = rdmsr_on_cpu(pkgdev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
  179. &l, &h);
  180. if (ret < 0)
  181. return ret;
  182. if (trip) {
  183. mask = THERM_MASK_THRESHOLD1;
  184. shift = THERM_SHIFT_THRESHOLD1;
  185. intr = THERM_INT_THRESHOLD1_ENABLE;
  186. } else {
  187. mask = THERM_MASK_THRESHOLD0;
  188. shift = THERM_SHIFT_THRESHOLD0;
  189. intr = THERM_INT_THRESHOLD0_ENABLE;
  190. }
  191. l &= ~mask;
  192. /*
  193. * When users space sets a trip temperature == 0, which is indication
  194. * that, it is no longer interested in receiving notifications.
  195. */
  196. if (!temp) {
  197. l &= ~intr;
  198. } else {
  199. l |= (pkgdev->tj_max - temp)/1000 << shift;
  200. l |= intr;
  201. }
  202. return wrmsr_on_cpu(pkgdev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
  203. }
/* Thermal zone callback: both package thresholds are passive trip points. */
static int sys_get_trip_type(struct thermal_zone_device *thermal, int trip,
			     enum thermal_trip_type *type)
{
	*type = THERMAL_TRIP_PASSIVE;
	return 0;
}
/* Thermal zone callback registry */
static struct thermal_zone_device_ops tzone_ops = {
	.get_temp	= sys_get_curr_temp,
	.get_trip_temp	= sys_get_trip_temp,
	.get_trip_type	= sys_get_trip_type,
	.set_trip_temp	= sys_set_trip_temp,
};
  217. static bool pkg_thermal_rate_control(void)
  218. {
  219. return true;
  220. }
  221. /* Enable threshold interrupt on local package/cpu */
  222. static inline void enable_pkg_thres_interrupt(void)
  223. {
  224. u8 thres_0, thres_1;
  225. u32 l, h;
  226. rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
  227. /* only enable/disable if it had valid threshold value */
  228. thres_0 = (l & THERM_MASK_THRESHOLD0) >> THERM_SHIFT_THRESHOLD0;
  229. thres_1 = (l & THERM_MASK_THRESHOLD1) >> THERM_SHIFT_THRESHOLD1;
  230. if (thres_0)
  231. l |= THERM_INT_THRESHOLD0_ENABLE;
  232. if (thres_1)
  233. l |= THERM_INT_THRESHOLD1_ENABLE;
  234. wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
  235. }
/* Disable both threshold interrupts on the local package/cpu. */
static inline void disable_pkg_thres_interrupt(void)
{
	u32 l, h;

	rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
	/* Threshold values stay programmed; only the enables are cleared. */
	l &= ~(THERM_INT_THRESHOLD0_ENABLE | THERM_INT_THRESHOLD1_ENABLE);
	wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
}
/*
 * Debounced work scheduled from the threshold interrupt. Acks the
 * threshold log bits, re-arms the interrupt (disabled in
 * pkg_thermal_notify()) and notifies the thermal core if a threshold
 * was actually crossed.
 */
static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work)
{
	struct thermal_zone_device *tzone = NULL;
	int cpu = smp_processor_id();
	struct pkg_device *pkgdev;
	u64 msr_val, wr_val;

	/*
	 * Lock order: thermal_zone_mutex guards the zone against concurrent
	 * unregistration in the offline callback; pkg_temp_lock serializes
	 * against the interrupt notifier and hotplug.
	 */
	mutex_lock(&thermal_zone_mutex);
	spin_lock_irq(&pkg_temp_lock);
	++pkg_work_cnt;

	pkgdev = pkg_temp_thermal_get_dev(cpu);
	if (!pkgdev) {
		/* The package went away via hotplug; nothing to do. */
		spin_unlock_irq(&pkg_temp_lock);
		mutex_unlock(&thermal_zone_mutex);
		return;
	}
	pkgdev->work_scheduled = false;

	rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
	wr_val = msr_val & ~(THERM_LOG_THRESHOLD0 | THERM_LOG_THRESHOLD1);
	if (wr_val != msr_val) {
		/* A log bit was set: ack it and remember to notify below. */
		wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS, wr_val);
		tzone = pkgdev->tzone;
	}

	enable_pkg_thres_interrupt();
	spin_unlock_irq(&pkg_temp_lock);

	/*
	 * If tzone is not NULL, then thermal_zone_mutex will prevent the
	 * concurrent removal in the cpu offline callback.
	 */
	if (tzone)
		thermal_zone_device_update(tzone, THERMAL_EVENT_UNSPECIFIED);

	mutex_unlock(&thermal_zone_mutex);
}
  276. static void pkg_thermal_schedule_work(int cpu, struct delayed_work *work)
  277. {
  278. unsigned long ms = msecs_to_jiffies(notify_delay_ms);
  279. schedule_delayed_work_on(cpu, work, ms);
  280. }
/*
 * Threshold interrupt notifier, invoked from the thermal interrupt
 * handler in interrupt context. Masks further threshold interrupts and
 * schedules the debounced work on the package's designated CPU.
 */
static int pkg_thermal_notify(u64 msr_val)
{
	int cpu = smp_processor_id();
	struct pkg_device *pkgdev;
	unsigned long flags;

	spin_lock_irqsave(&pkg_temp_lock, flags);
	++pkg_interrupt_cnt;

	/* Stays disabled until the work function re-arms it. */
	disable_pkg_thres_interrupt();

	/* Work is per package, so scheduling it once is enough. */
	pkgdev = pkg_temp_thermal_get_dev(cpu);
	if (pkgdev && !pkgdev->work_scheduled) {
		pkgdev->work_scheduled = true;
		pkg_thermal_schedule_work(pkgdev->cpu, &pkgdev->work);
	}

	spin_unlock_irqrestore(&pkg_temp_lock, flags);
	return 0;
}
  298. static int pkg_temp_thermal_device_add(unsigned int cpu)
  299. {
  300. int pkgid = topology_logical_package_id(cpu);
  301. u32 tj_max, eax, ebx, ecx, edx;
  302. struct pkg_device *pkgdev;
  303. int thres_count, err;
  304. if (pkgid >= max_packages)
  305. return -ENOMEM;
  306. cpuid(6, &eax, &ebx, &ecx, &edx);
  307. thres_count = ebx & 0x07;
  308. if (!thres_count)
  309. return -ENODEV;
  310. thres_count = clamp_val(thres_count, 0, MAX_NUMBER_OF_TRIPS);
  311. err = get_tj_max(cpu, &tj_max);
  312. if (err)
  313. return err;
  314. pkgdev = kzalloc(sizeof(*pkgdev), GFP_KERNEL);
  315. if (!pkgdev)
  316. return -ENOMEM;
  317. INIT_DELAYED_WORK(&pkgdev->work, pkg_temp_thermal_threshold_work_fn);
  318. pkgdev->cpu = cpu;
  319. pkgdev->tj_max = tj_max;
  320. pkgdev->tzone = thermal_zone_device_register("x86_pkg_temp",
  321. thres_count,
  322. (thres_count == MAX_NUMBER_OF_TRIPS) ? 0x03 : 0x01,
  323. pkgdev, &tzone_ops, &pkg_temp_tz_params, 0, 0);
  324. if (IS_ERR(pkgdev->tzone)) {
  325. err = PTR_ERR(pkgdev->tzone);
  326. kfree(pkgdev);
  327. return err;
  328. }
  329. /* Store MSR value for package thermal interrupt, to restore at exit */
  330. rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, pkgdev->msr_pkg_therm_low,
  331. pkgdev->msr_pkg_therm_high);
  332. cpumask_set_cpu(cpu, &pkgdev->cpumask);
  333. spin_lock_irq(&pkg_temp_lock);
  334. packages[pkgid] = pkgdev;
  335. spin_unlock_irq(&pkg_temp_lock);
  336. return 0;
  337. }
/*
 * CPU hotplug teardown callback. Migrates MSR/work handling to another
 * CPU of the same package, or tears the package down entirely when the
 * last CPU goes away.
 */
static int pkg_thermal_cpu_offline(unsigned int cpu)
{
	struct pkg_device *pkgdev = pkg_temp_thermal_get_dev(cpu);
	bool lastcpu, was_target;
	int target;

	if (!pkgdev)
		return 0;

	/* Pick any remaining sibling as the new MSR-access target. */
	target = cpumask_any_but(&pkgdev->cpumask, cpu);
	cpumask_clear_cpu(cpu, &pkgdev->cpumask);
	lastcpu = target >= nr_cpu_ids;
	/*
	 * Remove the sysfs files, if this is the last cpu in the package
	 * before doing further cleanups.
	 */
	if (lastcpu) {
		struct thermal_zone_device *tzone = pkgdev->tzone;

		/*
		 * We must protect against a work function calling
		 * thermal_zone_update, after/while unregister. We null out
		 * the pointer under the zone mutex, so the worker function
		 * won't try to call.
		 */
		mutex_lock(&thermal_zone_mutex);
		pkgdev->tzone = NULL;
		mutex_unlock(&thermal_zone_mutex);

		thermal_zone_device_unregister(tzone);
	}

	/* Protect against work and interrupts */
	spin_lock_irq(&pkg_temp_lock);

	/*
	 * Check whether this cpu was the current target and store the new
	 * one. When we drop the lock, then the interrupt notify function
	 * will see the new target.
	 */
	was_target = pkgdev->cpu == cpu;
	pkgdev->cpu = target;

	/*
	 * If this is the last CPU in the package remove the package
	 * reference from the array and restore the interrupt MSR. When we
	 * drop the lock neither the interrupt notify function nor the
	 * worker will see the package anymore.
	 */
	if (lastcpu) {
		packages[topology_logical_package_id(cpu)] = NULL;
		/* After this point nothing touches the MSR anymore. */
		wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
		      pkgdev->msr_pkg_therm_low, pkgdev->msr_pkg_therm_high);
	}

	/*
	 * Check whether there is work scheduled and whether the work is
	 * targeted at the outgoing CPU.
	 */
	if (pkgdev->work_scheduled && was_target) {
		/*
		 * To cancel the work we need to drop the lock, otherwise
		 * we might deadlock if the work needs to be flushed.
		 */
		spin_unlock_irq(&pkg_temp_lock);
		cancel_delayed_work_sync(&pkgdev->work);
		spin_lock_irq(&pkg_temp_lock);
		/*
		 * If this is not the last cpu in the package and the work
		 * did not run after we dropped the lock above, then we
		 * need to reschedule the work, otherwise the interrupt
		 * stays disabled forever.
		 */
		if (!lastcpu && pkgdev->work_scheduled)
			pkg_thermal_schedule_work(target, &pkgdev->work);
	}

	spin_unlock_irq(&pkg_temp_lock);

	/* Final cleanup if this is the last cpu */
	if (lastcpu)
		kfree(pkgdev);
	return 0;
}
  413. static int pkg_thermal_cpu_online(unsigned int cpu)
  414. {
  415. struct pkg_device *pkgdev = pkg_temp_thermal_get_dev(cpu);
  416. struct cpuinfo_x86 *c = &cpu_data(cpu);
  417. /* Paranoia check */
  418. if (!cpu_has(c, X86_FEATURE_DTHERM) || !cpu_has(c, X86_FEATURE_PTS))
  419. return -ENODEV;
  420. /* If the package exists, nothing to do */
  421. if (pkgdev) {
  422. cpumask_set_cpu(cpu, &pkgdev->cpumask);
  423. return 0;
  424. }
  425. return pkg_temp_thermal_device_add(cpu);
  426. }
/* Match any Intel CPU advertising Package Thermal Status (PTS). */
static const struct x86_cpu_id __initconst pkg_temp_thermal_ids[] = {
	{ X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_PTS },
	{}
};
MODULE_DEVICE_TABLE(x86cpu, pkg_temp_thermal_ids);
  432. static int __init pkg_temp_thermal_init(void)
  433. {
  434. int ret;
  435. if (!x86_match_cpu(pkg_temp_thermal_ids))
  436. return -ENODEV;
  437. max_packages = topology_max_packages();
  438. packages = kzalloc(max_packages * sizeof(struct pkg_device *), GFP_KERNEL);
  439. if (!packages)
  440. return -ENOMEM;
  441. ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "thermal/x86_pkg:online",
  442. pkg_thermal_cpu_online, pkg_thermal_cpu_offline);
  443. if (ret < 0)
  444. goto err;
  445. /* Store the state for module exit */
  446. pkg_thermal_hp_state = ret;
  447. platform_thermal_package_notify = pkg_thermal_notify;
  448. platform_thermal_package_rate_control = pkg_thermal_rate_control;
  449. /* Don't care if it fails */
  450. pkg_temp_debugfs_init();
  451. return 0;
  452. err:
  453. kfree(packages);
  454. return ret;
  455. }
  456. module_init(pkg_temp_thermal_init)
static void __exit pkg_temp_thermal_exit(void)
{
	/* Unhook from the interrupt path first ... */
	platform_thermal_package_notify = NULL;
	platform_thermal_package_rate_control = NULL;

	/* ... then tear down per-package devices via the hotplug core. */
	cpuhp_remove_state(pkg_thermal_hp_state);
	debugfs_remove_recursive(debugfs);
	kfree(packages);
}
module_exit(pkg_temp_thermal_exit)

MODULE_DESCRIPTION("X86 PKG TEMP Thermal Driver");
MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
MODULE_LICENSE("GPL v2");