cpu_cooling.c 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821
  1. /*
  2. * linux/drivers/thermal/cpu_cooling.c
  3. *
  4. * Copyright (C) 2012 Samsung Electronics Co., Ltd(http://www.samsung.com)
  5. * Copyright (C) 2012 Amit Daniel <amit.kachhap@linaro.org>
  6. *
  7. * Copyright (C) 2014 Viresh Kumar <viresh.kumar@linaro.org>
  8. *
  9. * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  10. * This program is free software; you can redistribute it and/or modify
  11. * it under the terms of the GNU General Public License as published by
  12. * the Free Software Foundation; version 2 of the License.
  13. *
  14. * This program is distributed in the hope that it will be useful, but
  15. * WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. * General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU General Public License along
  20. * with this program; if not, write to the Free Software Foundation, Inc.,
  21. * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
  22. *
  23. * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  24. */
  25. #include <linux/module.h>
  26. #include <linux/thermal.h>
  27. #include <linux/cpufreq.h>
  28. #include <linux/err.h>
  29. #include <linux/idr.h>
  30. #include <linux/pm_opp.h>
  31. #include <linux/slab.h>
  32. #include <linux/cpu.h>
  33. #include <linux/cpu_cooling.h>
  34. #include <trace/events/thermal.h>
  35. /*
  36. * Cooling state <-> CPUFreq frequency
  37. *
  38. * Cooling states are translated to frequencies throughout this driver and this
  39. * is the relation between them.
  40. *
  41. * Highest cooling state corresponds to lowest possible frequency.
  42. *
  43. * i.e.
  44. * level 0 --> 1st Max Freq
  45. * level 1 --> 2nd Max Freq
  46. * ...
  47. */
  48. /**
  49. * struct freq_table - frequency table along with power entries
  50. * @frequency: frequency in KHz
  51. * @power: power in mW
  52. *
  53. * This structure is built when the cooling device registers and helps
  54. * in translating frequency to power and vice versa.
  55. */
  56. struct freq_table {
  57. u32 frequency;
  58. u32 power;
  59. };
  60. /**
  61. * struct time_in_idle - Idle time stats
  62. * @time: previous reading of the absolute time that this cpu was idle
  63. * @timestamp: wall time of the last invocation of get_cpu_idle_time_us()
  64. */
  65. struct time_in_idle {
  66. u64 time;
  67. u64 timestamp;
  68. };
  69. /**
  70. * struct cpufreq_cooling_device - data for cooling device with cpufreq
  71. * @id: unique integer value corresponding to each cpufreq_cooling_device
  72. * registered.
  73. * @last_load: load measured by the latest call to cpufreq_get_requested_power()
  74. * @cpufreq_state: integer value representing the current state of cpufreq
  75. * cooling devices.
  76. * @clipped_freq: integer value representing the absolute value of the clipped
  77. * frequency.
  78. * @max_level: maximum cooling level. One less than total number of valid
  79. * cpufreq frequencies.
  80. * @freq_table: Freq table in descending order of frequencies
  81. * @cdev: thermal_cooling_device pointer to keep track of the
  82. * registered cooling device.
  83. * @policy: cpufreq policy.
  84. * @node: list_head to link all cpufreq_cooling_device together.
  85. * @idle_time: idle time stats
  86. *
  87. * This structure is required for keeping information of each registered
  88. * cpufreq_cooling_device.
  89. */
  90. struct cpufreq_cooling_device {
  91. int id;
  92. u32 last_load;
  93. unsigned int cpufreq_state;
  94. unsigned int clipped_freq;
  95. unsigned int max_level;
  96. struct freq_table *freq_table; /* In descending order */
  97. struct thermal_cooling_device *cdev;
  98. struct cpufreq_policy *policy;
  99. struct list_head node;
  100. struct time_in_idle *idle_time;
  101. };
  /* Allocator for the ids that make each "thermal-cpufreq-%d" name unique */
  static DEFINE_IDA(cpufreq_ida);

  /* Registered cpufreq cooling devices; walk/modify under cooling_list_lock */
  static LIST_HEAD(cpufreq_cdev_list);
  static DEFINE_MUTEX(cooling_list_lock);
  105. /* Below code defines functions to be used for cpufreq as cooling device */
  106. /**
  107. * get_level: Find the level for a particular frequency
  108. * @cpufreq_cdev: cpufreq_cdev for which the property is required
  109. * @freq: Frequency
  110. *
  111. * Return: level corresponding to the frequency.
  112. */
  113. static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_cdev,
  114. unsigned int freq)
  115. {
  116. struct freq_table *freq_table = cpufreq_cdev->freq_table;
  117. unsigned long level;
  118. for (level = 1; level <= cpufreq_cdev->max_level; level++)
  119. if (freq > freq_table[level].frequency)
  120. break;
  121. return level - 1;
  122. }
  123. /**
  124. * cpufreq_thermal_notifier - notifier callback for cpufreq policy change.
  125. * @nb: struct notifier_block * with callback info.
  126. * @event: value showing cpufreq event for which this function invoked.
  127. * @data: callback-specific data
  128. *
  129. * Callback to hijack the notification on cpufreq policy transition.
  130. * Every time there is a change in policy, we will intercept and
  131. * update the cpufreq policy with thermal constraints.
  132. *
  133. * Return: 0 (success)
  134. */
  135. static int cpufreq_thermal_notifier(struct notifier_block *nb,
  136. unsigned long event, void *data)
  137. {
  138. struct cpufreq_policy *policy = data;
  139. unsigned long clipped_freq;
  140. struct cpufreq_cooling_device *cpufreq_cdev;
  141. if (event != CPUFREQ_ADJUST)
  142. return NOTIFY_DONE;
  143. mutex_lock(&cooling_list_lock);
  144. list_for_each_entry(cpufreq_cdev, &cpufreq_cdev_list, node) {
  145. /*
  146. * A new copy of the policy is sent to the notifier and can't
  147. * compare that directly.
  148. */
  149. if (policy->cpu != cpufreq_cdev->policy->cpu)
  150. continue;
  151. /*
  152. * policy->max is the maximum allowed frequency defined by user
  153. * and clipped_freq is the maximum that thermal constraints
  154. * allow.
  155. *
  156. * If clipped_freq is lower than policy->max, then we need to
  157. * readjust policy->max.
  158. *
  159. * But, if clipped_freq is greater than policy->max, we don't
  160. * need to do anything.
  161. */
  162. clipped_freq = cpufreq_cdev->clipped_freq;
  163. if (policy->max > clipped_freq)
  164. cpufreq_verify_within_limits(policy, 0, clipped_freq);
  165. break;
  166. }
  167. mutex_unlock(&cooling_list_lock);
  168. return NOTIFY_OK;
  169. }
  170. /**
  171. * update_freq_table() - Update the freq table with power numbers
  172. * @cpufreq_cdev: the cpufreq cooling device in which to update the table
  173. * @capacitance: dynamic power coefficient for these cpus
  174. *
  175. * Update the freq table with power numbers. This table will be used in
  176. * cpu_power_to_freq() and cpu_freq_to_power() to convert between power and
  177. * frequency efficiently. Power is stored in mW, frequency in KHz. The
  178. * resulting table is in descending order.
  179. *
  180. * Return: 0 on success, -EINVAL if there are no OPPs for any CPUs,
  181. * or -ENOMEM if we run out of memory.
  182. */
  183. static int update_freq_table(struct cpufreq_cooling_device *cpufreq_cdev,
  184. u32 capacitance)
  185. {
  186. struct freq_table *freq_table = cpufreq_cdev->freq_table;
  187. struct dev_pm_opp *opp;
  188. struct device *dev = NULL;
  189. int num_opps = 0, cpu = cpufreq_cdev->policy->cpu, i;
  190. dev = get_cpu_device(cpu);
  191. if (unlikely(!dev)) {
  192. dev_warn(&cpufreq_cdev->cdev->device,
  193. "No cpu device for cpu %d\n", cpu);
  194. return -ENODEV;
  195. }
  196. num_opps = dev_pm_opp_get_opp_count(dev);
  197. if (num_opps < 0)
  198. return num_opps;
  199. /*
  200. * The cpufreq table is also built from the OPP table and so the count
  201. * should match.
  202. */
  203. if (num_opps != cpufreq_cdev->max_level + 1) {
  204. dev_warn(dev, "Number of OPPs not matching with max_levels\n");
  205. return -EINVAL;
  206. }
  207. for (i = 0; i <= cpufreq_cdev->max_level; i++) {
  208. unsigned long freq = freq_table[i].frequency * 1000;
  209. u32 freq_mhz = freq_table[i].frequency / 1000;
  210. u64 power;
  211. u32 voltage_mv;
  212. /*
  213. * Find ceil frequency as 'freq' may be slightly lower than OPP
  214. * freq due to truncation while converting to kHz.
  215. */
  216. opp = dev_pm_opp_find_freq_ceil(dev, &freq);
  217. if (IS_ERR(opp)) {
  218. dev_err(dev, "failed to get opp for %lu frequency\n",
  219. freq);
  220. return -EINVAL;
  221. }
  222. voltage_mv = dev_pm_opp_get_voltage(opp) / 1000;
  223. dev_pm_opp_put(opp);
  224. /*
  225. * Do the multiplication with MHz and millivolt so as
  226. * to not overflow.
  227. */
  228. power = (u64)capacitance * freq_mhz * voltage_mv * voltage_mv;
  229. do_div(power, 1000000000);
  230. /* power is stored in mW */
  231. freq_table[i].power = power;
  232. }
  233. return 0;
  234. }
  235. static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_cdev,
  236. u32 freq)
  237. {
  238. int i;
  239. struct freq_table *freq_table = cpufreq_cdev->freq_table;
  240. for (i = 1; i <= cpufreq_cdev->max_level; i++)
  241. if (freq > freq_table[i].frequency)
  242. break;
  243. return freq_table[i - 1].power;
  244. }
  245. static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_cdev,
  246. u32 power)
  247. {
  248. int i;
  249. struct freq_table *freq_table = cpufreq_cdev->freq_table;
  250. for (i = 1; i <= cpufreq_cdev->max_level; i++)
  251. if (power > freq_table[i].power)
  252. break;
  253. return freq_table[i - 1].frequency;
  254. }
  255. /**
  256. * get_load() - get load for a cpu since last updated
  257. * @cpufreq_cdev: &struct cpufreq_cooling_device for this cpu
  258. * @cpu: cpu number
  259. * @cpu_idx: index of the cpu in time_in_idle*
  260. *
  261. * Return: The average load of cpu @cpu in percentage since this
  262. * function was last called.
  263. */
  264. static u32 get_load(struct cpufreq_cooling_device *cpufreq_cdev, int cpu,
  265. int cpu_idx)
  266. {
  267. u32 load;
  268. u64 now, now_idle, delta_time, delta_idle;
  269. struct time_in_idle *idle_time = &cpufreq_cdev->idle_time[cpu_idx];
  270. now_idle = get_cpu_idle_time(cpu, &now, 0);
  271. delta_idle = now_idle - idle_time->time;
  272. delta_time = now - idle_time->timestamp;
  273. if (delta_time <= delta_idle)
  274. load = 0;
  275. else
  276. load = div64_u64(100 * (delta_time - delta_idle), delta_time);
  277. idle_time->time = now_idle;
  278. idle_time->timestamp = now;
  279. return load;
  280. }
  281. /**
  282. * get_dynamic_power() - calculate the dynamic power
  283. * @cpufreq_cdev: &cpufreq_cooling_device for this cdev
  284. * @freq: current frequency
  285. *
  286. * Return: the dynamic power consumed by the cpus described by
  287. * @cpufreq_cdev.
  288. */
  289. static u32 get_dynamic_power(struct cpufreq_cooling_device *cpufreq_cdev,
  290. unsigned long freq)
  291. {
  292. u32 raw_cpu_power;
  293. raw_cpu_power = cpu_freq_to_power(cpufreq_cdev, freq);
  294. return (raw_cpu_power * cpufreq_cdev->last_load) / 100;
  295. }
  296. /* cpufreq cooling device callback functions are defined below */
  297. /**
  298. * cpufreq_get_max_state - callback function to get the max cooling state.
  299. * @cdev: thermal cooling device pointer.
  300. * @state: fill this variable with the max cooling state.
  301. *
  302. * Callback for the thermal cooling device to return the cpufreq
  303. * max cooling state.
  304. *
  305. * Return: 0 on success, an error code otherwise.
  306. */
  307. static int cpufreq_get_max_state(struct thermal_cooling_device *cdev,
  308. unsigned long *state)
  309. {
  310. struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
  311. *state = cpufreq_cdev->max_level;
  312. return 0;
  313. }
  314. /**
  315. * cpufreq_get_cur_state - callback function to get the current cooling state.
  316. * @cdev: thermal cooling device pointer.
  317. * @state: fill this variable with the current cooling state.
  318. *
  319. * Callback for the thermal cooling device to return the cpufreq
  320. * current cooling state.
  321. *
  322. * Return: 0 on success, an error code otherwise.
  323. */
  324. static int cpufreq_get_cur_state(struct thermal_cooling_device *cdev,
  325. unsigned long *state)
  326. {
  327. struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
  328. *state = cpufreq_cdev->cpufreq_state;
  329. return 0;
  330. }
  331. /**
  332. * cpufreq_set_cur_state - callback function to set the current cooling state.
  333. * @cdev: thermal cooling device pointer.
  334. * @state: set this variable to the current cooling state.
  335. *
  336. * Callback for the thermal cooling device to change the cpufreq
  337. * current cooling state.
  338. *
  339. * Return: 0 on success, an error code otherwise.
  340. */
  341. static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
  342. unsigned long state)
  343. {
  344. struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
  345. unsigned int clip_freq;
  346. /* Request state should be less than max_level */
  347. if (WARN_ON(state > cpufreq_cdev->max_level))
  348. return -EINVAL;
  349. /* Check if the old cooling action is same as new cooling action */
  350. if (cpufreq_cdev->cpufreq_state == state)
  351. return 0;
  352. clip_freq = cpufreq_cdev->freq_table[state].frequency;
  353. cpufreq_cdev->cpufreq_state = state;
  354. cpufreq_cdev->clipped_freq = clip_freq;
  355. cpufreq_update_policy(cpufreq_cdev->policy->cpu);
  356. return 0;
  357. }
  358. /**
  359. * cpufreq_get_requested_power() - get the current power
  360. * @cdev: &thermal_cooling_device pointer
  361. * @tz: a valid thermal zone device pointer
  362. * @power: pointer in which to store the resulting power
  363. *
  364. * Calculate the current power consumption of the cpus in milliwatts
  365. * and store it in @power. This function should actually calculate
  366. * the requested power, but it's hard to get the frequency that
  367. * cpufreq would have assigned if there were no thermal limits.
  368. * Instead, we calculate the current power on the assumption that the
  369. * immediate future will look like the immediate past.
  370. *
  371. * We use the current frequency and the average load since this
  372. * function was last called. In reality, there could have been
  373. * multiple opps since this function was last called and that affects
  374. * the load calculation. While it's not perfectly accurate, this
  375. * simplification is good enough and works. REVISIT this, as more
  376. * complex code may be needed if experiments show that it's not
  377. * accurate enough.
  378. *
  379. * Return: 0 on success, -E* if getting the static power failed.
  380. */
  381. static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev,
  382. struct thermal_zone_device *tz,
  383. u32 *power)
  384. {
  385. unsigned long freq;
  386. int i = 0, cpu;
  387. u32 total_load = 0;
  388. struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
  389. struct cpufreq_policy *policy = cpufreq_cdev->policy;
  390. u32 *load_cpu = NULL;
  391. freq = cpufreq_quick_get(policy->cpu);
  392. if (trace_thermal_power_cpu_get_power_enabled()) {
  393. u32 ncpus = cpumask_weight(policy->related_cpus);
  394. load_cpu = kcalloc(ncpus, sizeof(*load_cpu), GFP_KERNEL);
  395. }
  396. for_each_cpu(cpu, policy->related_cpus) {
  397. u32 load;
  398. if (cpu_online(cpu))
  399. load = get_load(cpufreq_cdev, cpu, i);
  400. else
  401. load = 0;
  402. total_load += load;
  403. if (load_cpu)
  404. load_cpu[i] = load;
  405. i++;
  406. }
  407. cpufreq_cdev->last_load = total_load;
  408. *power = get_dynamic_power(cpufreq_cdev, freq);
  409. if (load_cpu) {
  410. trace_thermal_power_cpu_get_power(policy->related_cpus, freq,
  411. load_cpu, i, *power);
  412. kfree(load_cpu);
  413. }
  414. return 0;
  415. }
  416. /**
  417. * cpufreq_state2power() - convert a cpu cdev state to power consumed
  418. * @cdev: &thermal_cooling_device pointer
  419. * @tz: a valid thermal zone device pointer
  420. * @state: cooling device state to be converted
  421. * @power: pointer in which to store the resulting power
  422. *
  423. * Convert cooling device state @state into power consumption in
  424. * milliwatts assuming 100% load. Store the calculated power in
  425. * @power.
  426. *
  427. * Return: 0 on success, -EINVAL if the cooling device state could not
  428. * be converted into a frequency or other -E* if there was an error
  429. * when calculating the static power.
  430. */
  431. static int cpufreq_state2power(struct thermal_cooling_device *cdev,
  432. struct thermal_zone_device *tz,
  433. unsigned long state, u32 *power)
  434. {
  435. unsigned int freq, num_cpus;
  436. struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
  437. /* Request state should be less than max_level */
  438. if (WARN_ON(state > cpufreq_cdev->max_level))
  439. return -EINVAL;
  440. num_cpus = cpumask_weight(cpufreq_cdev->policy->cpus);
  441. freq = cpufreq_cdev->freq_table[state].frequency;
  442. *power = cpu_freq_to_power(cpufreq_cdev, freq) * num_cpus;
  443. return 0;
  444. }
  445. /**
  446. * cpufreq_power2state() - convert power to a cooling device state
  447. * @cdev: &thermal_cooling_device pointer
  448. * @tz: a valid thermal zone device pointer
  449. * @power: power in milliwatts to be converted
  450. * @state: pointer in which to store the resulting state
  451. *
  452. * Calculate a cooling device state for the cpus described by @cdev
  453. * that would allow them to consume at most @power mW and store it in
  454. * @state. Note that this calculation depends on external factors
  455. * such as the cpu load or the current static power. Calling this
  456. * function with the same power as input can yield different cooling
  457. * device states depending on those external factors.
  458. *
  459. * Return: 0 on success, -ENODEV if no cpus are online or -EINVAL if
  460. * the calculated frequency could not be converted to a valid state.
  461. * The latter should not happen unless the frequencies available to
  462. * cpufreq have changed since the initialization of the cpu cooling
  463. * device.
  464. */
  465. static int cpufreq_power2state(struct thermal_cooling_device *cdev,
  466. struct thermal_zone_device *tz, u32 power,
  467. unsigned long *state)
  468. {
  469. unsigned int cur_freq, target_freq;
  470. u32 last_load, normalised_power;
  471. struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
  472. struct cpufreq_policy *policy = cpufreq_cdev->policy;
  473. cur_freq = cpufreq_quick_get(policy->cpu);
  474. power = power > 0 ? power : 0;
  475. last_load = cpufreq_cdev->last_load ?: 1;
  476. normalised_power = (power * 100) / last_load;
  477. target_freq = cpu_power_to_freq(cpufreq_cdev, normalised_power);
  478. *state = get_level(cpufreq_cdev, target_freq);
  479. trace_thermal_power_cpu_limit(policy->related_cpus, target_freq, *state,
  480. power);
  481. return 0;
  482. }
  483. /* Bind cpufreq callbacks to thermal cooling device ops */
  484. static struct thermal_cooling_device_ops cpufreq_cooling_ops = {
  485. .get_max_state = cpufreq_get_max_state,
  486. .get_cur_state = cpufreq_get_cur_state,
  487. .set_cur_state = cpufreq_set_cur_state,
  488. };
  489. static struct thermal_cooling_device_ops cpufreq_power_cooling_ops = {
  490. .get_max_state = cpufreq_get_max_state,
  491. .get_cur_state = cpufreq_get_cur_state,
  492. .set_cur_state = cpufreq_set_cur_state,
  493. .get_requested_power = cpufreq_get_requested_power,
  494. .state2power = cpufreq_state2power,
  495. .power2state = cpufreq_power2state,
  496. };
  497. /* Notifier for cpufreq policy change */
  498. static struct notifier_block thermal_cpufreq_notifier_block = {
  499. .notifier_call = cpufreq_thermal_notifier,
  500. };
  501. static unsigned int find_next_max(struct cpufreq_frequency_table *table,
  502. unsigned int prev_max)
  503. {
  504. struct cpufreq_frequency_table *pos;
  505. unsigned int max = 0;
  506. cpufreq_for_each_valid_entry(pos, table) {
  507. if (pos->frequency > max && pos->frequency < prev_max)
  508. max = pos->frequency;
  509. }
  510. return max;
  511. }
  512. /**
  513. * __cpufreq_cooling_register - helper function to create cpufreq cooling device
  514. * @np: a valid struct device_node to the cooling device device tree node
  515. * @policy: cpufreq policy
  516. * Normally this should be same as cpufreq policy->related_cpus.
  517. * @capacitance: dynamic power coefficient for these cpus
  518. *
  519. * This interface function registers the cpufreq cooling device with the name
  520. * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
  521. * cooling devices. It also gives the opportunity to link the cooling device
  522. * with a device tree node, in order to bind it via the thermal DT code.
  523. *
  524. * Return: a valid struct thermal_cooling_device pointer on success,
  525. * on failure, it returns a corresponding ERR_PTR().
  526. */
  527. static struct thermal_cooling_device *
  528. __cpufreq_cooling_register(struct device_node *np,
  529. struct cpufreq_policy *policy, u32 capacitance)
  530. {
  531. struct thermal_cooling_device *cdev;
  532. struct cpufreq_cooling_device *cpufreq_cdev;
  533. char dev_name[THERMAL_NAME_LENGTH];
  534. unsigned int freq, i, num_cpus;
  535. int ret;
  536. struct thermal_cooling_device_ops *cooling_ops;
  537. bool first;
  538. if (IS_ERR_OR_NULL(policy)) {
  539. pr_err("%s: cpufreq policy isn't valid: %p\n", __func__, policy);
  540. return ERR_PTR(-EINVAL);
  541. }
  542. i = cpufreq_table_count_valid_entries(policy);
  543. if (!i) {
  544. pr_debug("%s: CPUFreq table not found or has no valid entries\n",
  545. __func__);
  546. return ERR_PTR(-ENODEV);
  547. }
  548. cpufreq_cdev = kzalloc(sizeof(*cpufreq_cdev), GFP_KERNEL);
  549. if (!cpufreq_cdev)
  550. return ERR_PTR(-ENOMEM);
  551. cpufreq_cdev->policy = policy;
  552. num_cpus = cpumask_weight(policy->related_cpus);
  553. cpufreq_cdev->idle_time = kcalloc(num_cpus,
  554. sizeof(*cpufreq_cdev->idle_time),
  555. GFP_KERNEL);
  556. if (!cpufreq_cdev->idle_time) {
  557. cdev = ERR_PTR(-ENOMEM);
  558. goto free_cdev;
  559. }
  560. /* max_level is an index, not a counter */
  561. cpufreq_cdev->max_level = i - 1;
  562. cpufreq_cdev->freq_table = kmalloc_array(i,
  563. sizeof(*cpufreq_cdev->freq_table),
  564. GFP_KERNEL);
  565. if (!cpufreq_cdev->freq_table) {
  566. cdev = ERR_PTR(-ENOMEM);
  567. goto free_idle_time;
  568. }
  569. ret = ida_simple_get(&cpufreq_ida, 0, 0, GFP_KERNEL);
  570. if (ret < 0) {
  571. cdev = ERR_PTR(ret);
  572. goto free_table;
  573. }
  574. cpufreq_cdev->id = ret;
  575. snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
  576. cpufreq_cdev->id);
  577. /* Fill freq-table in descending order of frequencies */
  578. for (i = 0, freq = -1; i <= cpufreq_cdev->max_level; i++) {
  579. freq = find_next_max(policy->freq_table, freq);
  580. cpufreq_cdev->freq_table[i].frequency = freq;
  581. /* Warn for duplicate entries */
  582. if (!freq)
  583. pr_warn("%s: table has duplicate entries\n", __func__);
  584. else
  585. pr_debug("%s: freq:%u KHz\n", __func__, freq);
  586. }
  587. if (capacitance) {
  588. ret = update_freq_table(cpufreq_cdev, capacitance);
  589. if (ret) {
  590. cdev = ERR_PTR(ret);
  591. goto remove_ida;
  592. }
  593. cooling_ops = &cpufreq_power_cooling_ops;
  594. } else {
  595. cooling_ops = &cpufreq_cooling_ops;
  596. }
  597. cdev = thermal_of_cooling_device_register(np, dev_name, cpufreq_cdev,
  598. cooling_ops);
  599. if (IS_ERR(cdev))
  600. goto remove_ida;
  601. cpufreq_cdev->clipped_freq = cpufreq_cdev->freq_table[0].frequency;
  602. cpufreq_cdev->cdev = cdev;
  603. mutex_lock(&cooling_list_lock);
  604. /* Register the notifier for first cpufreq cooling device */
  605. first = list_empty(&cpufreq_cdev_list);
  606. list_add(&cpufreq_cdev->node, &cpufreq_cdev_list);
  607. mutex_unlock(&cooling_list_lock);
  608. if (first)
  609. cpufreq_register_notifier(&thermal_cpufreq_notifier_block,
  610. CPUFREQ_POLICY_NOTIFIER);
  611. return cdev;
  612. remove_ida:
  613. ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
  614. free_table:
  615. kfree(cpufreq_cdev->freq_table);
  616. free_idle_time:
  617. kfree(cpufreq_cdev->idle_time);
  618. free_cdev:
  619. kfree(cpufreq_cdev);
  620. return cdev;
  621. }
  622. /**
  623. * cpufreq_cooling_register - function to create cpufreq cooling device.
  624. * @policy: cpufreq policy
  625. *
  626. * This interface function registers the cpufreq cooling device with the name
  627. * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
  628. * cooling devices.
  629. *
  630. * Return: a valid struct thermal_cooling_device pointer on success,
  631. * on failure, it returns a corresponding ERR_PTR().
  632. */
  633. struct thermal_cooling_device *
  634. cpufreq_cooling_register(struct cpufreq_policy *policy)
  635. {
  636. return __cpufreq_cooling_register(NULL, policy, 0);
  637. }
  638. EXPORT_SYMBOL_GPL(cpufreq_cooling_register);
  639. /**
  640. * of_cpufreq_cooling_register - function to create cpufreq cooling device.
  641. * @policy: cpufreq policy
  642. *
  643. * This interface function registers the cpufreq cooling device with the name
  644. * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
  645. * cooling devices. Using this API, the cpufreq cooling device will be
  646. * linked to the device tree node provided.
  647. *
  648. * Using this function, the cooling device will implement the power
  649. * extensions by using a simple cpu power model. The cpus must have
  650. * registered their OPPs using the OPP library.
  651. *
  652. * It also takes into account, if property present in policy CPU node, the
  653. * static power consumed by the cpu.
  654. *
  655. * Return: a valid struct thermal_cooling_device pointer on success,
  656. * and NULL on failure.
  657. */
  658. struct thermal_cooling_device *
  659. of_cpufreq_cooling_register(struct cpufreq_policy *policy)
  660. {
  661. struct device_node *np = of_get_cpu_node(policy->cpu, NULL);
  662. struct thermal_cooling_device *cdev = NULL;
  663. u32 capacitance = 0;
  664. if (!np) {
  665. pr_err("cpu_cooling: OF node not available for cpu%d\n",
  666. policy->cpu);
  667. return NULL;
  668. }
  669. if (of_find_property(np, "#cooling-cells", NULL)) {
  670. of_property_read_u32(np, "dynamic-power-coefficient",
  671. &capacitance);
  672. cdev = __cpufreq_cooling_register(np, policy, capacitance);
  673. if (IS_ERR(cdev)) {
  674. pr_err("cpu_cooling: cpu%d is not running as cooling device: %ld\n",
  675. policy->cpu, PTR_ERR(cdev));
  676. cdev = NULL;
  677. }
  678. }
  679. of_node_put(np);
  680. return cdev;
  681. }
  682. EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register);
  683. /**
  684. * cpufreq_cooling_unregister - function to remove cpufreq cooling device.
  685. * @cdev: thermal cooling device pointer.
  686. *
  687. * This interface function unregisters the "thermal-cpufreq-%x" cooling device.
  688. */
  689. void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
  690. {
  691. struct cpufreq_cooling_device *cpufreq_cdev;
  692. bool last;
  693. if (!cdev)
  694. return;
  695. cpufreq_cdev = cdev->devdata;
  696. mutex_lock(&cooling_list_lock);
  697. list_del(&cpufreq_cdev->node);
  698. /* Unregister the notifier for the last cpufreq cooling device */
  699. last = list_empty(&cpufreq_cdev_list);
  700. mutex_unlock(&cooling_list_lock);
  701. if (last)
  702. cpufreq_unregister_notifier(&thermal_cpufreq_notifier_block,
  703. CPUFREQ_POLICY_NOTIFIER);
  704. thermal_cooling_device_unregister(cpufreq_cdev->cdev);
  705. ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
  706. kfree(cpufreq_cdev->idle_time);
  707. kfree(cpufreq_cdev->freq_table);
  708. kfree(cpufreq_cdev);
  709. }
  710. EXPORT_SYMBOL_GPL(cpufreq_cooling_unregister);