cache-l2x0-pmu.c

/*
 * L220/L310 cache controller support
 *
 * Copyright (C) 2016 ARM Limited
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/errno.h>
#include <linux/hrtimer.h>
#include <linux/io.h>
#include <linux/list.h>
#include <linux/perf_event.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/types.h>

#include <asm/hardware/cache-l2x0.h>

#define PMU_NR_COUNTERS 2

static void __iomem *l2x0_base;
static struct pmu *l2x0_pmu;
static cpumask_t pmu_cpu;

static const char *l2x0_name;

static ktime_t l2x0_pmu_poll_period;
static struct hrtimer l2x0_pmu_hrtimer;
/*
 * The L220/PL310 PMU has two equivalent counters, Counter1 and Counter0.
 * Registers controlling these are laid out in pairs, in descending order, i.e.
 * the register for Counter1 comes first, followed by the register for
 * Counter0.
 * We ensure that idx 0 -> Counter0, and idx 1 -> Counter1.
 */
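/*
 * For example (following the accessors below, which subtract 4 * idx from the
 * Counter0 offsets): idx 0 maps to L2X0_EVENT_CNT0_CFG and L2X0_EVENT_CNT0_VAL,
 * while idx 1 maps to the registers 4 bytes below them, i.e. the Counter1
 * configuration and value registers.
 */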
static struct perf_event *events[PMU_NR_COUNTERS];

/* Find an unused counter */
static int l2x0_pmu_find_idx(void)
{
        int i;

        for (i = 0; i < PMU_NR_COUNTERS; i++) {
                if (!events[i])
                        return i;
        }

        return -1;
}

/* How many counters are allocated? */
static int l2x0_pmu_num_active_counters(void)
{
        int i, cnt = 0;

        for (i = 0; i < PMU_NR_COUNTERS; i++) {
                if (events[i])
                        cnt++;
        }

        return cnt;
}
static void l2x0_pmu_counter_config_write(int idx, u32 val)
{
        writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT0_CFG - 4 * idx);
}

static u32 l2x0_pmu_counter_read(int idx)
{
        return readl_relaxed(l2x0_base + L2X0_EVENT_CNT0_VAL - 4 * idx);
}

static void l2x0_pmu_counter_write(int idx, u32 val)
{
        writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT0_VAL - 4 * idx);
}
static void __l2x0_pmu_enable(void)
{
        u32 val = readl_relaxed(l2x0_base + L2X0_EVENT_CNT_CTRL);
        val |= L2X0_EVENT_CNT_CTRL_ENABLE;
        writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT_CTRL);
}

static void __l2x0_pmu_disable(void)
{
        u32 val = readl_relaxed(l2x0_base + L2X0_EVENT_CNT_CTRL);
        val &= ~L2X0_EVENT_CNT_CTRL_ENABLE;
        writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT_CTRL);
}

static void l2x0_pmu_enable(struct pmu *pmu)
{
        if (l2x0_pmu_num_active_counters() == 0)
                return;

        __l2x0_pmu_enable();
}

static void l2x0_pmu_disable(struct pmu *pmu)
{
        if (l2x0_pmu_num_active_counters() == 0)
                return;

        __l2x0_pmu_disable();
}
static void warn_if_saturated(u32 count)
{
        if (count != 0xffffffff)
                return;

        pr_warn_ratelimited("L2X0 counter saturated. Poll period too long\n");
}

static void l2x0_pmu_event_read(struct perf_event *event)
{
        struct hw_perf_event *hw = &event->hw;
        u64 prev_count, new_count, mask;

        /*
         * Retry until prev_count is advanced to new_count without racing
         * another reader, so each 32-bit delta is accumulated exactly once.
         */
        do {
                prev_count = local64_read(&hw->prev_count);
                new_count = l2x0_pmu_counter_read(hw->idx);
        } while (local64_xchg(&hw->prev_count, new_count) != prev_count);

        mask = GENMASK_ULL(31, 0);
        local64_add((new_count - prev_count) & mask, &event->count);

        warn_if_saturated(new_count);
}
static void l2x0_pmu_event_configure(struct perf_event *event)
{
        struct hw_perf_event *hw = &event->hw;

        /*
         * The L2X0 counters saturate at 0xffffffff rather than wrapping, so we
         * will *always* lose some number of events when a counter saturates,
         * and have no way of detecting how many were lost.
         *
         * To minimize the impact of this, we try to maximize the period by
         * always starting counters at zero. To ensure that group ratios are
         * representative, we poll periodically to avoid counters saturating.
         * See l2x0_pmu_poll().
         */
        local64_set(&hw->prev_count, 0);
        l2x0_pmu_counter_write(hw->idx, 0);
}
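/*
 * Periodic poll (see the comment in l2x0_pmu_event_configure() above): with
 * counting briefly paused, fold each hardware count into its perf event and
 * restart the counter from zero, so paired counters stay in step and are
 * unlikely to saturate between polls.
 */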
static enum hrtimer_restart l2x0_pmu_poll(struct hrtimer *hrtimer)
{
        unsigned long flags;
        int i;

        local_irq_save(flags);
        __l2x0_pmu_disable();

        for (i = 0; i < PMU_NR_COUNTERS; i++) {
                struct perf_event *event = events[i];

                if (!event)
                        continue;

                l2x0_pmu_event_read(event);
                l2x0_pmu_event_configure(event);
        }

        __l2x0_pmu_enable();
        local_irq_restore(flags);

        hrtimer_forward_now(hrtimer, l2x0_pmu_poll_period);
        return HRTIMER_RESTART;
}
static void __l2x0_pmu_event_enable(int idx, u32 event)
{
        u32 val;

        val = event << L2X0_EVENT_CNT_CFG_SRC_SHIFT;
        val |= L2X0_EVENT_CNT_CFG_INT_DISABLED;
        l2x0_pmu_counter_config_write(idx, val);
}

static void l2x0_pmu_event_start(struct perf_event *event, int flags)
{
        struct hw_perf_event *hw = &event->hw;

        if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
                return;

        if (flags & PERF_EF_RELOAD) {
                WARN_ON_ONCE(!(hw->state & PERF_HES_UPTODATE));
                l2x0_pmu_event_configure(event);
        }

        hw->state = 0;

        __l2x0_pmu_event_enable(hw->idx, hw->config_base);
}
static void __l2x0_pmu_event_disable(int idx)
{
        u32 val;

        val = L2X0_EVENT_CNT_CFG_SRC_DISABLED << L2X0_EVENT_CNT_CFG_SRC_SHIFT;
        val |= L2X0_EVENT_CNT_CFG_INT_DISABLED;
        l2x0_pmu_counter_config_write(idx, val);
}

static void l2x0_pmu_event_stop(struct perf_event *event, int flags)
{
        struct hw_perf_event *hw = &event->hw;

        if (WARN_ON_ONCE(event->hw.state & PERF_HES_STOPPED))
                return;

        __l2x0_pmu_event_disable(hw->idx);

        hw->state |= PERF_HES_STOPPED;

        if (flags & PERF_EF_UPDATE) {
                l2x0_pmu_event_read(event);
                hw->state |= PERF_HES_UPTODATE;
        }
}
static int l2x0_pmu_event_add(struct perf_event *event, int flags)
{
        struct hw_perf_event *hw = &event->hw;
        int idx = l2x0_pmu_find_idx();

        if (idx == -1)
                return -EAGAIN;

        /*
         * Pin the timer, so that the overflows are handled by the chosen
         * event->cpu (this is the same one as presented in "cpumask"
         * attribute).
         */
        if (l2x0_pmu_num_active_counters() == 0)
                hrtimer_start(&l2x0_pmu_hrtimer, l2x0_pmu_poll_period,
                              HRTIMER_MODE_REL_PINNED);

        events[idx] = event;
        hw->idx = idx;

        l2x0_pmu_event_configure(event);

        hw->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;

        if (flags & PERF_EF_START)
                l2x0_pmu_event_start(event, 0);

        return 0;
}

static void l2x0_pmu_event_del(struct perf_event *event, int flags)
{
        struct hw_perf_event *hw = &event->hw;

        l2x0_pmu_event_stop(event, PERF_EF_UPDATE);

        events[hw->idx] = NULL;
        hw->idx = -1;

        if (l2x0_pmu_num_active_counters() == 0)
                hrtimer_cancel(&l2x0_pmu_hrtimer);
}
static bool l2x0_pmu_group_is_valid(struct perf_event *event)
{
        struct pmu *pmu = event->pmu;
        struct perf_event *leader = event->group_leader;
        struct perf_event *sibling;
        int num_hw = 0;

        if (leader->pmu == pmu)
                num_hw++;
        else if (!is_software_event(leader))
                return false;

        for_each_sibling_event(sibling, leader) {
                if (sibling->pmu == pmu)
                        num_hw++;
                else if (!is_software_event(sibling))
                        return false;
        }

        return num_hw <= PMU_NR_COUNTERS;
}
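/*
 * Example of the policy enforced by l2x0_pmu_group_is_valid() above: a group
 * holding at most two events on this PMU (one per hardware counter), plus any
 * number of software events, is accepted; a group needing a third hardware
 * counter, or mixing in events from another hardware PMU, is rejected.
 */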
static int l2x0_pmu_event_init(struct perf_event *event)
{
        struct hw_perf_event *hw = &event->hw;

        if (event->attr.type != l2x0_pmu->type)
                return -ENOENT;

        if (is_sampling_event(event) ||
            event->attach_state & PERF_ATTACH_TASK)
                return -EINVAL;

        if (event->attr.exclude_user ||
            event->attr.exclude_kernel ||
            event->attr.exclude_hv ||
            event->attr.exclude_idle ||
            event->attr.exclude_host ||
            event->attr.exclude_guest)
                return -EINVAL;

        if (event->cpu < 0)
                return -EINVAL;

        if (event->attr.config & ~L2X0_EVENT_CNT_CFG_SRC_MASK)
                return -EINVAL;

        hw->config_base = event->attr.config;

        if (!l2x0_pmu_group_is_valid(event))
                return -EINVAL;

        event->cpu = cpumask_first(&pmu_cpu);

        return 0;
}
struct l2x0_event_attribute {
        struct device_attribute attr;
        unsigned int config;
        bool pl310_only;
};

#define L2X0_EVENT_ATTR(_name, _config, _pl310_only)                           \
        (&((struct l2x0_event_attribute[]) {{                                  \
                .attr = __ATTR(_name, S_IRUGO, l2x0_pmu_event_show, NULL),     \
                .config = _config,                                             \
                .pl310_only = _pl310_only,                                     \
        }})[0].attr.attr)

#define L220_PLUS_EVENT_ATTR(_name, _config)                                   \
        L2X0_EVENT_ATTR(_name, _config, false)

#define PL310_EVENT_ATTR(_name, _config)                                       \
        L2X0_EVENT_ATTR(_name, _config, true)

static ssize_t l2x0_pmu_event_show(struct device *dev,
                                   struct device_attribute *attr, char *buf)
{
        struct l2x0_event_attribute *lattr;

        lattr = container_of(attr, typeof(*lattr), attr);
        return snprintf(buf, PAGE_SIZE, "config=0x%x\n", lattr->config);
}
static umode_t l2x0_pmu_event_attr_is_visible(struct kobject *kobj,
                                              struct attribute *attr,
                                              int unused)
{
        struct device *dev = kobj_to_dev(kobj);
        struct pmu *pmu = dev_get_drvdata(dev);
        struct l2x0_event_attribute *lattr;

        lattr = container_of(attr, typeof(*lattr), attr.attr);

        if (!lattr->pl310_only || strcmp("l2c_310", pmu->name) == 0)
                return attr->mode;

        return 0;
}
static struct attribute *l2x0_pmu_event_attrs[] = {
        L220_PLUS_EVENT_ATTR(co, 0x1),
        L220_PLUS_EVENT_ATTR(drhit, 0x2),
        L220_PLUS_EVENT_ATTR(drreq, 0x3),
        L220_PLUS_EVENT_ATTR(dwhit, 0x4),
        L220_PLUS_EVENT_ATTR(dwreq, 0x5),
        L220_PLUS_EVENT_ATTR(dwtreq, 0x6),
        L220_PLUS_EVENT_ATTR(irhit, 0x7),
        L220_PLUS_EVENT_ATTR(irreq, 0x8),
        L220_PLUS_EVENT_ATTR(wa, 0x9),
        PL310_EVENT_ATTR(ipfalloc, 0xa),
        PL310_EVENT_ATTR(epfhit, 0xb),
        PL310_EVENT_ATTR(epfalloc, 0xc),
        PL310_EVENT_ATTR(srrcvd, 0xd),
        PL310_EVENT_ATTR(srconf, 0xe),
        PL310_EVENT_ATTR(epfrcvd, 0xf),
        NULL
};

static struct attribute_group l2x0_pmu_event_attrs_group = {
        .name = "events",
        .attrs = l2x0_pmu_event_attrs,
        .is_visible = l2x0_pmu_event_attr_is_visible,
};
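/*
 * Example usage (not part of the original source, and assuming a perf tool
 * with sysfs PMU event support): the events above can be counted system-wide,
 * e.g. on a PL310:
 *
 *   perf stat -a -e l2c_310/drhit/,l2c_310/drreq/ -- sleep 1
 *
 * Per-task and sampling use is rejected by l2x0_pmu_event_init() above.
 */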
static ssize_t l2x0_pmu_cpumask_show(struct device *dev,
                                     struct device_attribute *attr, char *buf)
{
        return cpumap_print_to_pagebuf(true, buf, &pmu_cpu);
}

static struct device_attribute l2x0_pmu_cpumask_attr =
                __ATTR(cpumask, S_IRUGO, l2x0_pmu_cpumask_show, NULL);

static struct attribute *l2x0_pmu_cpumask_attrs[] = {
        &l2x0_pmu_cpumask_attr.attr,
        NULL,
};

static struct attribute_group l2x0_pmu_cpumask_attr_group = {
        .attrs = l2x0_pmu_cpumask_attrs,
};

static const struct attribute_group *l2x0_pmu_attr_groups[] = {
        &l2x0_pmu_event_attrs_group,
        &l2x0_pmu_cpumask_attr_group,
        NULL,
};
static void l2x0_pmu_reset(void)
{
        int i;

        __l2x0_pmu_disable();

        for (i = 0; i < PMU_NR_COUNTERS; i++)
                __l2x0_pmu_event_disable(i);
}

static int l2x0_pmu_offline_cpu(unsigned int cpu)
{
        unsigned int target;

        if (!cpumask_test_and_clear_cpu(cpu, &pmu_cpu))
                return 0;

        target = cpumask_any_but(cpu_online_mask, cpu);
        if (target >= nr_cpu_ids)
                return 0;

        perf_pmu_migrate_context(l2x0_pmu, cpu, target);
        cpumask_set_cpu(target, &pmu_cpu);

        return 0;
}
void l2x0_pmu_suspend(void)
{
        int i;

        if (!l2x0_pmu)
                return;

        l2x0_pmu_disable(l2x0_pmu);

        for (i = 0; i < PMU_NR_COUNTERS; i++) {
                if (events[i])
                        l2x0_pmu_event_stop(events[i], PERF_EF_UPDATE);
        }
}

void l2x0_pmu_resume(void)
{
        int i;

        if (!l2x0_pmu)
                return;

        l2x0_pmu_reset();

        for (i = 0; i < PMU_NR_COUNTERS; i++) {
                if (events[i])
                        l2x0_pmu_event_start(events[i], PERF_EF_RELOAD);
        }

        l2x0_pmu_enable(l2x0_pmu);
}
void __init l2x0_pmu_register(void __iomem *base, u32 part)
{
        /*
         * Determine whether we support the PMU, and choose the name for sysfs.
         * This is also used by l2x0_pmu_event_attr_is_visible to determine
         * which events to display, as the PL310 PMU supports a superset of
         * L220 events.
         *
         * The L210 PMU has a different programmer's interface, and is not
         * supported by this driver.
         *
         * We must defer registering the PMU until the perf subsystem is up and
         * running, so just stash the name and base, and leave that to another
         * initcall.
         */
        switch (part & L2X0_CACHE_ID_PART_MASK) {
        case L2X0_CACHE_ID_PART_L220:
                l2x0_name = "l2c_220";
                break;
        case L2X0_CACHE_ID_PART_L310:
                l2x0_name = "l2c_310";
                break;
        default:
                return;
        }

        l2x0_base = base;
}
static __init int l2x0_pmu_init(void)
{
        int ret;

        if (!l2x0_base)
                return 0;

        l2x0_pmu = kzalloc(sizeof(*l2x0_pmu), GFP_KERNEL);
        if (!l2x0_pmu) {
                pr_warn("Unable to allocate L2x0 PMU\n");
                return -ENOMEM;
        }

        *l2x0_pmu = (struct pmu) {
                .task_ctx_nr = perf_invalid_context,
                .pmu_enable = l2x0_pmu_enable,
                .pmu_disable = l2x0_pmu_disable,
                .read = l2x0_pmu_event_read,
                .start = l2x0_pmu_event_start,
                .stop = l2x0_pmu_event_stop,
                .add = l2x0_pmu_event_add,
                .del = l2x0_pmu_event_del,
                .event_init = l2x0_pmu_event_init,
                .attr_groups = l2x0_pmu_attr_groups,
        };

        l2x0_pmu_reset();

        /*
         * We always use a hrtimer rather than an interrupt.
         * See comments in l2x0_pmu_event_configure and l2x0_pmu_poll.
         *
         * Polling once a second allows the counters to fill up to 1/128th on a
         * quad-core test chip with cores clocked at 400MHz. Hopefully this
         * leaves sufficient headroom to avoid overflow on production silicon
         * at higher frequencies.
         */
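        /*
         * Rough worst-case arithmetic (not from the original comment): an
         * event firing on every cycle of a 1GHz clock would need
         * 2^32 / 10^9 =~ 4.3 seconds to saturate a 32-bit counter, so a one
         * second poll period still leaves roughly a 4x margin at that rate.
         */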
        l2x0_pmu_poll_period = ms_to_ktime(1000);
        hrtimer_init(&l2x0_pmu_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        l2x0_pmu_hrtimer.function = l2x0_pmu_poll;

        cpumask_set_cpu(0, &pmu_cpu);
        ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_L2X0_ONLINE,
                                        "perf/arm/l2x0:online", NULL,
                                        l2x0_pmu_offline_cpu);
        if (ret)
                goto out_pmu;

        ret = perf_pmu_register(l2x0_pmu, l2x0_name, -1);
        if (ret)
                goto out_cpuhp;

        return 0;

out_cpuhp:
        cpuhp_remove_state_nocalls(CPUHP_AP_PERF_ARM_L2X0_ONLINE);
out_pmu:
        kfree(l2x0_pmu);
        l2x0_pmu = NULL;
        return ret;
}
device_initcall(l2x0_pmu_init);