qcom_l2_pmu.c

/* Copyright (c) 2015-2017 The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */

#include <linux/acpi.h>
#include <linux/bitops.h>
#include <linux/bug.h>
#include <linux/cpuhotplug.h>
#include <linux/cpumask.h>
#include <linux/device.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/percpu.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/sysfs.h>
#include <linux/types.h>

#include <asm/barrier.h>
#include <asm/local64.h>
#include <asm/sysreg.h>

#define MAX_L2_CTRS             9

#define L2PMCR_NUM_EV_SHIFT     11
#define L2PMCR_NUM_EV_MASK      0x1F

#define L2PMCR                  0x400
#define L2PMCNTENCLR            0x403
#define L2PMCNTENSET            0x404
#define L2PMINTENCLR            0x405
#define L2PMINTENSET            0x406
#define L2PMOVSCLR              0x407
#define L2PMOVSSET              0x408
#define L2PMCCNTCR              0x409
#define L2PMCCNTR               0x40A
#define L2PMCCNTSR              0x40C
#define L2PMRESR                0x410
#define IA_L2PMXEVCNTCR_BASE    0x420
#define IA_L2PMXEVCNTR_BASE     0x421
#define IA_L2PMXEVFILTER_BASE   0x423
#define IA_L2PMXEVTYPER_BASE    0x424

#define IA_L2_REG_OFFSET        0x10

#define L2PMXEVFILTER_SUFILTER_ALL      0x000E0000
#define L2PMXEVFILTER_ORGFILTER_IDINDEP 0x00000004
#define L2PMXEVFILTER_ORGFILTER_ALL     0x00000003

#define L2EVTYPER_REG_SHIFT     3

#define L2PMRESR_GROUP_BITS     8
#define L2PMRESR_GROUP_MASK     GENMASK(7, 0)

#define L2CYCLE_CTR_BIT         31
#define L2CYCLE_CTR_RAW_CODE    0xFE

#define L2PMCR_RESET_ALL        0x6
#define L2PMCR_COUNTERS_ENABLE  0x1
#define L2PMCR_COUNTERS_DISABLE 0x0

#define L2PMRESR_EN             BIT_ULL(63)

#define L2_EVT_MASK             0x00000FFF
#define L2_EVT_CODE_MASK        0x00000FF0
#define L2_EVT_GRP_MASK         0x0000000F
#define L2_EVT_CODE_SHIFT       4
#define L2_EVT_GRP_SHIFT        0
#define L2_EVT_CODE(event)      (((event) & L2_EVT_CODE_MASK) >> L2_EVT_CODE_SHIFT)
#define L2_EVT_GROUP(event)     (((event) & L2_EVT_GRP_MASK) >> L2_EVT_GRP_SHIFT)
#define L2_EVT_GROUP_MAX        7

#define L2_COUNTER_RELOAD       BIT_ULL(31)
#define L2_CYCLE_COUNTER_RELOAD BIT_ULL(63)

#define L2CPUSRSELR_EL1         sys_reg(3, 3, 15, 0, 6)
#define L2CPUSRDR_EL1           sys_reg(3, 3, 15, 0, 7)

#define reg_idx(reg, i)         (((i) * IA_L2_REG_OFFSET) + reg##_BASE)
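
/*
 * Worked example (illustrative only): the per-counter registers are laid out
 * at a fixed stride of IA_L2_REG_OFFSET, so for counter 2 the event counter
 * register address is reg_idx(IA_L2PMXEVCNTR, 2) = 0x421 + 2 * 0x10 = 0x441.
 */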

static DEFINE_RAW_SPINLOCK(l2_access_lock);

/**
 * set_l2_indirect_reg: write value to an L2 register
 * @reg: Address of L2 register.
 * @val: Value to be written to register.
 *
 * Use architecturally required barriers for ordering between system register
 * accesses
 */
static void set_l2_indirect_reg(u64 reg, u64 val)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&l2_access_lock, flags);
        write_sysreg_s(reg, L2CPUSRSELR_EL1);
        isb();
        write_sysreg_s(val, L2CPUSRDR_EL1);
        isb();
        raw_spin_unlock_irqrestore(&l2_access_lock, flags);
}

/**
 * get_l2_indirect_reg: read an L2 register value
 * @reg: Address of L2 register.
 *
 * Use architecturally required barriers for ordering between system register
 * accesses
 */
static u64 get_l2_indirect_reg(u64 reg)
{
        u64 val;
        unsigned long flags;

        raw_spin_lock_irqsave(&l2_access_lock, flags);
        write_sysreg_s(reg, L2CPUSRSELR_EL1);
        isb();
        val = read_sysreg_s(L2CPUSRDR_EL1);
        raw_spin_unlock_irqrestore(&l2_access_lock, flags);

        return val;
}

struct cluster_pmu;

/*
 * Aggregate PMU. Implements the core pmu functions and manages
 * the hardware PMUs.
 */
struct l2cache_pmu {
        struct hlist_node node;
        u32 num_pmus;
        struct pmu pmu;
        int num_counters;
        cpumask_t cpumask;
        struct platform_device *pdev;
        struct cluster_pmu * __percpu *pmu_cluster;
        struct list_head clusters;
};

/*
 * The cache is made up of one or more clusters, each cluster has its own PMU.
 * Each cluster is associated with one or more CPUs.
 * This structure represents one of the hardware PMUs.
 *
 * Events can be envisioned as a 2-dimensional array. Each column represents
 * a group of events. There are 8 groups. Only one entry from each
 * group can be in use at a time.
 *
 * Events are specified as 0xCCG, where CC is 2 hex digits specifying
 * the code (array row) and G specifies the group (column).
 *
 * In addition there is a cycle counter event specified by L2CYCLE_CTR_RAW_CODE
 * which is outside the above scheme.
 */
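
/*
 * Worked example (illustrative; not an event list for any particular part):
 * a raw config of 0x4A1 selects code 0x4A (L2_EVT_CODE) in group 1
 * (L2_EVT_GROUP). A second event such as 0x3B1 would conflict with it because
 * both occupy group 1, while 0x3B2 would not.
 */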
struct cluster_pmu {
        struct list_head next;
        struct perf_event *events[MAX_L2_CTRS];
        struct l2cache_pmu *l2cache_pmu;
        DECLARE_BITMAP(used_counters, MAX_L2_CTRS);
        DECLARE_BITMAP(used_groups, L2_EVT_GROUP_MAX + 1);
        int irq;
        int cluster_id;
        /* The CPU that is used for collecting events on this cluster */
        int on_cpu;
        /* All the CPUs associated with this cluster */
        cpumask_t cluster_cpus;
        spinlock_t pmu_lock;
};

#define to_l2cache_pmu(p) (container_of(p, struct l2cache_pmu, pmu))

static u32 l2_cycle_ctr_idx;
static u32 l2_counter_present_mask;

static inline u32 idx_to_reg_bit(u32 idx)
{
        if (idx == l2_cycle_ctr_idx)
                return BIT(L2CYCLE_CTR_BIT);

        return BIT(idx);
}

static inline struct cluster_pmu *get_cluster_pmu(
        struct l2cache_pmu *l2cache_pmu, int cpu)
{
        return *per_cpu_ptr(l2cache_pmu->pmu_cluster, cpu);
}

static void cluster_pmu_reset(void)
{
        /* Reset all counters */
        set_l2_indirect_reg(L2PMCR, L2PMCR_RESET_ALL);
        set_l2_indirect_reg(L2PMCNTENCLR, l2_counter_present_mask);
        set_l2_indirect_reg(L2PMINTENCLR, l2_counter_present_mask);
        set_l2_indirect_reg(L2PMOVSCLR, l2_counter_present_mask);
}

static inline void cluster_pmu_enable(void)
{
        set_l2_indirect_reg(L2PMCR, L2PMCR_COUNTERS_ENABLE);
}

static inline void cluster_pmu_disable(void)
{
        set_l2_indirect_reg(L2PMCR, L2PMCR_COUNTERS_DISABLE);
}

static inline void cluster_pmu_counter_set_value(u32 idx, u64 value)
{
        if (idx == l2_cycle_ctr_idx)
                set_l2_indirect_reg(L2PMCCNTR, value);
        else
                set_l2_indirect_reg(reg_idx(IA_L2PMXEVCNTR, idx), value);
}

static inline u64 cluster_pmu_counter_get_value(u32 idx)
{
        u64 value;

        if (idx == l2_cycle_ctr_idx)
                value = get_l2_indirect_reg(L2PMCCNTR);
        else
                value = get_l2_indirect_reg(reg_idx(IA_L2PMXEVCNTR, idx));

        return value;
}

static inline void cluster_pmu_counter_enable(u32 idx)
{
        set_l2_indirect_reg(L2PMCNTENSET, idx_to_reg_bit(idx));
}

static inline void cluster_pmu_counter_disable(u32 idx)
{
        set_l2_indirect_reg(L2PMCNTENCLR, idx_to_reg_bit(idx));
}

static inline void cluster_pmu_counter_enable_interrupt(u32 idx)
{
        set_l2_indirect_reg(L2PMINTENSET, idx_to_reg_bit(idx));
}

static inline void cluster_pmu_counter_disable_interrupt(u32 idx)
{
        set_l2_indirect_reg(L2PMINTENCLR, idx_to_reg_bit(idx));
}

static inline void cluster_pmu_set_evccntcr(u32 val)
{
        set_l2_indirect_reg(L2PMCCNTCR, val);
}

static inline void cluster_pmu_set_evcntcr(u32 ctr, u32 val)
{
        set_l2_indirect_reg(reg_idx(IA_L2PMXEVCNTCR, ctr), val);
}

static inline void cluster_pmu_set_evtyper(u32 ctr, u32 val)
{
        set_l2_indirect_reg(reg_idx(IA_L2PMXEVTYPER, ctr), val);
}

static void cluster_pmu_set_resr(struct cluster_pmu *cluster,
                                 u32 event_group, u32 event_cc)
{
        u64 field;
        u64 resr_val;
        u32 shift;
        unsigned long flags;

        shift = L2PMRESR_GROUP_BITS * event_group;
        field = ((u64)(event_cc & L2PMRESR_GROUP_MASK) << shift);

        spin_lock_irqsave(&cluster->pmu_lock, flags);

        resr_val = get_l2_indirect_reg(L2PMRESR);
        resr_val &= ~(L2PMRESR_GROUP_MASK << shift);
        resr_val |= field;
        resr_val |= L2PMRESR_EN;
        set_l2_indirect_reg(L2PMRESR, resr_val);

        spin_unlock_irqrestore(&cluster->pmu_lock, flags);
}
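
/*
 * Worked example (illustrative): programming code 0x4A into group 1 gives
 * shift = 8, so bits [15:8] of L2PMRESR are replaced with 0x4A and the
 * enable bit (bit 63) is set, while the fields of the other seven groups are
 * preserved by the read-modify-write above.
 */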

/*
 * Hardware allows filtering of events based on the originating
 * CPU. Turn this off by setting filter bits to allow events from
 * all CPUS, subunits and ID independent events in this cluster.
 */
static inline void cluster_pmu_set_evfilter_sys_mode(u32 ctr)
{
        u32 val = L2PMXEVFILTER_SUFILTER_ALL |
                  L2PMXEVFILTER_ORGFILTER_IDINDEP |
                  L2PMXEVFILTER_ORGFILTER_ALL;

        set_l2_indirect_reg(reg_idx(IA_L2PMXEVFILTER, ctr), val);
}

static inline u32 cluster_pmu_getreset_ovsr(void)
{
        u32 result = get_l2_indirect_reg(L2PMOVSSET);

        set_l2_indirect_reg(L2PMOVSCLR, result);
        return result;
}

static inline bool cluster_pmu_has_overflowed(u32 ovsr)
{
        return !!(ovsr & l2_counter_present_mask);
}

static inline bool cluster_pmu_counter_has_overflowed(u32 ovsr, u32 idx)
{
        return !!(ovsr & idx_to_reg_bit(idx));
}

static void l2_cache_event_update(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        u64 delta, prev, now;
        u32 idx = hwc->idx;

        do {
                prev = local64_read(&hwc->prev_count);
                now = cluster_pmu_counter_get_value(idx);
        } while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev);

        /*
         * The cycle counter is 64-bit, but all other counters are
         * 32-bit, and we must handle 32-bit overflow explicitly.
         */
        delta = now - prev;
        if (idx != l2_cycle_ctr_idx)
                delta &= 0xffffffff;

        local64_add(delta, &event->count);
}
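
/*
 * Worked example (illustrative): if a 32-bit counter wraps between reads,
 * e.g. prev = 0xfffffff0 and now = 0x00000010, then now - prev is a huge
 * 64-bit value; masking it with 0xffffffff yields the true delta of 0x20.
 */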

static void l2_cache_cluster_set_period(struct cluster_pmu *cluster,
                                        struct hw_perf_event *hwc)
{
        u32 idx = hwc->idx;
        u64 new;

        /*
         * We limit the max period to half the max counter value so
         * that even in the case of extreme interrupt latency the
         * counter will (hopefully) not wrap past its initial value.
         */
        if (idx == l2_cycle_ctr_idx)
                new = L2_CYCLE_COUNTER_RELOAD;
        else
                new = L2_COUNTER_RELOAD;

        local64_set(&hwc->prev_count, new);
        cluster_pmu_counter_set_value(idx, new);
}
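
/*
 * For illustration: a 32-bit event counter is reloaded to BIT(31), leaving
 * 2^31 increments before the overflow interrupt fires; the 64-bit cycle
 * counter is likewise reloaded to BIT(63).
 */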

static int l2_cache_get_event_idx(struct cluster_pmu *cluster,
                                  struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        int idx;
        int num_ctrs = cluster->l2cache_pmu->num_counters - 1;
        unsigned int group;

        if (hwc->config_base == L2CYCLE_CTR_RAW_CODE) {
                if (test_and_set_bit(l2_cycle_ctr_idx, cluster->used_counters))
                        return -EAGAIN;

                return l2_cycle_ctr_idx;
        }

        idx = find_first_zero_bit(cluster->used_counters, num_ctrs);
        if (idx == num_ctrs)
                /* The counters are all in use. */
                return -EAGAIN;

        /*
         * Check for column exclusion: event column already in use by another
         * event. This is for events which are not in the same group.
         * Conflicting events in the same group are detected in event_init.
         */
        group = L2_EVT_GROUP(hwc->config_base);
        if (test_bit(group, cluster->used_groups))
                return -EAGAIN;

        set_bit(idx, cluster->used_counters);
        set_bit(group, cluster->used_groups);

        return idx;
}
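
/*
 * Illustration (assuming the usual 8 event counters plus the cycle counter
 * reported by L2PMCR): event counters take indices 0-7 from used_counters,
 * while the cycle counter always gets the dedicated last index
 * (l2_cycle_ctr_idx), which idx_to_reg_bit() maps to bit 31 of the control
 * registers.
 */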

static void l2_cache_clear_event_idx(struct cluster_pmu *cluster,
                                     struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;

        clear_bit(idx, cluster->used_counters);
        if (hwc->config_base != L2CYCLE_CTR_RAW_CODE)
                clear_bit(L2_EVT_GROUP(hwc->config_base), cluster->used_groups);
}

static irqreturn_t l2_cache_handle_irq(int irq_num, void *data)
{
        struct cluster_pmu *cluster = data;
        int num_counters = cluster->l2cache_pmu->num_counters;
        u32 ovsr;
        int idx;

        ovsr = cluster_pmu_getreset_ovsr();
        if (!cluster_pmu_has_overflowed(ovsr))
                return IRQ_NONE;

        for_each_set_bit(idx, cluster->used_counters, num_counters) {
                struct perf_event *event = cluster->events[idx];
                struct hw_perf_event *hwc;

                if (WARN_ON_ONCE(!event))
                        continue;

                if (!cluster_pmu_counter_has_overflowed(ovsr, idx))
                        continue;

                l2_cache_event_update(event);
                hwc = &event->hw;

                l2_cache_cluster_set_period(cluster, hwc);
        }

        return IRQ_HANDLED;
}

/*
 * Implementation of abstract pmu functionality required by
 * the core perf events code.
 */

static void l2_cache_pmu_enable(struct pmu *pmu)
{
        /*
         * Although there is only one PMU (per socket) controlling multiple
         * physical PMUs (per cluster), because we do not support per-task mode
         * each event is associated with a CPU. Each event has pmu_enable
         * called on its CPU, so here it is only necessary to enable the
         * counters for the current CPU.
         */

        cluster_pmu_enable();
}

static void l2_cache_pmu_disable(struct pmu *pmu)
{
        cluster_pmu_disable();
}

static int l2_cache_event_init(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        struct cluster_pmu *cluster;
        struct perf_event *sibling;
        struct l2cache_pmu *l2cache_pmu;

        if (event->attr.type != event->pmu->type)
                return -ENOENT;

        l2cache_pmu = to_l2cache_pmu(event->pmu);

        if (hwc->sample_period) {
                dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
                                    "Sampling not supported\n");
                return -EOPNOTSUPP;
        }

        if (event->cpu < 0) {
                dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
                                    "Per-task mode not supported\n");
                return -EOPNOTSUPP;
        }

        /* We cannot filter accurately so we just don't allow it. */
        if (event->attr.exclude_user || event->attr.exclude_kernel ||
            event->attr.exclude_hv || event->attr.exclude_idle) {
                dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
                                    "Can't exclude execution levels\n");
                return -EOPNOTSUPP;
        }

        if (((L2_EVT_GROUP(event->attr.config) > L2_EVT_GROUP_MAX) ||
             ((event->attr.config & ~L2_EVT_MASK) != 0)) &&
            (event->attr.config != L2CYCLE_CTR_RAW_CODE)) {
                dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
                                    "Invalid config %llx\n",
                                    event->attr.config);
                return -EINVAL;
        }

        /* Don't allow groups with mixed PMUs, except for s/w events */
        if (event->group_leader->pmu != event->pmu &&
            !is_software_event(event->group_leader)) {
                dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
                                    "Can't create mixed PMU group\n");
                return -EINVAL;
        }

        list_for_each_entry(sibling, &event->group_leader->sibling_list,
                            group_entry)
                if (sibling->pmu != event->pmu &&
                    !is_software_event(sibling)) {
                        dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
                                            "Can't create mixed PMU group\n");
                        return -EINVAL;
                }

        cluster = get_cluster_pmu(l2cache_pmu, event->cpu);
        if (!cluster) {
                /* CPU has not been initialised */
                dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
                                    "CPU%d not associated with L2 cluster\n",
                                    event->cpu);
                return -EINVAL;
        }

        /* Ensure all events in a group are on the same cpu */
        if ((event->group_leader != event) &&
            (cluster->on_cpu != event->group_leader->cpu)) {
                dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
                                    "Can't create group on CPUs %d and %d",
                                    event->cpu, event->group_leader->cpu);
                return -EINVAL;
        }

        if ((event != event->group_leader) &&
            !is_software_event(event->group_leader) &&
            (L2_EVT_GROUP(event->group_leader->attr.config) ==
             L2_EVT_GROUP(event->attr.config))) {
                dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
                                    "Column exclusion: conflicting events %llx %llx\n",
                                    event->group_leader->attr.config,
                                    event->attr.config);
                return -EINVAL;
        }

        list_for_each_entry(sibling, &event->group_leader->sibling_list,
                            group_entry) {
                if ((sibling != event) &&
                    !is_software_event(sibling) &&
                    (L2_EVT_GROUP(sibling->attr.config) ==
                     L2_EVT_GROUP(event->attr.config))) {
                        dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
                                            "Column exclusion: conflicting events %llx %llx\n",
                                            sibling->attr.config,
                                            event->attr.config);
                        return -EINVAL;
                }
        }

        hwc->idx = -1;
        hwc->config_base = event->attr.config;

        /*
         * Ensure all events are on the same cpu so all events are in the
         * same cpu context, to avoid races on pmu_enable etc.
         */
        event->cpu = cluster->on_cpu;

        return 0;
}

static void l2_cache_event_start(struct perf_event *event, int flags)
{
        struct cluster_pmu *cluster;
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;
        u32 config;
        u32 event_cc, event_group;

        hwc->state = 0;

        cluster = get_cluster_pmu(to_l2cache_pmu(event->pmu), event->cpu);

        l2_cache_cluster_set_period(cluster, hwc);

        if (hwc->config_base == L2CYCLE_CTR_RAW_CODE) {
                cluster_pmu_set_evccntcr(0);
        } else {
                config = hwc->config_base;
                event_cc = L2_EVT_CODE(config);
                event_group = L2_EVT_GROUP(config);

                cluster_pmu_set_evcntcr(idx, 0);
                cluster_pmu_set_evtyper(idx, event_group);
                cluster_pmu_set_resr(cluster, event_group, event_cc);
                cluster_pmu_set_evfilter_sys_mode(idx);
        }

        cluster_pmu_counter_enable_interrupt(idx);
        cluster_pmu_counter_enable(idx);
}

static void l2_cache_event_stop(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;

        if (hwc->state & PERF_HES_STOPPED)
                return;

        cluster_pmu_counter_disable_interrupt(idx);
        cluster_pmu_counter_disable(idx);

        if (flags & PERF_EF_UPDATE)
                l2_cache_event_update(event);
        hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
}

static int l2_cache_event_add(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;
        int idx;
        int err = 0;
        struct cluster_pmu *cluster;

        cluster = get_cluster_pmu(to_l2cache_pmu(event->pmu), event->cpu);

        idx = l2_cache_get_event_idx(cluster, event);
        if (idx < 0)
                return idx;

        hwc->idx = idx;
        hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
        cluster->events[idx] = event;
        local64_set(&hwc->prev_count, 0);

        if (flags & PERF_EF_START)
                l2_cache_event_start(event, flags);

        /* Propagate changes to the userspace mapping. */
        perf_event_update_userpage(event);

        return err;
}

static void l2_cache_event_del(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;
        struct cluster_pmu *cluster;
        int idx = hwc->idx;

        cluster = get_cluster_pmu(to_l2cache_pmu(event->pmu), event->cpu);

        l2_cache_event_stop(event, flags | PERF_EF_UPDATE);
        cluster->events[idx] = NULL;
        l2_cache_clear_event_idx(cluster, event);

        perf_event_update_userpage(event);
}

static void l2_cache_event_read(struct perf_event *event)
{
        l2_cache_event_update(event);
}

static ssize_t l2_cache_pmu_cpumask_show(struct device *dev,
                                         struct device_attribute *attr,
                                         char *buf)
{
        struct l2cache_pmu *l2cache_pmu = to_l2cache_pmu(dev_get_drvdata(dev));

        return cpumap_print_to_pagebuf(true, buf, &l2cache_pmu->cpumask);
}

static struct device_attribute l2_cache_pmu_cpumask_attr =
                __ATTR(cpumask, S_IRUGO, l2_cache_pmu_cpumask_show, NULL);

static struct attribute *l2_cache_pmu_cpumask_attrs[] = {
        &l2_cache_pmu_cpumask_attr.attr,
        NULL,
};

static struct attribute_group l2_cache_pmu_cpumask_group = {
        .attrs = l2_cache_pmu_cpumask_attrs,
};

/* CCG format for perf RAW codes. */
PMU_FORMAT_ATTR(l2_code, "config:4-11");
PMU_FORMAT_ATTR(l2_group, "config:0-3");

static struct attribute *l2_cache_pmu_formats[] = {
        &format_attr_l2_code.attr,
        &format_attr_l2_group.attr,
        NULL,
};

static struct attribute_group l2_cache_pmu_format_group = {
        .name = "format",
        .attrs = l2_cache_pmu_formats,
};

static const struct attribute_group *l2_cache_pmu_attr_grps[] = {
        &l2_cache_pmu_format_group,
        &l2_cache_pmu_cpumask_group,
        NULL,
};
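
/*
 * Illustrative usage (assuming the sysfs format attributes above): an event
 * can be requested either as a raw config value or via the named fields,
 * e.g. "perf stat -e l2cache_0/config=0x4a1/ -a sleep 1" or
 * "perf stat -e l2cache_0/l2_code=0x4a,l2_group=0x1/ -a sleep 1".
 * The code/group values here are placeholders, not a documented event list.
 */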

/*
 * Generic device handlers
 */

static const struct acpi_device_id l2_cache_pmu_acpi_match[] = {
        { "QCOM8130", },
        { }
};

static int get_num_counters(void)
{
        int val;

        val = get_l2_indirect_reg(L2PMCR);

        /*
         * Read number of counters from L2PMCR and add 1
         * for the cycle counter.
         */
        return ((val >> L2PMCR_NUM_EV_SHIFT) & L2PMCR_NUM_EV_MASK) + 1;
}

static struct cluster_pmu *l2_cache_associate_cpu_with_cluster(
        struct l2cache_pmu *l2cache_pmu, int cpu)
{
        u64 mpidr;
        int cpu_cluster_id;
        struct cluster_pmu *cluster = NULL;

        /*
         * This assumes that the cluster_id is in MPIDR[aff1] for
         * single-threaded cores, and MPIDR[aff2] for multi-threaded
         * cores. This logic will have to be updated if this changes.
         */
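        /*
         * For example (hypothetical values): on a non-MT part with
         * MPIDR.Aff1 = 2, cpu_cluster_id becomes 2 and the CPU is matched
         * against the cluster whose firmware _UID is 2.
         */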
        mpidr = read_cpuid_mpidr();
        if (mpidr & MPIDR_MT_BITMASK)
                cpu_cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 2);
        else
                cpu_cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 1);

        list_for_each_entry(cluster, &l2cache_pmu->clusters, next) {
                if (cluster->cluster_id != cpu_cluster_id)
                        continue;

                dev_info(&l2cache_pmu->pdev->dev,
                         "CPU%d associated with cluster %d\n", cpu,
                         cluster->cluster_id);
                cpumask_set_cpu(cpu, &cluster->cluster_cpus);
                *per_cpu_ptr(l2cache_pmu->pmu_cluster, cpu) = cluster;
                return cluster;
        }

        /* No cluster with a matching id was found for this CPU. */
        return NULL;
}

static int l2cache_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
{
        struct cluster_pmu *cluster;
        struct l2cache_pmu *l2cache_pmu;

        l2cache_pmu = hlist_entry_safe(node, struct l2cache_pmu, node);
        cluster = get_cluster_pmu(l2cache_pmu, cpu);
        if (!cluster) {
                /* First time this CPU has come online */
                cluster = l2_cache_associate_cpu_with_cluster(l2cache_pmu, cpu);
                if (!cluster) {
                        /* Only if broken firmware doesn't list every cluster */
                        WARN_ONCE(1, "No L2 cache cluster for CPU%d\n", cpu);
                        return 0;
                }
        }

        /* If another CPU is managing this cluster, we're done */
        if (cluster->on_cpu != -1)
                return 0;

        /*
         * All CPUs on this cluster were down, use this one.
         * Reset to put it into sane state.
         */
        cluster->on_cpu = cpu;
        cpumask_set_cpu(cpu, &l2cache_pmu->cpumask);
        cluster_pmu_reset();

        WARN_ON(irq_set_affinity(cluster->irq, cpumask_of(cpu)));
        enable_irq(cluster->irq);

        return 0;
}

static int l2cache_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
{
        struct cluster_pmu *cluster;
        struct l2cache_pmu *l2cache_pmu;
        cpumask_t cluster_online_cpus;
        unsigned int target;

        l2cache_pmu = hlist_entry_safe(node, struct l2cache_pmu, node);
        cluster = get_cluster_pmu(l2cache_pmu, cpu);
        if (!cluster)
                return 0;

        /* If this CPU is not managing the cluster, we're done */
        if (cluster->on_cpu != cpu)
                return 0;

        /* Give up ownership of cluster */
        cpumask_clear_cpu(cpu, &l2cache_pmu->cpumask);
        cluster->on_cpu = -1;

        /* Any other CPU for this cluster which is still online */
        cpumask_and(&cluster_online_cpus, &cluster->cluster_cpus,
                    cpu_online_mask);
        target = cpumask_any_but(&cluster_online_cpus, cpu);
        if (target >= nr_cpu_ids) {
                disable_irq(cluster->irq);
                return 0;
        }

        perf_pmu_migrate_context(&l2cache_pmu->pmu, cpu, target);
        cluster->on_cpu = target;
        cpumask_set_cpu(target, &l2cache_pmu->cpumask);
        WARN_ON(irq_set_affinity(cluster->irq, cpumask_of(target)));

        return 0;
}

static int l2_cache_pmu_probe_cluster(struct device *dev, void *data)
{
        struct platform_device *pdev = to_platform_device(dev->parent);
        struct platform_device *sdev = to_platform_device(dev);
        struct l2cache_pmu *l2cache_pmu = data;
        struct cluster_pmu *cluster;
        struct acpi_device *device;
        unsigned long fw_cluster_id;
        int err;
        int irq;

        if (acpi_bus_get_device(ACPI_HANDLE(dev), &device))
                return -ENODEV;

        if (kstrtoul(device->pnp.unique_id, 10, &fw_cluster_id) < 0) {
                dev_err(&pdev->dev, "unable to read ACPI uid\n");
                return -ENODEV;
        }

        cluster = devm_kzalloc(&pdev->dev, sizeof(*cluster), GFP_KERNEL);
        if (!cluster)
                return -ENOMEM;

        INIT_LIST_HEAD(&cluster->next);
        list_add(&cluster->next, &l2cache_pmu->clusters);
        cluster->cluster_id = fw_cluster_id;

        irq = platform_get_irq(sdev, 0);
        if (irq < 0) {
                dev_err(&pdev->dev,
                        "Failed to get valid irq for cluster %ld\n",
                        fw_cluster_id);
                return irq;
        }
        irq_set_status_flags(irq, IRQ_NOAUTOEN);
        cluster->irq = irq;

        cluster->l2cache_pmu = l2cache_pmu;
        cluster->on_cpu = -1;

        err = devm_request_irq(&pdev->dev, irq, l2_cache_handle_irq,
                               IRQF_NOBALANCING | IRQF_NO_THREAD,
                               "l2-cache-pmu", cluster);
        if (err) {
                dev_err(&pdev->dev,
                        "Unable to request IRQ%d for L2 PMU counters\n", irq);
                return err;
        }

        dev_info(&pdev->dev,
                 "Registered L2 cache PMU cluster %ld\n", fw_cluster_id);

        spin_lock_init(&cluster->pmu_lock);

        l2cache_pmu->num_pmus++;

        return 0;
}

static int l2_cache_pmu_probe(struct platform_device *pdev)
{
        int err;
        struct l2cache_pmu *l2cache_pmu;

        l2cache_pmu =
                devm_kzalloc(&pdev->dev, sizeof(*l2cache_pmu), GFP_KERNEL);
        if (!l2cache_pmu)
                return -ENOMEM;

        INIT_LIST_HEAD(&l2cache_pmu->clusters);

        platform_set_drvdata(pdev, l2cache_pmu);
        l2cache_pmu->pmu = (struct pmu) {
                /* suffix is instance id for future use with multiple sockets */
                .name           = "l2cache_0",
                .task_ctx_nr    = perf_invalid_context,
                .pmu_enable     = l2_cache_pmu_enable,
                .pmu_disable    = l2_cache_pmu_disable,
                .event_init     = l2_cache_event_init,
                .add            = l2_cache_event_add,
                .del            = l2_cache_event_del,
                .start          = l2_cache_event_start,
                .stop           = l2_cache_event_stop,
                .read           = l2_cache_event_read,
                .attr_groups    = l2_cache_pmu_attr_grps,
        };

        l2cache_pmu->num_counters = get_num_counters();
        l2cache_pmu->pdev = pdev;
        l2cache_pmu->pmu_cluster = devm_alloc_percpu(&pdev->dev,
                                                     struct cluster_pmu *);
        if (!l2cache_pmu->pmu_cluster)
                return -ENOMEM;

        l2_cycle_ctr_idx = l2cache_pmu->num_counters - 1;
        l2_counter_present_mask = GENMASK(l2cache_pmu->num_counters - 2, 0) |
                BIT(L2CYCLE_CTR_BIT);
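
        /*
         * Illustration (assuming 8 event counters + cycle counter reported by
         * L2PMCR): the present mask is GENMASK(7, 0) | BIT(31), matching the
         * bit layout used by the enable/interrupt/overflow registers.
         */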

        cpumask_clear(&l2cache_pmu->cpumask);

        /* Read cluster info and initialize each cluster */
        err = device_for_each_child(&pdev->dev, l2cache_pmu,
                                    l2_cache_pmu_probe_cluster);
        if (err)
                return err;

        if (l2cache_pmu->num_pmus == 0) {
                dev_err(&pdev->dev, "No hardware L2 cache PMUs found\n");
                return -ENODEV;
        }

        err = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
                                       &l2cache_pmu->node);
        if (err) {
                dev_err(&pdev->dev, "Error %d registering hotplug", err);
                return err;
        }

        err = perf_pmu_register(&l2cache_pmu->pmu, l2cache_pmu->pmu.name, -1);
        if (err) {
                dev_err(&pdev->dev, "Error %d registering L2 cache PMU\n", err);
                goto out_unregister;
        }

        dev_info(&pdev->dev, "Registered L2 cache PMU using %d HW PMUs\n",
                 l2cache_pmu->num_pmus);

        return err;

out_unregister:
        cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
                                    &l2cache_pmu->node);
        return err;
}

static int l2_cache_pmu_remove(struct platform_device *pdev)
{
        struct l2cache_pmu *l2cache_pmu =
                to_l2cache_pmu(platform_get_drvdata(pdev));

        perf_pmu_unregister(&l2cache_pmu->pmu);
        cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
                                    &l2cache_pmu->node);
        return 0;
}

static struct platform_driver l2_cache_pmu_driver = {
        .driver = {
                .name = "qcom-l2cache-pmu",
                .acpi_match_table = ACPI_PTR(l2_cache_pmu_acpi_match),
        },
        .probe = l2_cache_pmu_probe,
        .remove = l2_cache_pmu_remove,
};

static int __init register_l2_cache_pmu_driver(void)
{
        int err;

        err = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
                                      "AP_PERF_ARM_QCOM_L2_ONLINE",
                                      l2cache_pmu_online_cpu,
                                      l2cache_pmu_offline_cpu);
        if (err)
                return err;

        return platform_driver_register(&l2_cache_pmu_driver);
}
device_initcall(register_l2_cache_pmu_driver);