power9-pmu.c 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331
  1. /*
  2. * Performance counter support for POWER9 processors.
  3. *
  4. * Copyright 2009 Paul Mackerras, IBM Corporation.
  5. * Copyright 2013 Michael Ellerman, IBM Corporation.
  6. * Copyright 2016 Madhavan Srinivasan, IBM Corporation.
  7. *
  8. * This program is free software; you can redistribute it and/or
  9. * modify it under the terms of the GNU General Public License
  10. * as published by the Free Software Foundation; either version
  11. * 2 of the License, or later version.
  12. */
  13. #define pr_fmt(fmt) "power9-pmu: " fmt
  14. #include "isa207-common.h"
  15. /*
  16. * Some power9 event codes.
  17. */
  18. #define EVENT(_name, _code) _name = _code,
  19. enum {
  20. #include "power9-events-list.h"
  21. };
  22. #undef EVENT
  /*
   * MMCRA IFM bits - POWER9.
   *
   * The three encodings differ only in bits 30 and 31 of the register value,
   * so each one is written as the two-bit field value shifted into place.
   */
  #define POWER9_MMCRA_IFM1       (0x1UL << 30)
  #define POWER9_MMCRA_IFM2       (0x2UL << 30)
  #define POWER9_MMCRA_IFM3       (0x3UL << 30)
  27. GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC);
  28. GENERIC_EVENT_ATTR(stalled-cycles-frontend, PM_ICT_NOSLOT_CYC);
  29. GENERIC_EVENT_ATTR(stalled-cycles-backend, PM_CMPLU_STALL);
  30. GENERIC_EVENT_ATTR(instructions, PM_INST_CMPL);
  31. GENERIC_EVENT_ATTR(branch-instructions, PM_BRU_CMPL);
  32. GENERIC_EVENT_ATTR(branch-misses, PM_BR_MPRED_CMPL);
  33. GENERIC_EVENT_ATTR(cache-references, PM_LD_REF_L1);
  34. GENERIC_EVENT_ATTR(cache-misses, PM_LD_MISS_L1_FIN);
  35. CACHE_EVENT_ATTR(L1-dcache-load-misses, PM_LD_MISS_L1_FIN);
  36. CACHE_EVENT_ATTR(L1-dcache-loads, PM_LD_REF_L1);
  37. CACHE_EVENT_ATTR(L1-dcache-prefetches, PM_L1_PREF);
  38. CACHE_EVENT_ATTR(L1-dcache-store-misses, PM_ST_MISS_L1);
  39. CACHE_EVENT_ATTR(L1-icache-load-misses, PM_L1_ICACHE_MISS);
  40. CACHE_EVENT_ATTR(L1-icache-loads, PM_INST_FROM_L1);
  41. CACHE_EVENT_ATTR(L1-icache-prefetches, PM_IC_PREF_WRITE);
  42. CACHE_EVENT_ATTR(LLC-load-misses, PM_DATA_FROM_L3MISS);
  43. CACHE_EVENT_ATTR(LLC-loads, PM_DATA_FROM_L3);
  44. CACHE_EVENT_ATTR(LLC-prefetches, PM_L3_PREF_ALL);
  45. CACHE_EVENT_ATTR(LLC-store-misses, PM_L2_ST_MISS);
  46. CACHE_EVENT_ATTR(LLC-stores, PM_L2_ST);
  47. CACHE_EVENT_ATTR(branch-load-misses, PM_BR_MPRED_CMPL);
  48. CACHE_EVENT_ATTR(branch-loads, PM_BRU_CMPL);
  49. CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS);
  50. CACHE_EVENT_ATTR(iTLB-load-misses, PM_ITLB_MISS);
  51. static struct attribute *power9_events_attr[] = {
  52. GENERIC_EVENT_PTR(PM_CYC),
  53. GENERIC_EVENT_PTR(PM_ICT_NOSLOT_CYC),
  54. GENERIC_EVENT_PTR(PM_CMPLU_STALL),
  55. GENERIC_EVENT_PTR(PM_INST_CMPL),
  56. GENERIC_EVENT_PTR(PM_BRU_CMPL),
  57. GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL),
  58. GENERIC_EVENT_PTR(PM_LD_REF_L1),
  59. GENERIC_EVENT_PTR(PM_LD_MISS_L1_FIN),
  60. CACHE_EVENT_PTR(PM_LD_MISS_L1_FIN),
  61. CACHE_EVENT_PTR(PM_LD_REF_L1),
  62. CACHE_EVENT_PTR(PM_L1_PREF),
  63. CACHE_EVENT_PTR(PM_ST_MISS_L1),
  64. CACHE_EVENT_PTR(PM_L1_ICACHE_MISS),
  65. CACHE_EVENT_PTR(PM_INST_FROM_L1),
  66. CACHE_EVENT_PTR(PM_IC_PREF_WRITE),
  67. CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS),
  68. CACHE_EVENT_PTR(PM_DATA_FROM_L3),
  69. CACHE_EVENT_PTR(PM_L3_PREF_ALL),
  70. CACHE_EVENT_PTR(PM_L2_ST_MISS),
  71. CACHE_EVENT_PTR(PM_L2_ST),
  72. CACHE_EVENT_PTR(PM_BR_MPRED_CMPL),
  73. CACHE_EVENT_PTR(PM_BRU_CMPL),
  74. CACHE_EVENT_PTR(PM_DTLB_MISS),
  75. CACHE_EVENT_PTR(PM_ITLB_MISS),
  76. NULL
  77. };
  78. static struct attribute_group power9_pmu_events_group = {
  79. .name = "events",
  80. .attrs = power9_events_attr,
  81. };
  82. PMU_FORMAT_ATTR(event, "config:0-49");
  83. PMU_FORMAT_ATTR(pmcxsel, "config:0-7");
  84. PMU_FORMAT_ATTR(mark, "config:8");
  85. PMU_FORMAT_ATTR(combine, "config:11");
  86. PMU_FORMAT_ATTR(unit, "config:12-15");
  87. PMU_FORMAT_ATTR(pmc, "config:16-19");
  88. PMU_FORMAT_ATTR(cache_sel, "config:20-23");
  89. PMU_FORMAT_ATTR(sample_mode, "config:24-28");
  90. PMU_FORMAT_ATTR(thresh_sel, "config:29-31");
  91. PMU_FORMAT_ATTR(thresh_stop, "config:32-35");
  92. PMU_FORMAT_ATTR(thresh_start, "config:36-39");
  93. PMU_FORMAT_ATTR(thresh_cmp, "config:40-49");
  94. static struct attribute *power9_pmu_format_attr[] = {
  95. &format_attr_event.attr,
  96. &format_attr_pmcxsel.attr,
  97. &format_attr_mark.attr,
  98. &format_attr_combine.attr,
  99. &format_attr_unit.attr,
  100. &format_attr_pmc.attr,
  101. &format_attr_cache_sel.attr,
  102. &format_attr_sample_mode.attr,
  103. &format_attr_thresh_sel.attr,
  104. &format_attr_thresh_stop.attr,
  105. &format_attr_thresh_start.attr,
  106. &format_attr_thresh_cmp.attr,
  107. NULL,
  108. };
  109. static struct attribute_group power9_pmu_format_group = {
  110. .name = "format",
  111. .attrs = power9_pmu_format_attr,
  112. };
  113. static const struct attribute_group *power9_pmu_attr_groups[] = {
  114. &power9_pmu_format_group,
  115. &power9_pmu_events_group,
  116. NULL,
  117. };
  118. static int power9_generic_events[] = {
  119. [PERF_COUNT_HW_CPU_CYCLES] = PM_CYC,
  120. [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = PM_ICT_NOSLOT_CYC,
  121. [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = PM_CMPLU_STALL,
  122. [PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_CMPL,
  123. [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BRU_CMPL,
  124. [PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL,
  125. [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1,
  126. [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1_FIN,
  127. };
  128. static u64 power9_bhrb_filter_map(u64 branch_sample_type)
  129. {
  130. u64 pmu_bhrb_filter = 0;
  131. /* BHRB and regular PMU events share the same privilege state
  132. * filter configuration. BHRB is always recorded along with a
  133. * regular PMU event. As the privilege state filter is handled
  134. * in the basic PMC configuration of the accompanying regular
  135. * PMU event, we ignore any separate BHRB specific request.
  136. */
  137. /* No branch filter requested */
  138. if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY)
  139. return pmu_bhrb_filter;
  140. /* Invalid branch filter options - HW does not support */
  141. if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
  142. return -1;
  143. if (branch_sample_type & PERF_SAMPLE_BRANCH_IND_CALL)
  144. return -1;
  145. if (branch_sample_type & PERF_SAMPLE_BRANCH_CALL)
  146. return -1;
  147. if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL) {
  148. pmu_bhrb_filter |= POWER9_MMCRA_IFM1;
  149. return pmu_bhrb_filter;
  150. }
  151. /* Every thing else is unsupported */
  152. return -1;
  153. }
  154. static void power9_config_bhrb(u64 pmu_bhrb_filter)
  155. {
  156. /* Enable BHRB filter in PMU */
  157. mtspr(SPRN_MMCRA, (mfspr(SPRN_MMCRA) | pmu_bhrb_filter));
  158. }
  159. #define C(x) PERF_COUNT_HW_CACHE_##x
  160. /*
  161. * Table of generalized cache-related events.
  162. * 0 means not supported, -1 means nonsensical, other values
  163. * are event codes.
  164. */
  165. static int power9_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
  166. [ C(L1D) ] = {
  167. [ C(OP_READ) ] = {
  168. [ C(RESULT_ACCESS) ] = PM_LD_REF_L1,
  169. [ C(RESULT_MISS) ] = PM_LD_MISS_L1_FIN,
  170. },
  171. [ C(OP_WRITE) ] = {
  172. [ C(RESULT_ACCESS) ] = 0,
  173. [ C(RESULT_MISS) ] = PM_ST_MISS_L1,
  174. },
  175. [ C(OP_PREFETCH) ] = {
  176. [ C(RESULT_ACCESS) ] = PM_L1_PREF,
  177. [ C(RESULT_MISS) ] = 0,
  178. },
  179. },
  180. [ C(L1I) ] = {
  181. [ C(OP_READ) ] = {
  182. [ C(RESULT_ACCESS) ] = PM_INST_FROM_L1,
  183. [ C(RESULT_MISS) ] = PM_L1_ICACHE_MISS,
  184. },
  185. [ C(OP_WRITE) ] = {
  186. [ C(RESULT_ACCESS) ] = PM_L1_DEMAND_WRITE,
  187. [ C(RESULT_MISS) ] = -1,
  188. },
  189. [ C(OP_PREFETCH) ] = {
  190. [ C(RESULT_ACCESS) ] = PM_IC_PREF_WRITE,
  191. [ C(RESULT_MISS) ] = 0,
  192. },
  193. },
  194. [ C(LL) ] = {
  195. [ C(OP_READ) ] = {
  196. [ C(RESULT_ACCESS) ] = PM_DATA_FROM_L3,
  197. [ C(RESULT_MISS) ] = PM_DATA_FROM_L3MISS,
  198. },
  199. [ C(OP_WRITE) ] = {
  200. [ C(RESULT_ACCESS) ] = PM_L2_ST,
  201. [ C(RESULT_MISS) ] = PM_L2_ST_MISS,
  202. },
  203. [ C(OP_PREFETCH) ] = {
  204. [ C(RESULT_ACCESS) ] = PM_L3_PREF_ALL,
  205. [ C(RESULT_MISS) ] = 0,
  206. },
  207. },
  208. [ C(DTLB) ] = {
  209. [ C(OP_READ) ] = {
  210. [ C(RESULT_ACCESS) ] = 0,
  211. [ C(RESULT_MISS) ] = PM_DTLB_MISS,
  212. },
  213. [ C(OP_WRITE) ] = {
  214. [ C(RESULT_ACCESS) ] = -1,
  215. [ C(RESULT_MISS) ] = -1,
  216. },
  217. [ C(OP_PREFETCH) ] = {
  218. [ C(RESULT_ACCESS) ] = -1,
  219. [ C(RESULT_MISS) ] = -1,
  220. },
  221. },
  222. [ C(ITLB) ] = {
  223. [ C(OP_READ) ] = {
  224. [ C(RESULT_ACCESS) ] = 0,
  225. [ C(RESULT_MISS) ] = PM_ITLB_MISS,
  226. },
  227. [ C(OP_WRITE) ] = {
  228. [ C(RESULT_ACCESS) ] = -1,
  229. [ C(RESULT_MISS) ] = -1,
  230. },
  231. [ C(OP_PREFETCH) ] = {
  232. [ C(RESULT_ACCESS) ] = -1,
  233. [ C(RESULT_MISS) ] = -1,
  234. },
  235. },
  236. [ C(BPU) ] = {
  237. [ C(OP_READ) ] = {
  238. [ C(RESULT_ACCESS) ] = PM_BRU_CMPL,
  239. [ C(RESULT_MISS) ] = PM_BR_MPRED_CMPL,
  240. },
  241. [ C(OP_WRITE) ] = {
  242. [ C(RESULT_ACCESS) ] = -1,
  243. [ C(RESULT_MISS) ] = -1,
  244. },
  245. [ C(OP_PREFETCH) ] = {
  246. [ C(RESULT_ACCESS) ] = -1,
  247. [ C(RESULT_MISS) ] = -1,
  248. },
  249. },
  250. [ C(NODE) ] = {
  251. [ C(OP_READ) ] = {
  252. [ C(RESULT_ACCESS) ] = -1,
  253. [ C(RESULT_MISS) ] = -1,
  254. },
  255. [ C(OP_WRITE) ] = {
  256. [ C(RESULT_ACCESS) ] = -1,
  257. [ C(RESULT_MISS) ] = -1,
  258. },
  259. [ C(OP_PREFETCH) ] = {
  260. [ C(RESULT_ACCESS) ] = -1,
  261. [ C(RESULT_MISS) ] = -1,
  262. },
  263. },
  264. };
  265. #undef C
  266. static struct power_pmu power9_pmu = {
  267. .name = "POWER9",
  268. .n_counter = MAX_PMU_COUNTERS,
  269. .add_fields = ISA207_ADD_FIELDS,
  270. .test_adder = P9_DD1_TEST_ADDER,
  271. .compute_mmcr = isa207_compute_mmcr,
  272. .config_bhrb = power9_config_bhrb,
  273. .bhrb_filter_map = power9_bhrb_filter_map,
  274. .get_constraint = isa207_get_constraint,
  275. .disable_pmc = isa207_disable_pmc,
  276. .flags = PPMU_HAS_SIER | PPMU_ARCH_207S,
  277. .n_generic = ARRAY_SIZE(power9_generic_events),
  278. .generic_events = power9_generic_events,
  279. .cache_events = &power9_cache_events,
  280. .attr_groups = power9_pmu_attr_groups,
  281. .bhrb_nr = 32,
  282. };
  283. static int __init init_power9_pmu(void)
  284. {
  285. int rc;
  286. /* Comes from cpu_specs[] */
  287. if (!cur_cpu_spec->oprofile_cpu_type ||
  288. strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power9"))
  289. return -ENODEV;
  290. rc = register_power_pmu(&power9_pmu);
  291. if (rc)
  292. return rc;
  293. /* Tell userspace that EBB is supported */
  294. cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB;
  295. return 0;
  296. }
  297. early_initcall(init_power9_pmu);