power4-pmu.c 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623
  1. /*
  2. * Performance counter support for POWER4 (GP) and POWER4+ (GQ) processors.
  3. *
  4. * Copyright 2009 Paul Mackerras, IBM Corporation.
  5. *
  6. * This program is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU General Public License
  8. * as published by the Free Software Foundation; either version
  9. * 2 of the License, or (at your option) any later version.
  10. */
  11. #include <linux/kernel.h>
  12. #include <linux/perf_event.h>
  13. #include <linux/string.h>
  14. #include <asm/reg.h>
  15. #include <asm/cputable.h>
  16. /*
  17. * Bits in event code for POWER4
  18. */
  19. #define PM_PMC_SH 12 /* PMC number (1-based) for direct events */
  20. #define PM_PMC_MSK 0xf
  21. #define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */
  22. #define PM_UNIT_MSK 0xf
  23. #define PM_LOWER_SH 6
  24. #define PM_LOWER_MSK 1
  25. #define PM_LOWER_MSKS 0x40
  26. #define PM_BYTE_SH 4 /* Byte number of event bus to use */
  27. #define PM_BYTE_MSK 3
  28. #define PM_PMCSEL_MSK 7
  29. /*
  30. * Unit code values
  31. */
  32. #define PM_FPU 1
  33. #define PM_ISU1 2
  34. #define PM_IFU 3
  35. #define PM_IDU0 4
  36. #define PM_ISU1_ALT 6
  37. #define PM_ISU2 7
  38. #define PM_IFU_ALT 8
  39. #define PM_LSU0 9
  40. #define PM_LSU1 0xc
  41. #define PM_GPS 0xf
  42. /*
  43. * Bits in MMCR0 for POWER4
  44. */
  45. #define MMCR0_PMC1SEL_SH 8
  46. #define MMCR0_PMC2SEL_SH 1
  47. #define MMCR_PMCSEL_MSK 0x1f
  48. /*
  49. * Bits in MMCR1 for POWER4
  50. */
  51. #define MMCR1_TTM0SEL_SH 62
  52. #define MMCR1_TTC0SEL_SH 61
  53. #define MMCR1_TTM1SEL_SH 59
  54. #define MMCR1_TTC1SEL_SH 58
  55. #define MMCR1_TTM2SEL_SH 56
  56. #define MMCR1_TTC2SEL_SH 55
  57. #define MMCR1_TTM3SEL_SH 53
  58. #define MMCR1_TTC3SEL_SH 52
  59. #define MMCR1_TTMSEL_MSK 3
  60. #define MMCR1_TD_CP_DBG0SEL_SH 50
  61. #define MMCR1_TD_CP_DBG1SEL_SH 48
  62. #define MMCR1_TD_CP_DBG2SEL_SH 46
  63. #define MMCR1_TD_CP_DBG3SEL_SH 44
  64. #define MMCR1_DEBUG0SEL_SH 43
  65. #define MMCR1_DEBUG1SEL_SH 42
  66. #define MMCR1_DEBUG2SEL_SH 41
  67. #define MMCR1_DEBUG3SEL_SH 40
  68. #define MMCR1_PMC1_ADDER_SEL_SH 39
  69. #define MMCR1_PMC2_ADDER_SEL_SH 38
  70. #define MMCR1_PMC6_ADDER_SEL_SH 37
  71. #define MMCR1_PMC5_ADDER_SEL_SH 36
  72. #define MMCR1_PMC8_ADDER_SEL_SH 35
  73. #define MMCR1_PMC7_ADDER_SEL_SH 34
  74. #define MMCR1_PMC3_ADDER_SEL_SH 33
  75. #define MMCR1_PMC4_ADDER_SEL_SH 32
  76. #define MMCR1_PMC3SEL_SH 27
  77. #define MMCR1_PMC4SEL_SH 22
  78. #define MMCR1_PMC5SEL_SH 17
  79. #define MMCR1_PMC6SEL_SH 12
  80. #define MMCR1_PMC7SEL_SH 7
  81. #define MMCR1_PMC8SEL_SH 2 /* note bit 0 is in MMCRA for GP */
  82. static short mmcr1_adder_bits[8] = {
  83. MMCR1_PMC1_ADDER_SEL_SH,
  84. MMCR1_PMC2_ADDER_SEL_SH,
  85. MMCR1_PMC3_ADDER_SEL_SH,
  86. MMCR1_PMC4_ADDER_SEL_SH,
  87. MMCR1_PMC5_ADDER_SEL_SH,
  88. MMCR1_PMC6_ADDER_SEL_SH,
  89. MMCR1_PMC7_ADDER_SEL_SH,
  90. MMCR1_PMC8_ADDER_SEL_SH
  91. };
  92. /*
  93. * Bits in MMCRA
  94. */
  95. #define MMCRA_PMC8SEL0_SH 17 /* PMC8SEL bit 0 for GP */
  96. /*
  97. * Layout of constraint bits:
   * 6666555555555544444444443333333333222222222211111111110000000000
   * 3210987654321098765432109876543210987654321098765432109876543210
   *        |[  >[  >[   >|||[  >[  ><  ><  ><  ><  ><><><><><><><><>
   *        | UC1 UC2 UC3 ||| PS1 PS2 B0  B1  B2  B3 P8P7P6P5P4P3P2P1
   *        \SMPL         ||\TTC3SEL
   *                      |\TTC_IFU_SEL
   *                      \TTM2SEL0
  105. *
  106. * SMPL - SAMPLE_ENABLE constraint
  107. * 56: SAMPLE_ENABLE value 0x0100_0000_0000_0000
  108. *
  109. * UC1 - unit constraint 1: can't have all three of FPU/ISU1/IDU0|ISU2
  110. * 55: UC1 error 0x0080_0000_0000_0000
  111. * 54: FPU events needed 0x0040_0000_0000_0000
  112. * 53: ISU1 events needed 0x0020_0000_0000_0000
  113. * 52: IDU0|ISU2 events needed 0x0010_0000_0000_0000
  114. *
  115. * UC2 - unit constraint 2: can't have all three of FPU/IFU/LSU0
  116. * 51: UC2 error 0x0008_0000_0000_0000
  117. * 50: FPU events needed 0x0004_0000_0000_0000
  118. * 49: IFU events needed 0x0002_0000_0000_0000
  119. * 48: LSU0 events needed 0x0001_0000_0000_0000
  120. *
  121. * UC3 - unit constraint 3: can't have all four of LSU0/IFU/IDU0|ISU2/ISU1
  122. * 47: UC3 error 0x8000_0000_0000
  123. * 46: LSU0 events needed 0x4000_0000_0000
  124. * 45: IFU events needed 0x2000_0000_0000
  125. * 44: IDU0|ISU2 events needed 0x1000_0000_0000
  126. * 43: ISU1 events needed 0x0800_0000_0000
  127. *
  128. * TTM2SEL0
  129. * 42: 0 = IDU0 events needed
  130. * 1 = ISU2 events needed 0x0400_0000_0000
  131. *
  132. * TTC_IFU_SEL
  133. * 41: 0 = IFU.U events needed
  134. * 1 = IFU.L events needed 0x0200_0000_0000
  135. *
  136. * TTC3SEL
  137. * 40: 0 = LSU1.U events needed
  138. * 1 = LSU1.L events needed 0x0100_0000_0000
  139. *
  140. * PS1
  141. * 39: PS1 error 0x0080_0000_0000
  142. * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000
  143. *
  144. * PS2
  145. * 35: PS2 error 0x0008_0000_0000
  146. * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000
  147. *
  148. * B0
  149. * 28-31: Byte 0 event source 0xf000_0000
  150. * 1 = FPU
  151. * 2 = ISU1
  152. * 3 = IFU
  153. * 4 = IDU0
  154. * 7 = ISU2
  155. * 9 = LSU0
  156. * c = LSU1
  157. * f = GPS
  158. *
  159. * B1, B2, B3
  160. * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources
  161. *
  162. * P8
  163. * 15: P8 error 0x8000
  164. * 14-15: Count of events needing PMC8
  165. *
  166. * P1..P7
  167. * 0-13: Count of events needing PMC1..PMC7
  168. *
  169. * Note: this doesn't allow events using IFU.U to be combined with events
  170. * using IFU.L, though that is feasible (using TTM0 and TTM2). However
  171. * there are no listed events for IFU.L (they are debug events not
  172. * verified for performance monitoring) so this shouldn't cause a
  173. * problem.
  174. */
  175. static struct unitinfo {
  176. unsigned long value, mask;
  177. int unit;
  178. int lowerbit;
  179. } p4_unitinfo[16] = {
  180. [PM_FPU] = { 0x44000000000000ul, 0x88000000000000ul, PM_FPU, 0 },
  181. [PM_ISU1] = { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 },
  182. [PM_ISU1_ALT] =
  183. { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 },
  184. [PM_IFU] = { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 },
  185. [PM_IFU_ALT] =
  186. { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 },
  187. [PM_IDU0] = { 0x10100000000000ul, 0x80840000000000ul, PM_IDU0, 1 },
  188. [PM_ISU2] = { 0x10140000000000ul, 0x80840000000000ul, PM_ISU2, 0 },
  189. [PM_LSU0] = { 0x01400000000000ul, 0x08800000000000ul, PM_LSU0, 0 },
  190. [PM_LSU1] = { 0x00000000000000ul, 0x00010000000000ul, PM_LSU1, 40 },
  191. [PM_GPS] = { 0x00000000000000ul, 0x00000000000000ul, PM_GPS, 0 }
  192. };
  /*
   * Direct events that count marked instructions, one bitmap per counter.
   * Entry i describes PMC(i+1); bit n is set when PMCSEL value n on that
   * counter selects a marked-instruction event.
   */
  static unsigned char direct_marked_event[8] = {
      0x0c,   /* PMC1, PMCSEL 2,3: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */
      0x28,   /* PMC2, PMCSEL 3,5: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */
      0x08,   /* PMC3, PMCSEL 3: PM_MRK_ST_CMPL_INT */
      0x30,   /* PMC4, PMCSEL 4,5: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */
      0x30,   /* PMC5, PMCSEL 4,5: PM_MRK_GRP_TIMEO */
      0x38,   /* PMC6, PMCSEL 3,4,5: PM_MRK_ST_GPS, PM_MRK_FXU_FIN,
               * PM_MRK_GRP_ISSUED */
      0x30,   /* PMC7, PMCSEL 4,5: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */
      0x10,   /* PMC8, PMCSEL 4: PM_MRK_LSU_FIN */
  };
  204. /*
  205. * Returns 1 if event counts things relating to marked instructions
  206. * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
  207. */
  208. static int p4_marked_instr_event(u64 event)
  209. {
  210. int pmc, psel, unit, byte, bit;
  211. unsigned int mask;
  212. pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
  213. psel = event & PM_PMCSEL_MSK;
  214. if (pmc) {
  215. if (direct_marked_event[pmc - 1] & (1 << psel))
  216. return 1;
  217. if (psel == 0) /* add events */
  218. bit = (pmc <= 4)? pmc - 1: 8 - pmc;
  219. else if (psel == 6) /* decode events */
  220. bit = 4;
  221. else
  222. return 0;
  223. } else
  224. bit = psel;
  225. byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
  226. unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
  227. mask = 0;
  228. switch (unit) {
  229. case PM_LSU1:
  230. if (event & PM_LOWER_MSKS)
  231. mask = 1 << 28; /* byte 7 bit 4 */
  232. else
  233. mask = 6 << 24; /* byte 3 bits 1 and 2 */
  234. break;
  235. case PM_LSU0:
  236. /* byte 3, bit 3; byte 2 bits 0,2,3,4,5; byte 1 */
  237. mask = 0x083dff00;
  238. }
  239. return (mask >> (byte * 8 + bit)) & 1;
  240. }
  241. static int p4_get_constraint(u64 event, unsigned long *maskp,
  242. unsigned long *valp)
  243. {
  244. int pmc, byte, unit, lower, sh;
  245. unsigned long mask = 0, value = 0;
  246. int grp = -1;
  247. pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
  248. if (pmc) {
  249. if (pmc > 8)
  250. return -1;
  251. sh = (pmc - 1) * 2;
  252. mask |= 2 << sh;
  253. value |= 1 << sh;
  254. grp = ((pmc - 1) >> 1) & 1;
  255. }
  256. unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
  257. byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
  258. if (unit) {
  259. lower = (event >> PM_LOWER_SH) & PM_LOWER_MSK;
  260. /*
  261. * Bus events on bytes 0 and 2 can be counted
  262. * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8.
  263. */
  264. if (!pmc)
  265. grp = byte & 1;
  266. if (!p4_unitinfo[unit].unit)
  267. return -1;
  268. mask |= p4_unitinfo[unit].mask;
  269. value |= p4_unitinfo[unit].value;
  270. sh = p4_unitinfo[unit].lowerbit;
  271. if (sh > 1)
  272. value |= (unsigned long)lower << sh;
  273. else if (lower != sh)
  274. return -1;
  275. unit = p4_unitinfo[unit].unit;
  276. /* Set byte lane select field */
  277. mask |= 0xfULL << (28 - 4 * byte);
  278. value |= (unsigned long)unit << (28 - 4 * byte);
  279. }
  280. if (grp == 0) {
  281. /* increment PMC1/2/5/6 field */
  282. mask |= 0x8000000000ull;
  283. value |= 0x1000000000ull;
  284. } else {
  285. /* increment PMC3/4/7/8 field */
  286. mask |= 0x800000000ull;
  287. value |= 0x100000000ull;
  288. }
  289. /* Marked instruction events need sample_enable set */
  290. if (p4_marked_instr_event(event)) {
  291. mask |= 1ull << 56;
  292. value |= 1ull << 56;
  293. }
  294. /* PMCSEL=6 decode events on byte 2 need sample_enable clear */
  295. if (pmc && (event & PM_PMCSEL_MSK) == 6 && byte == 2)
  296. mask |= 1ull << 56;
  297. *maskp = mask;
  298. *valp = value;
  299. return 0;
  300. }
  /*
   * Event codes that all count PM_INST_CMPL; each one is a direct event
   * (PMCSEL 1) on a different counter.
   */
  static unsigned int ppc_inst_cmpl[] = {
      0x1001,     /* PMC1 */
      0x4001,     /* PMC4 */
      0x6001,     /* PMC6 */
      0x7001,     /* PMC7 */
      0x8001,     /* PMC8 */
  };
  304. static int p4_get_alternatives(u64 event, unsigned int flags, u64 alt[])
  305. {
  306. int i, j, na;
  307. alt[0] = event;
  308. na = 1;
  309. /* 2 possibilities for PM_GRP_DISP_REJECT */
  310. if (event == 0x8003 || event == 0x0224) {
  311. alt[1] = event ^ (0x8003 ^ 0x0224);
  312. return 2;
  313. }
  314. /* 2 possibilities for PM_ST_MISS_L1 */
  315. if (event == 0x0c13 || event == 0x0c23) {
  316. alt[1] = event ^ (0x0c13 ^ 0x0c23);
  317. return 2;
  318. }
  319. /* several possibilities for PM_INST_CMPL */
  320. for (i = 0; i < ARRAY_SIZE(ppc_inst_cmpl); ++i) {
  321. if (event == ppc_inst_cmpl[i]) {
  322. for (j = 0; j < ARRAY_SIZE(ppc_inst_cmpl); ++j)
  323. if (j != i)
  324. alt[na++] = ppc_inst_cmpl[j];
  325. break;
  326. }
  327. }
  328. return na;
  329. }
  330. static int p4_compute_mmcr(u64 event[], int n_ev,
  331. unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[])
  332. {
  333. unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0;
  334. unsigned int pmc, unit, byte, psel, lower;
  335. unsigned int ttm, grp;
  336. unsigned int pmc_inuse = 0;
  337. unsigned int pmc_grp_use[2];
  338. unsigned char busbyte[4];
  339. unsigned char unituse[16];
  340. unsigned int unitlower = 0;
  341. int i;
  342. if (n_ev > 8)
  343. return -1;
  344. /* First pass to count resource use */
  345. pmc_grp_use[0] = pmc_grp_use[1] = 0;
  346. memset(busbyte, 0, sizeof(busbyte));
  347. memset(unituse, 0, sizeof(unituse));
  348. for (i = 0; i < n_ev; ++i) {
  349. pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
  350. if (pmc) {
  351. if (pmc_inuse & (1 << (pmc - 1)))
  352. return -1;
  353. pmc_inuse |= 1 << (pmc - 1);
  354. /* count 1/2/5/6 vs 3/4/7/8 use */
  355. ++pmc_grp_use[((pmc - 1) >> 1) & 1];
  356. }
  357. unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
  358. byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
  359. lower = (event[i] >> PM_LOWER_SH) & PM_LOWER_MSK;
  360. if (unit) {
  361. if (!pmc)
  362. ++pmc_grp_use[byte & 1];
  363. if (unit == 6 || unit == 8)
  364. /* map alt ISU1/IFU codes: 6->2, 8->3 */
  365. unit = (unit >> 1) - 1;
  366. if (busbyte[byte] && busbyte[byte] != unit)
  367. return -1;
  368. busbyte[byte] = unit;
  369. lower <<= unit;
  370. if (unituse[unit] && lower != (unitlower & lower))
  371. return -1;
  372. unituse[unit] = 1;
  373. unitlower |= lower;
  374. }
  375. }
  376. if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4)
  377. return -1;
  378. /*
  379. * Assign resources and set multiplexer selects.
  380. *
  381. * Units 1,2,3 are on TTM0, 4,6,7 on TTM1, 8,10 on TTM2.
  382. * Each TTMx can only select one unit, but since
  383. * units 2 and 6 are both ISU1, and 3 and 8 are both IFU,
  384. * we have some choices.
  385. */
  386. if (unituse[2] & (unituse[1] | (unituse[3] & unituse[9]))) {
  387. unituse[6] = 1; /* Move 2 to 6 */
  388. unituse[2] = 0;
  389. }
  390. if (unituse[3] & (unituse[1] | unituse[2])) {
  391. unituse[8] = 1; /* Move 3 to 8 */
  392. unituse[3] = 0;
  393. unitlower = (unitlower & ~8) | ((unitlower & 8) << 5);
  394. }
  395. /* Check only one unit per TTMx */
  396. if (unituse[1] + unituse[2] + unituse[3] > 1 ||
  397. unituse[4] + unituse[6] + unituse[7] > 1 ||
  398. unituse[8] + unituse[9] > 1 ||
  399. (unituse[5] | unituse[10] | unituse[11] |
  400. unituse[13] | unituse[14]))
  401. return -1;
  402. /* Set TTMxSEL fields. Note, units 1-3 => TTM0SEL codes 0-2 */
  403. mmcr1 |= (unsigned long)(unituse[3] * 2 + unituse[2])
  404. << MMCR1_TTM0SEL_SH;
  405. mmcr1 |= (unsigned long)(unituse[7] * 3 + unituse[6] * 2)
  406. << MMCR1_TTM1SEL_SH;
  407. mmcr1 |= (unsigned long)unituse[9] << MMCR1_TTM2SEL_SH;
  408. /* Set TTCxSEL fields. */
  409. if (unitlower & 0xe)
  410. mmcr1 |= 1ull << MMCR1_TTC0SEL_SH;
  411. if (unitlower & 0xf0)
  412. mmcr1 |= 1ull << MMCR1_TTC1SEL_SH;
  413. if (unitlower & 0xf00)
  414. mmcr1 |= 1ull << MMCR1_TTC2SEL_SH;
  415. if (unitlower & 0x7000)
  416. mmcr1 |= 1ull << MMCR1_TTC3SEL_SH;
  417. /* Set byte lane select fields. */
  418. for (byte = 0; byte < 4; ++byte) {
  419. unit = busbyte[byte];
  420. if (!unit)
  421. continue;
  422. if (unit == 0xf) {
  423. /* special case for GPS */
  424. mmcr1 |= 1ull << (MMCR1_DEBUG0SEL_SH - byte);
  425. } else {
  426. if (!unituse[unit])
  427. ttm = unit - 1; /* 2->1, 3->2 */
  428. else
  429. ttm = unit >> 2;
  430. mmcr1 |= (unsigned long)ttm
  431. << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
  432. }
  433. }
  434. /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
  435. for (i = 0; i < n_ev; ++i) {
  436. pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
  437. unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
  438. byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
  439. psel = event[i] & PM_PMCSEL_MSK;
  440. if (!pmc) {
  441. /* Bus event or 00xxx direct event (off or cycles) */
  442. if (unit)
  443. psel |= 0x10 | ((byte & 2) << 2);
  444. for (pmc = 0; pmc < 8; ++pmc) {
  445. if (pmc_inuse & (1 << pmc))
  446. continue;
  447. grp = (pmc >> 1) & 1;
  448. if (unit) {
  449. if (grp == (byte & 1))
  450. break;
  451. } else if (pmc_grp_use[grp] < 4) {
  452. ++pmc_grp_use[grp];
  453. break;
  454. }
  455. }
  456. pmc_inuse |= 1 << pmc;
  457. } else {
  458. /* Direct event */
  459. --pmc;
  460. if (psel == 0 && (byte & 2))
  461. /* add events on higher-numbered bus */
  462. mmcr1 |= 1ull << mmcr1_adder_bits[pmc];
  463. else if (psel == 6 && byte == 3)
  464. /* seem to need to set sample_enable here */
  465. mmcra |= MMCRA_SAMPLE_ENABLE;
  466. psel |= 8;
  467. }
  468. if (pmc <= 1)
  469. mmcr0 |= psel << (MMCR0_PMC1SEL_SH - 7 * pmc);
  470. else
  471. mmcr1 |= psel << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
  472. if (pmc == 7) /* PMC8 */
  473. mmcra |= (psel & 1) << MMCRA_PMC8SEL0_SH;
  474. hwc[i] = pmc;
  475. if (p4_marked_instr_event(event[i]))
  476. mmcra |= MMCRA_SAMPLE_ENABLE;
  477. }
  478. if (pmc_inuse & 1)
  479. mmcr0 |= MMCR0_PMC1CE;
  480. if (pmc_inuse & 0xfe)
  481. mmcr0 |= MMCR0_PMCjCE;
  482. mmcra |= 0x2000; /* mark only one IOP per PPC instruction */
  483. /* Return MMCRx values */
  484. mmcr[0] = mmcr0;
  485. mmcr[1] = mmcr1;
  486. mmcr[2] = mmcra;
  487. return 0;
  488. }
  489. static void p4_disable_pmc(unsigned int pmc, unsigned long mmcr[])
  490. {
  491. /*
  492. * Setting the PMCxSEL field to 0 disables PMC x.
  493. * (Note that pmc is 0-based here, not 1-based.)
  494. */
  495. if (pmc <= 1) {
  496. mmcr[0] &= ~(0x1fUL << (MMCR0_PMC1SEL_SH - 7 * pmc));
  497. } else {
  498. mmcr[1] &= ~(0x1fUL << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)));
  499. if (pmc == 7)
  500. mmcr[2] &= ~(1UL << MMCRA_PMC8SEL0_SH);
  501. }
  502. }
  503. static int p4_generic_events[] = {
  504. [PERF_COUNT_HW_CPU_CYCLES] = 7,
  505. [PERF_COUNT_HW_INSTRUCTIONS] = 0x1001,
  506. [PERF_COUNT_HW_CACHE_REFERENCES] = 0x8c10, /* PM_LD_REF_L1 */
  507. [PERF_COUNT_HW_CACHE_MISSES] = 0x3c10, /* PM_LD_MISS_L1 */
  508. [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x330, /* PM_BR_ISSUED */
  509. [PERF_COUNT_HW_BRANCH_MISSES] = 0x331, /* PM_BR_MPRED_CR */
  510. };
  511. #define C(x) PERF_COUNT_HW_CACHE_##x
  512. /*
  513. * Table of generalized cache-related events.
  514. * 0 means not supported, -1 means nonsensical, other values
  515. * are event codes.
  516. */
  517. static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
  518. [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
  519. [C(OP_READ)] = { 0x8c10, 0x3c10 },
  520. [C(OP_WRITE)] = { 0x7c10, 0xc13 },
  521. [C(OP_PREFETCH)] = { 0xc35, 0 },
  522. },
  523. [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
  524. [C(OP_READ)] = { 0, 0 },
  525. [C(OP_WRITE)] = { -1, -1 },
  526. [C(OP_PREFETCH)] = { 0, 0 },
  527. },
  528. [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
  529. [C(OP_READ)] = { 0, 0 },
  530. [C(OP_WRITE)] = { 0, 0 },
  531. [C(OP_PREFETCH)] = { 0xc34, 0 },
  532. },
  533. [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
  534. [C(OP_READ)] = { 0, 0x904 },
  535. [C(OP_WRITE)] = { -1, -1 },
  536. [C(OP_PREFETCH)] = { -1, -1 },
  537. },
  538. [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
  539. [C(OP_READ)] = { 0, 0x900 },
  540. [C(OP_WRITE)] = { -1, -1 },
  541. [C(OP_PREFETCH)] = { -1, -1 },
  542. },
  543. [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
  544. [C(OP_READ)] = { 0x330, 0x331 },
  545. [C(OP_WRITE)] = { -1, -1 },
  546. [C(OP_PREFETCH)] = { -1, -1 },
  547. },
  548. [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */
  549. [C(OP_READ)] = { -1, -1 },
  550. [C(OP_WRITE)] = { -1, -1 },
  551. [C(OP_PREFETCH)] = { -1, -1 },
  552. },
  553. };
  554. static struct power_pmu power4_pmu = {
  555. .name = "POWER4/4+",
  556. .n_counter = 8,
  557. .max_alternatives = 5,
  558. .add_fields = 0x0000001100005555ul,
  559. .test_adder = 0x0011083300000000ul,
  560. .compute_mmcr = p4_compute_mmcr,
  561. .get_constraint = p4_get_constraint,
  562. .get_alternatives = p4_get_alternatives,
  563. .disable_pmc = p4_disable_pmc,
  564. .n_generic = ARRAY_SIZE(p4_generic_events),
  565. .generic_events = p4_generic_events,
  566. .cache_events = &power4_cache_events,
  567. .flags = PPMU_NO_SIPR | PPMU_NO_CONT_SAMPLING,
  568. };
  569. static int __init init_power4_pmu(void)
  570. {
  571. if (!cur_cpu_spec->oprofile_cpu_type ||
  572. strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power4"))
  573. return -ENODEV;
  574. return register_power_pmu(&power4_pmu);
  575. }
  576. early_initcall(init_power4_pmu);