
// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/interrupt.h>

#include <asm/xen/hypercall.h>
#include <xen/page.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>
#include <xen/interface/xenpmu.h>

#include "xen-ops.h"
#include "pmu.h"

/* x86_pmu.handle_irq definition */
#include "../events/perf_event.h"

#define XENPMU_IRQ_PROCESSING    1

struct xenpmu {
        /* Shared page between hypervisor and domain */
        struct xen_pmu_data *xenpmu_data;

        uint8_t flags;
};

static DEFINE_PER_CPU(struct xenpmu, xenpmu_shared);

#define get_xenpmu_data()    (this_cpu_ptr(&xenpmu_shared)->xenpmu_data)
#define get_xenpmu_flags()   (this_cpu_ptr(&xenpmu_shared)->flags)

/* Macro for computing address of a PMU MSR bank */
#define field_offset(ctxt, field) ((void *)((uintptr_t)ctxt + \
                                            (uintptr_t)ctxt->field))

/* AMD PMU */
#define F15H_NUM_COUNTERS   6
#define F10H_NUM_COUNTERS   4

static __read_mostly uint32_t amd_counters_base;
static __read_mostly uint32_t amd_ctrls_base;
static __read_mostly int amd_msr_step;
static __read_mostly int k7_counters_mirrored;
static __read_mostly int amd_num_counters;

/* Intel PMU */
#define MSR_TYPE_COUNTER            0
#define MSR_TYPE_CTRL               1
#define MSR_TYPE_GLOBAL             2
#define MSR_TYPE_ARCH_COUNTER       3
#define MSR_TYPE_ARCH_CTRL          4

/* Number of general pmu registers (CPUID.EAX[0xa].EAX[8..15]) */
#define PMU_GENERAL_NR_SHIFT        8
#define PMU_GENERAL_NR_BITS         8
#define PMU_GENERAL_NR_MASK         (((1 << PMU_GENERAL_NR_BITS) - 1) \
                                     << PMU_GENERAL_NR_SHIFT)

/* Number of fixed pmu registers (CPUID.EDX[0xa].EDX[0..4]) */
#define PMU_FIXED_NR_SHIFT          0
#define PMU_FIXED_NR_BITS           5
#define PMU_FIXED_NR_MASK           (((1 << PMU_FIXED_NR_BITS) - 1) \
                                     << PMU_FIXED_NR_SHIFT)

/* Alias registers (0x4c1) for full-width writes to PMCs */
#define MSR_PMC_ALIAS_MASK          (~(MSR_IA32_PERFCTR0 ^ MSR_IA32_PMC0))

#define INTEL_PMC_TYPE_SHIFT        30

static __read_mostly int intel_num_arch_counters, intel_num_fixed_counters;
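
/*
 * Probe the boot CPU's PMU layout: pick the counter/control MSR bases,
 * stride and counter count for AMD families, or read the number of
 * architectural and fixed counters from CPUID leaf 0xa on Intel.
 */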
static void xen_pmu_arch_init(void)
{
        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {

                switch (boot_cpu_data.x86) {
                case 0x15:
                        amd_num_counters = F15H_NUM_COUNTERS;
                        amd_counters_base = MSR_F15H_PERF_CTR;
                        amd_ctrls_base = MSR_F15H_PERF_CTL;
                        amd_msr_step = 2;
                        k7_counters_mirrored = 1;
                        break;
                case 0x10:
                case 0x12:
                case 0x14:
                case 0x16:
                default:
                        amd_num_counters = F10H_NUM_COUNTERS;
                        amd_counters_base = MSR_K7_PERFCTR0;
                        amd_ctrls_base = MSR_K7_EVNTSEL0;
                        amd_msr_step = 1;
                        k7_counters_mirrored = 0;
                        break;
                }
        } else {
                uint32_t eax, ebx, ecx, edx;

                cpuid(0xa, &eax, &ebx, &ecx, &edx);

                intel_num_arch_counters = (eax & PMU_GENERAL_NR_MASK) >>
                        PMU_GENERAL_NR_SHIFT;
                intel_num_fixed_counters = (edx & PMU_FIXED_NR_MASK) >>
                        PMU_FIXED_NR_SHIFT;
        }
}
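
/*
 * Translate legacy K7 counter/event-select MSR addresses to their
 * Family 15h equivalents; any other address is returned unchanged.
 */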
static inline uint32_t get_fam15h_addr(u32 addr)
{
        switch (addr) {
        case MSR_K7_PERFCTR0:
        case MSR_K7_PERFCTR1:
        case MSR_K7_PERFCTR2:
        case MSR_K7_PERFCTR3:
                return MSR_F15H_PERF_CTR + (addr - MSR_K7_PERFCTR0);
        case MSR_K7_EVNTSEL0:
        case MSR_K7_EVNTSEL1:
        case MSR_K7_EVNTSEL2:
        case MSR_K7_EVNTSEL3:
                return MSR_F15H_PERF_CTL + (addr - MSR_K7_EVNTSEL0);
        default:
                break;
        }

        return addr;
}
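
/* Does @msr fall into one of the AMD performance counter MSR ranges? */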
static inline bool is_amd_pmu_msr(unsigned int msr)
{
        if ((msr >= MSR_F15H_PERF_CTL &&
             msr < MSR_F15H_PERF_CTR + (amd_num_counters * 2)) ||
            (msr >= MSR_K7_EVNTSEL0 &&
             msr < MSR_K7_PERFCTR0 + amd_num_counters))
                return true;

        return false;
}
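
/*
 * Classify an Intel PMU MSR: returns true and fills in *type (and *index
 * for per-counter registers) if @msr_index belongs to the PMU.
 */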
static int is_intel_pmu_msr(u32 msr_index, int *type, int *index)
{
        u32 msr_index_pmc;

        switch (msr_index) {
        case MSR_CORE_PERF_FIXED_CTR_CTRL:
        case MSR_IA32_DS_AREA:
        case MSR_IA32_PEBS_ENABLE:
                *type = MSR_TYPE_CTRL;
                return true;

        case MSR_CORE_PERF_GLOBAL_CTRL:
        case MSR_CORE_PERF_GLOBAL_STATUS:
        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
                *type = MSR_TYPE_GLOBAL;
                return true;

        default:

                if ((msr_index >= MSR_CORE_PERF_FIXED_CTR0) &&
                    (msr_index < MSR_CORE_PERF_FIXED_CTR0 +
                                 intel_num_fixed_counters)) {
                        *index = msr_index - MSR_CORE_PERF_FIXED_CTR0;
                        *type = MSR_TYPE_COUNTER;
                        return true;
                }

                if ((msr_index >= MSR_P6_EVNTSEL0) &&
                    (msr_index < MSR_P6_EVNTSEL0 + intel_num_arch_counters)) {
                        *index = msr_index - MSR_P6_EVNTSEL0;
                        *type = MSR_TYPE_ARCH_CTRL;
                        return true;
                }

                msr_index_pmc = msr_index & MSR_PMC_ALIAS_MASK;
                if ((msr_index_pmc >= MSR_IA32_PERFCTR0) &&
                    (msr_index_pmc < MSR_IA32_PERFCTR0 +
                                     intel_num_arch_counters)) {
                        *type = MSR_TYPE_ARCH_COUNTER;
                        *index = msr_index_pmc - MSR_IA32_PERFCTR0;
                        return true;
                }
                return false;
        }
}
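
/*
 * Emulate an Intel PMU MSR access against the context cached in the
 * shared PMU page. Only valid while the PMU interrupt is being handled;
 * otherwise return false so the caller falls back to the real MSR.
 */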
static bool xen_intel_pmu_emulate(unsigned int msr, u64 *val, int type,
                                  int index, bool is_read)
{
        uint64_t *reg = NULL;
        struct xen_pmu_intel_ctxt *ctxt;
        uint64_t *fix_counters;
        struct xen_pmu_cntr_pair *arch_cntr_pair;
        struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
        uint8_t xenpmu_flags = get_xenpmu_flags();

        if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING))
                return false;

        ctxt = &xenpmu_data->pmu.c.intel;

        switch (msr) {
        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
                reg = &ctxt->global_ovf_ctrl;
                break;
        case MSR_CORE_PERF_GLOBAL_STATUS:
                reg = &ctxt->global_status;
                break;
        case MSR_CORE_PERF_GLOBAL_CTRL:
                reg = &ctxt->global_ctrl;
                break;
        case MSR_CORE_PERF_FIXED_CTR_CTRL:
                reg = &ctxt->fixed_ctrl;
                break;
        default:
                switch (type) {
                case MSR_TYPE_COUNTER:
                        fix_counters = field_offset(ctxt, fixed_counters);
                        reg = &fix_counters[index];
                        break;
                case MSR_TYPE_ARCH_COUNTER:
                        arch_cntr_pair = field_offset(ctxt, arch_counters);
                        reg = &arch_cntr_pair[index].counter;
                        break;
                case MSR_TYPE_ARCH_CTRL:
                        arch_cntr_pair = field_offset(ctxt, arch_counters);
                        reg = &arch_cntr_pair[index].control;
                        break;
                default:
                        return false;
                }
        }

        if (reg) {
                if (is_read)
                        *val = *reg;
                else {
                        *reg = *val;

                        if (msr == MSR_CORE_PERF_GLOBAL_OVF_CTRL)
                                ctxt->global_status &= (~(*val));
                }
                return true;
        }

        return false;
}
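
/* AMD counterpart of the above: emulate counter/control MSR accesses. */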
static bool xen_amd_pmu_emulate(unsigned int msr, u64 *val, bool is_read)
{
        uint64_t *reg = NULL;
        int i, off = 0;
        struct xen_pmu_amd_ctxt *ctxt;
        uint64_t *counter_regs, *ctrl_regs;
        struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
        uint8_t xenpmu_flags = get_xenpmu_flags();

        if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING))
                return false;

        if (k7_counters_mirrored &&
            ((msr >= MSR_K7_EVNTSEL0) && (msr <= MSR_K7_PERFCTR3)))
                msr = get_fam15h_addr(msr);

        ctxt = &xenpmu_data->pmu.c.amd;
        for (i = 0; i < amd_num_counters; i++) {
                if (msr == amd_ctrls_base + off) {
                        ctrl_regs = field_offset(ctxt, ctrls);
                        reg = &ctrl_regs[i];
                        break;
                } else if (msr == amd_counters_base + off) {
                        counter_regs = field_offset(ctxt, counters);
                        reg = &counter_regs[i];
                        break;
                }
                off += amd_msr_step;
        }

        if (reg) {
                if (is_read)
                        *val = *reg;
                else
                        *reg = *val;

                return true;
        }
        return false;
}
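
/*
 * Intercept a PMU MSR read: returns true if @msr is a PMU register, with
 * the value taken from the emulated context or, failing that, from a
 * safe native read.
 */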
bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err)
{
        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
                if (is_amd_pmu_msr(msr)) {
                        if (!xen_amd_pmu_emulate(msr, val, 1))
                                *val = native_read_msr_safe(msr, err);
                        return true;
                }
        } else {
                int type, index;

                if (is_intel_pmu_msr(msr, &type, &index)) {
                        if (!xen_intel_pmu_emulate(msr, val, type, index, 1))
                                *val = native_read_msr_safe(msr, err);
                        return true;
                }
        }

        return false;
}
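
/* Write counterpart of pmu_msr_read(). */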
bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err)
{
        uint64_t val = ((uint64_t)high << 32) | low;

        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
                if (is_amd_pmu_msr(msr)) {
                        if (!xen_amd_pmu_emulate(msr, &val, 0))
                                *err = native_write_msr_safe(msr, low, high);
                        return true;
                }
        } else {
                int type, index;

                if (is_intel_pmu_msr(msr, &type, &index)) {
                        if (!xen_intel_pmu_emulate(msr, &val, type, index, 0))
                                *err = native_write_msr_safe(msr, low, high);
                        return true;
                }
        }

        return false;
}
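
/*
 * rdpmc emulation for AMD: read the counter from the shared context while
 * inside the PMU interrupt, otherwise from the hardware MSR.
 */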
static unsigned long long xen_amd_read_pmc(int counter)
{
        struct xen_pmu_amd_ctxt *ctxt;
        uint64_t *counter_regs;
        struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
        uint8_t xenpmu_flags = get_xenpmu_flags();

        if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) {
                uint32_t msr;
                int err;

                msr = amd_counters_base + (counter * amd_msr_step);
                return native_read_msr_safe(msr, &err);
        }

        ctxt = &xenpmu_data->pmu.c.amd;
        counter_regs = field_offset(ctxt, counters);
        return counter_regs[counter];
}
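
/*
 * rdpmc emulation for Intel. Bit INTEL_PMC_TYPE_SHIFT of @counter selects
 * the fixed-counter bank, with the counter number in the low bits.
 */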
static unsigned long long xen_intel_read_pmc(int counter)
{
        struct xen_pmu_intel_ctxt *ctxt;
        uint64_t *fixed_counters;
        struct xen_pmu_cntr_pair *arch_cntr_pair;
        struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
        uint8_t xenpmu_flags = get_xenpmu_flags();

        if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) {
                uint32_t msr;
                int err;

                if (counter & (1 << INTEL_PMC_TYPE_SHIFT))
                        msr = MSR_CORE_PERF_FIXED_CTR0 + (counter & 0xffff);
                else
                        msr = MSR_IA32_PERFCTR0 + counter;

                return native_read_msr_safe(msr, &err);
        }

        ctxt = &xenpmu_data->pmu.c.intel;
        if (counter & (1 << INTEL_PMC_TYPE_SHIFT)) {
                fixed_counters = field_offset(ctxt, fixed_counters);
                return fixed_counters[counter & 0xffff];
        }

        arch_cntr_pair = field_offset(ctxt, arch_counters);
        return arch_cntr_pair[counter].counter;
}

unsigned long long xen_read_pmc(int counter)
{
        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
                return xen_amd_read_pmc(counter);
        else
                return xen_intel_read_pmc(counter);
}
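
/*
 * Update the cached LVTPC value and, unless we are already inside the PMU
 * interrupt handler, push it to the hypervisor.
 */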
int pmu_apic_update(uint32_t val)
{
        int ret;
        struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

        if (!xenpmu_data) {
                pr_warn_once("%s: pmudata not initialized\n", __func__);
                return -EINVAL;
        }

        xenpmu_data->pmu.l.lapic_lvtpc = val;

        if (get_xenpmu_flags() & XENPMU_IRQ_PROCESSING)
                return 0;

        ret = HYPERVISOR_xenpmu_op(XENPMU_lvtpc_set, NULL);

        return ret;
}

/* perf callbacks */
static int xen_is_in_guest(void)
{
        const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

        if (!xenpmu_data) {
                pr_warn_once("%s: pmudata not initialized\n", __func__);
                return 0;
        }

        if (!xen_initial_domain() || (xenpmu_data->domain_id >= DOMID_SELF))
                return 0;

        return 1;
}

static int xen_is_user_mode(void)
{
        const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

        if (!xenpmu_data) {
                pr_warn_once("%s: pmudata not initialized\n", __func__);
                return 0;
        }

        if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_PV)
                return (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_USER);
        else
                return !!(xenpmu_data->pmu.r.regs.cpl & 3);
}

static unsigned long xen_get_guest_ip(void)
{
        const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

        if (!xenpmu_data) {
                pr_warn_once("%s: pmudata not initialized\n", __func__);
                return 0;
        }

        return xenpmu_data->pmu.r.regs.ip;
}

static struct perf_guest_info_callbacks xen_guest_cbs = {
        .is_in_guest  = xen_is_in_guest,
        .is_user_mode = xen_is_user_mode,
        .get_guest_ip = xen_get_guest_ip,
};

/* Convert registers from Xen's format to Linux' */
static void xen_convert_regs(const struct xen_pmu_regs *xen_regs,
                             struct pt_regs *regs, uint64_t pmu_flags)
{
        regs->ip = xen_regs->ip;
        regs->cs = xen_regs->cs;
        regs->sp = xen_regs->sp;

        if (pmu_flags & PMU_SAMPLE_PV) {
                if (pmu_flags & PMU_SAMPLE_USER)
                        regs->cs |= 3;
                else
                        regs->cs &= ~3;
        } else {
                if (xen_regs->cpl)
                        regs->cs |= 3;
                else
                        regs->cs &= ~3;
        }
}
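
/*
 * PMU interrupt from the hypervisor: mark this vcpu as processing, hand
 * the converted register state to the core x86 PMU handler, then flush
 * the cached PMU context back to the hardware.
 */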
irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
{
        int err, ret = IRQ_NONE;
        struct pt_regs regs = {0};
        const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
        uint8_t xenpmu_flags = get_xenpmu_flags();

        if (!xenpmu_data) {
                pr_warn_once("%s: pmudata not initialized\n", __func__);
                return ret;
        }

        this_cpu_ptr(&xenpmu_shared)->flags =
                xenpmu_flags | XENPMU_IRQ_PROCESSING;
        xen_convert_regs(&xenpmu_data->pmu.r.regs, &regs,
                         xenpmu_data->pmu.pmu_flags);
        if (x86_pmu.handle_irq(&regs))
                ret = IRQ_HANDLED;

        /* Write out cached context to HW */
        err = HYPERVISOR_xenpmu_op(XENPMU_flush, NULL);
        this_cpu_ptr(&xenpmu_shared)->flags = xenpmu_flags;
        if (err) {
                pr_warn_once("%s: failed hypercall, err: %d\n", __func__, err);
                return IRQ_NONE;
        }

        return ret;
}

bool is_xen_pmu(int cpu)
{
        return (get_xenpmu_data() != NULL);
}
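
/*
 * Allocate the per-vcpu PMU page and register it with the hypervisor; on
 * CPU 0 also hook up the perf guest callbacks and probe the PMU layout.
 */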
void xen_pmu_init(int cpu)
{
        int err;
        struct xen_pmu_params xp;
        unsigned long pfn;
        struct xen_pmu_data *xenpmu_data;

        BUILD_BUG_ON(sizeof(struct xen_pmu_data) > PAGE_SIZE);

        if (xen_hvm_domain())
                return;

        xenpmu_data = (struct xen_pmu_data *)get_zeroed_page(GFP_KERNEL);
        if (!xenpmu_data) {
                pr_err("VPMU init: No memory\n");
                return;
        }
        pfn = virt_to_pfn(xenpmu_data);

        xp.val = pfn_to_mfn(pfn);
        xp.vcpu = cpu;
        xp.version.maj = XENPMU_VER_MAJ;
        xp.version.min = XENPMU_VER_MIN;
        err = HYPERVISOR_xenpmu_op(XENPMU_init, &xp);
        if (err)
                goto fail;

        per_cpu(xenpmu_shared, cpu).xenpmu_data = xenpmu_data;
        per_cpu(xenpmu_shared, cpu).flags = 0;

        if (cpu == 0) {
                perf_register_guest_info_callbacks(&xen_guest_cbs);
                xen_pmu_arch_init();
        }

        return;

fail:
        if (err == -EOPNOTSUPP || err == -ENOSYS)
                pr_info_once("VPMU disabled by hypervisor.\n");
        else
                pr_info_once("Could not initialize VPMU for cpu %d, error %d\n",
                             cpu, err);
        free_pages((unsigned long)xenpmu_data, 0);
}
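
/* Tear down the per-vcpu VPMU state and free the shared page. */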
void xen_pmu_finish(int cpu)
{
        struct xen_pmu_params xp;

        if (xen_hvm_domain())
                return;

        xp.vcpu = cpu;
        xp.version.maj = XENPMU_VER_MAJ;
        xp.version.min = XENPMU_VER_MIN;

        (void)HYPERVISOR_xenpmu_op(XENPMU_finish, &xp);

        free_pages((unsigned long)per_cpu(xenpmu_shared, cpu).xenpmu_data, 0);
        per_cpu(xenpmu_shared, cpu).xenpmu_data = NULL;
}