pmu.c

#include <linux/types.h>
#include <linux/interrupt.h>

#include <asm/xen/hypercall.h>
#include <xen/page.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>
#include <xen/interface/xenpmu.h>

#include "xen-ops.h"
#include "pmu.h"

/* x86_pmu.handle_irq definition */
#include "../events/perf_event.h"

#define XENPMU_IRQ_PROCESSING    1

struct xenpmu {
        /* Shared page between hypervisor and domain */
        struct xen_pmu_data *xenpmu_data;

        uint8_t flags;
};

static DEFINE_PER_CPU(struct xenpmu, xenpmu_shared);
#define get_xenpmu_data()    (this_cpu_ptr(&xenpmu_shared)->xenpmu_data)
#define get_xenpmu_flags()   (this_cpu_ptr(&xenpmu_shared)->flags)

/* Macro for computing address of a PMU MSR bank */
#define field_offset(ctxt, field) ((void *)((uintptr_t)ctxt + \
                                            (uintptr_t)ctxt->field))

/* AMD PMU */
#define F15H_NUM_COUNTERS   6
#define F10H_NUM_COUNTERS   4

static __read_mostly uint32_t amd_counters_base;
static __read_mostly uint32_t amd_ctrls_base;
static __read_mostly int amd_msr_step;
static __read_mostly int k7_counters_mirrored;
static __read_mostly int amd_num_counters;

/* Intel PMU */
#define MSR_TYPE_COUNTER       0
#define MSR_TYPE_CTRL          1
#define MSR_TYPE_GLOBAL        2
#define MSR_TYPE_ARCH_COUNTER  3
#define MSR_TYPE_ARCH_CTRL     4

/* Number of general pmu registers (CPUID.EAX[0xa].EAX[8..15]) */
#define PMU_GENERAL_NR_SHIFT   8
#define PMU_GENERAL_NR_BITS    8
#define PMU_GENERAL_NR_MASK    (((1 << PMU_GENERAL_NR_BITS) - 1) \
                                << PMU_GENERAL_NR_SHIFT)

/* Number of fixed pmu registers (CPUID.EDX[0xa].EDX[0..4]) */
#define PMU_FIXED_NR_SHIFT     0
#define PMU_FIXED_NR_BITS      5
#define PMU_FIXED_NR_MASK      (((1 << PMU_FIXED_NR_BITS) - 1) \
                                << PMU_FIXED_NR_SHIFT)

/* Alias registers (0x4c1) for full-width writes to PMCs */
#define MSR_PMC_ALIAS_MASK     (~(MSR_IA32_PERFCTR0 ^ MSR_IA32_PMC0))

#define INTEL_PMC_TYPE_SHIFT   30

static __read_mostly int intel_num_arch_counters, intel_num_fixed_counters;
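
/*
 * Probe the vendor-specific PMU layout once at init time: family-dependent
 * counter/control MSR bases on AMD, counter counts from CPUID leaf 0xa on
 * everything else (Intel-style PMUs).
 */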
static void xen_pmu_arch_init(void)
{
        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {

                switch (boot_cpu_data.x86) {
                case 0x15:
                        amd_num_counters = F15H_NUM_COUNTERS;
                        amd_counters_base = MSR_F15H_PERF_CTR;
                        amd_ctrls_base = MSR_F15H_PERF_CTL;
                        amd_msr_step = 2;
                        k7_counters_mirrored = 1;
                        break;
                case 0x10:
                case 0x12:
                case 0x14:
                case 0x16:
                default:
                        amd_num_counters = F10H_NUM_COUNTERS;
                        amd_counters_base = MSR_K7_PERFCTR0;
                        amd_ctrls_base = MSR_K7_EVNTSEL0;
                        amd_msr_step = 1;
                        k7_counters_mirrored = 0;
                        break;
                }
        } else {
                uint32_t eax, ebx, ecx, edx;

                cpuid(0xa, &eax, &ebx, &ecx, &edx);

                intel_num_arch_counters = (eax & PMU_GENERAL_NR_MASK) >>
                        PMU_GENERAL_NR_SHIFT;
                intel_num_fixed_counters = (edx & PMU_FIXED_NR_MASK) >>
                        PMU_FIXED_NR_SHIFT;
        }
}
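
/*
 * Family 15h mirrors the legacy K7 counter/control MSRs onto the
 * MSR_F15H_PERF_* range; translate a K7 address to its family-15h alias.
 */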
static inline uint32_t get_fam15h_addr(u32 addr)
{
        switch (addr) {
        case MSR_K7_PERFCTR0:
        case MSR_K7_PERFCTR1:
        case MSR_K7_PERFCTR2:
        case MSR_K7_PERFCTR3:
                return MSR_F15H_PERF_CTR + (addr - MSR_K7_PERFCTR0);
        case MSR_K7_EVNTSEL0:
        case MSR_K7_EVNTSEL1:
        case MSR_K7_EVNTSEL2:
        case MSR_K7_EVNTSEL3:
                return MSR_F15H_PERF_CTL + (addr - MSR_K7_EVNTSEL0);
        default:
                break;
        }

        return addr;
}
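
/* Does @msr fall into either the F15H or the K7 counter/control MSR ranges? */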
static inline bool is_amd_pmu_msr(unsigned int msr)
{
        if ((msr >= MSR_F15H_PERF_CTL &&
             msr < MSR_F15H_PERF_CTR + (amd_num_counters * 2)) ||
            (msr >= MSR_K7_EVNTSEL0 &&
             msr < MSR_K7_PERFCTR0 + amd_num_counters))
                return true;

        return false;
}
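
/*
 * Classify an Intel PMU MSR: on a match, report its type (control, global,
 * fixed/arch counter, arch control) and, for per-counter MSRs, its index.
 */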
static int is_intel_pmu_msr(u32 msr_index, int *type, int *index)
{
        u32 msr_index_pmc;

        switch (msr_index) {
        case MSR_CORE_PERF_FIXED_CTR_CTRL:
        case MSR_IA32_DS_AREA:
        case MSR_IA32_PEBS_ENABLE:
                *type = MSR_TYPE_CTRL;
                return true;

        case MSR_CORE_PERF_GLOBAL_CTRL:
        case MSR_CORE_PERF_GLOBAL_STATUS:
        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
                *type = MSR_TYPE_GLOBAL;
                return true;

        default:

                if ((msr_index >= MSR_CORE_PERF_FIXED_CTR0) &&
                    (msr_index < MSR_CORE_PERF_FIXED_CTR0 +
                                 intel_num_fixed_counters)) {
                        *index = msr_index - MSR_CORE_PERF_FIXED_CTR0;
                        *type = MSR_TYPE_COUNTER;
                        return true;
                }

                if ((msr_index >= MSR_P6_EVNTSEL0) &&
                    (msr_index < MSR_P6_EVNTSEL0 + intel_num_arch_counters)) {
                        *index = msr_index - MSR_P6_EVNTSEL0;
                        *type = MSR_TYPE_ARCH_CTRL;
                        return true;
                }

                msr_index_pmc = msr_index & MSR_PMC_ALIAS_MASK;
                if ((msr_index_pmc >= MSR_IA32_PERFCTR0) &&
                    (msr_index_pmc < MSR_IA32_PERFCTR0 +
                                     intel_num_arch_counters)) {
                        *type = MSR_TYPE_ARCH_COUNTER;
                        *index = msr_index_pmc - MSR_IA32_PERFCTR0;
                        return true;
                }
                return false;
        }
}
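
/*
 * Emulate an Intel PMU MSR access against the cached context in the page
 * shared with Xen. Only valid while a PMU interrupt is being processed;
 * otherwise return false so the caller falls back to a real (safe) MSR
 * access.
 */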
static bool xen_intel_pmu_emulate(unsigned int msr, u64 *val, int type,
                                  int index, bool is_read)
{
        uint64_t *reg = NULL;
        struct xen_pmu_intel_ctxt *ctxt;
        uint64_t *fix_counters;
        struct xen_pmu_cntr_pair *arch_cntr_pair;
        struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
        uint8_t xenpmu_flags = get_xenpmu_flags();

        if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING))
                return false;

        ctxt = &xenpmu_data->pmu.c.intel;

        switch (msr) {
        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
                reg = &ctxt->global_ovf_ctrl;
                break;
        case MSR_CORE_PERF_GLOBAL_STATUS:
                reg = &ctxt->global_status;
                break;
        case MSR_CORE_PERF_GLOBAL_CTRL:
                reg = &ctxt->global_ctrl;
                break;
        case MSR_CORE_PERF_FIXED_CTR_CTRL:
                reg = &ctxt->fixed_ctrl;
                break;
        default:
                switch (type) {
                case MSR_TYPE_COUNTER:
                        fix_counters = field_offset(ctxt, fixed_counters);
                        reg = &fix_counters[index];
                        break;
                case MSR_TYPE_ARCH_COUNTER:
                        arch_cntr_pair = field_offset(ctxt, arch_counters);
                        reg = &arch_cntr_pair[index].counter;
                        break;
                case MSR_TYPE_ARCH_CTRL:
                        arch_cntr_pair = field_offset(ctxt, arch_counters);
                        reg = &arch_cntr_pair[index].control;
                        break;
                default:
                        return false;
                }
        }

        if (reg) {
                if (is_read)
                        *val = *reg;
                else {
                        *reg = *val;

                        if (msr == MSR_CORE_PERF_GLOBAL_OVF_CTRL)
                                ctxt->global_status &= (~(*val));
                }
                return true;
        }

        return false;
}
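
/*
 * Emulate an AMD PMU MSR access against the cached context in the shared
 * page. On family 15h, legacy K7 aliases are first translated to their
 * native MSRs.
 */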
static bool xen_amd_pmu_emulate(unsigned int msr, u64 *val, bool is_read)
{
        uint64_t *reg = NULL;
        int i, off = 0;
        struct xen_pmu_amd_ctxt *ctxt;
        uint64_t *counter_regs, *ctrl_regs;
        struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
        uint8_t xenpmu_flags = get_xenpmu_flags();

        if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING))
                return false;

        if (k7_counters_mirrored &&
            ((msr >= MSR_K7_EVNTSEL0) && (msr <= MSR_K7_PERFCTR3)))
                msr = get_fam15h_addr(msr);

        ctxt = &xenpmu_data->pmu.c.amd;
        for (i = 0; i < amd_num_counters; i++) {
                if (msr == amd_ctrls_base + off) {
                        ctrl_regs = field_offset(ctxt, ctrls);
                        reg = &ctrl_regs[i];
                        break;
                } else if (msr == amd_counters_base + off) {
                        counter_regs = field_offset(ctxt, counters);
                        reg = &counter_regs[i];
                        break;
                }
                off += amd_msr_step;
        }

        if (reg) {
                if (is_read)
                        *val = *reg;
                else
                        *reg = *val;

                return true;
        }
        return false;
}
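
/*
 * Top-level PMU MSR accessors: return true if the MSR belongs to the PMU and
 * was handled (emulated, or passed through to the hardware when no PMU
 * interrupt is in flight), false if the caller should treat it as a non-PMU
 * MSR.
 */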
bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err)
{
        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
                if (is_amd_pmu_msr(msr)) {
                        if (!xen_amd_pmu_emulate(msr, val, true))
                                *val = native_read_msr_safe(msr, err);
                        return true;
                }
        } else {
                int type, index;

                if (is_intel_pmu_msr(msr, &type, &index)) {
                        if (!xen_intel_pmu_emulate(msr, val, type, index, true))
                                *val = native_read_msr_safe(msr, err);
                        return true;
                }
        }

        return false;
}

bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err)
{
        uint64_t val = ((uint64_t)high << 32) | low;

        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
                if (is_amd_pmu_msr(msr)) {
                        if (!xen_amd_pmu_emulate(msr, &val, false))
                                *err = native_write_msr_safe(msr, low, high);
                        return true;
                }
        } else {
                int type, index;

                if (is_intel_pmu_msr(msr, &type, &index)) {
                        if (!xen_intel_pmu_emulate(msr, &val, type, index, false))
                                *err = native_write_msr_safe(msr, low, high);
                        return true;
                }
        }

        return false;
}
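
/*
 * rdpmc emulation: while a PMU interrupt is being handled, read counters
 * from the shared-page context; otherwise fall back to the real MSRs.
 * On Intel, bit INTEL_PMC_TYPE_SHIFT of the counter number selects the
 * fixed-counter bank.
 */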
static unsigned long long xen_amd_read_pmc(int counter)
{
        struct xen_pmu_amd_ctxt *ctxt;
        uint64_t *counter_regs;
        struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
        uint8_t xenpmu_flags = get_xenpmu_flags();

        if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) {
                uint32_t msr;
                int err;

                msr = amd_counters_base + (counter * amd_msr_step);
                return native_read_msr_safe(msr, &err);
        }

        ctxt = &xenpmu_data->pmu.c.amd;
        counter_regs = field_offset(ctxt, counters);
        return counter_regs[counter];
}

static unsigned long long xen_intel_read_pmc(int counter)
{
        struct xen_pmu_intel_ctxt *ctxt;
        uint64_t *fixed_counters;
        struct xen_pmu_cntr_pair *arch_cntr_pair;
        struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
        uint8_t xenpmu_flags = get_xenpmu_flags();

        if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) {
                uint32_t msr;
                int err;

                if (counter & (1 << INTEL_PMC_TYPE_SHIFT))
                        msr = MSR_CORE_PERF_FIXED_CTR0 + (counter & 0xffff);
                else
                        msr = MSR_IA32_PERFCTR0 + counter;

                return native_read_msr_safe(msr, &err);
        }

        ctxt = &xenpmu_data->pmu.c.intel;
        if (counter & (1 << INTEL_PMC_TYPE_SHIFT)) {
                fixed_counters = field_offset(ctxt, fixed_counters);
                return fixed_counters[counter & 0xffff];
        }

        arch_cntr_pair = field_offset(ctxt, arch_counters);
        return arch_cntr_pair[counter].counter;
}

unsigned long long xen_read_pmc(int counter)
{
        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
                return xen_amd_read_pmc(counter);
        else
                return xen_intel_read_pmc(counter);
}
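
/*
 * Mirror an LVTPC APIC write into the shared page and, unless a PMU
 * interrupt is currently being processed, notify Xen via XENPMU_lvtpc_set.
 */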
int pmu_apic_update(uint32_t val)
{
        int ret;
        struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

        if (!xenpmu_data) {
                pr_warn_once("%s: pmudata not initialized\n", __func__);
                return -EINVAL;
        }

        xenpmu_data->pmu.l.lapic_lvtpc = val;

        if (get_xenpmu_flags() & XENPMU_IRQ_PROCESSING)
                return 0;

        ret = HYPERVISOR_xenpmu_op(XENPMU_lvtpc_set, NULL);

        return ret;
}

/* perf callbacks */
static int xen_is_in_guest(void)
{
        const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

        if (!xenpmu_data) {
                pr_warn_once("%s: pmudata not initialized\n", __func__);
                return 0;
        }

        if (!xen_initial_domain() || (xenpmu_data->domain_id >= DOMID_SELF))
                return 0;

        return 1;
}

static int xen_is_user_mode(void)
{
        const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

        if (!xenpmu_data) {
                pr_warn_once("%s: pmudata not initialized\n", __func__);
                return 0;
        }

        if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_PV)
                return (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_USER);
        else
                return !!(xenpmu_data->pmu.r.regs.cpl & 3);
}

static unsigned long xen_get_guest_ip(void)
{
        const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

        if (!xenpmu_data) {
                pr_warn_once("%s: pmudata not initialized\n", __func__);
                return 0;
        }

        return xenpmu_data->pmu.r.regs.ip;
}

static struct perf_guest_info_callbacks xen_guest_cbs = {
        .is_in_guest  = xen_is_in_guest,
        .is_user_mode = xen_is_user_mode,
        .get_guest_ip = xen_get_guest_ip,
};

/* Convert registers from Xen's format to Linux's */
static void xen_convert_regs(const struct xen_pmu_regs *xen_regs,
                             struct pt_regs *regs, uint64_t pmu_flags)
{
        regs->ip = xen_regs->ip;
        regs->cs = xen_regs->cs;
        regs->sp = xen_regs->sp;

        if (pmu_flags & PMU_SAMPLE_PV) {
                if (pmu_flags & PMU_SAMPLE_USER)
                        regs->cs |= 3;
                else
                        regs->cs &= ~3;
        } else {
                if (xen_regs->cpl)
                        regs->cs |= 3;
                else
                        regs->cs &= ~3;
        }
}
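
/*
 * PMU interrupt handler: mark the vcpu as processing a PMU interrupt so MSR
 * and rdpmc accesses hit the cached context, hand the sample to the core
 * x86 PMU code, then flush the cached context back to the hardware via Xen.
 */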
irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
{
        int err, ret = IRQ_NONE;
        struct pt_regs regs;
        const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
        uint8_t xenpmu_flags = get_xenpmu_flags();

        if (!xenpmu_data) {
                pr_warn_once("%s: pmudata not initialized\n", __func__);
                return ret;
        }

        this_cpu_ptr(&xenpmu_shared)->flags =
                xenpmu_flags | XENPMU_IRQ_PROCESSING;
        xen_convert_regs(&xenpmu_data->pmu.r.regs, &regs,
                         xenpmu_data->pmu.pmu_flags);
        if (x86_pmu.handle_irq(&regs))
                ret = IRQ_HANDLED;

        /* Write out cached context to HW */
        err = HYPERVISOR_xenpmu_op(XENPMU_flush, NULL);
        this_cpu_ptr(&xenpmu_shared)->flags = xenpmu_flags;
        if (err) {
                pr_warn_once("%s: failed hypercall, err: %d\n", __func__, err);
                return IRQ_NONE;
        }

        return ret;
}
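
/*
 * Whether the Xen vPMU is active, i.e. whether a shared PMU page has been
 * registered. Note that this checks the current CPU's shared page, not that
 * of @cpu.
 */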
bool is_xen_pmu(int cpu)
{
        return (get_xenpmu_data() != NULL);
}
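
/*
 * Per-CPU VPMU setup (PV domains only): allocate the page shared with Xen,
 * register it via XENPMU_init, and on CPU 0 hook up the perf guest
 * callbacks and probe the PMU layout.
 */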
void xen_pmu_init(int cpu)
{
        int err;
        struct xen_pmu_params xp;
        unsigned long pfn;
        struct xen_pmu_data *xenpmu_data;

        BUILD_BUG_ON(sizeof(struct xen_pmu_data) > PAGE_SIZE);

        if (xen_hvm_domain())
                return;

        xenpmu_data = (struct xen_pmu_data *)get_zeroed_page(GFP_KERNEL);
        if (!xenpmu_data) {
                pr_err("VPMU init: No memory\n");
                return;
        }
        pfn = virt_to_pfn(xenpmu_data);

        xp.val = pfn_to_mfn(pfn);
        xp.vcpu = cpu;
        xp.version.maj = XENPMU_VER_MAJ;
        xp.version.min = XENPMU_VER_MIN;
        err = HYPERVISOR_xenpmu_op(XENPMU_init, &xp);
        if (err)
                goto fail;

        per_cpu(xenpmu_shared, cpu).xenpmu_data = xenpmu_data;
        per_cpu(xenpmu_shared, cpu).flags = 0;

        if (cpu == 0) {
                perf_register_guest_info_callbacks(&xen_guest_cbs);
                xen_pmu_arch_init();
        }

        return;

fail:
        if (err == -EOPNOTSUPP || err == -ENOSYS)
                pr_info_once("VPMU disabled by hypervisor.\n");
        else
                pr_info_once("Could not initialize VPMU for cpu %d, error %d\n",
                             cpu, err);
        free_pages((unsigned long)xenpmu_data, 0);
}
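
/*
 * Per-CPU VPMU teardown: tell Xen to stop using the shared page via
 * XENPMU_finish, then free it.
 */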
void xen_pmu_finish(int cpu)
{
        struct xen_pmu_params xp;

        if (xen_hvm_domain())
                return;

        xp.vcpu = cpu;
        xp.version.maj = XENPMU_VER_MAJ;
        xp.version.min = XENPMU_VER_MIN;

        (void)HYPERVISOR_xenpmu_op(XENPMU_finish, &xp);

        free_pages((unsigned long)per_cpu(xenpmu_shared, cpu).xenpmu_data, 0);
        per_cpu(xenpmu_shared, cpu).xenpmu_data = NULL;
}