/*
 * kvm nested virtualization support for s390x
 *
 * Copyright IBM Corp. 2016
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 * Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
 */
#include <linux/vmalloc.h>
#include <linux/kvm_host.h>
#include <linux/bug.h>
#include <linux/list.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>

#include <asm/gmap.h>
#include <asm/mmu_context.h>
#include <asm/sclp.h>
#include <asm/nmi.h>
#include <asm/dis.h>
#include "kvm-s390.h"
#include "gaccess.h"

struct vsie_page {
        struct kvm_s390_sie_block scb_s;        /* 0x0000 */
        /*
         * the backup info for machine check. ensure it's at
         * the same offset as that in struct sie_page!
         */
        struct mcck_volatile_info mcck_info;    /* 0x0200 */
        /*
         * The pinned original scb. Be aware that other VCPUs can modify
         * it while we read from it. Values that are used for conditions or
         * are reused conditionally, should be accessed via READ_ONCE.
         */
        struct kvm_s390_sie_block *scb_o;       /* 0x0218 */
        /* the shadow gmap in use by the vsie_page */
        struct gmap *gmap;                      /* 0x0220 */
        /* address of the last reported fault to guest2 */
        unsigned long fault_addr;               /* 0x0228 */
        __u8 reserved[0x0700 - 0x0230];         /* 0x0230 */
        struct kvm_s390_crypto_cb crycb;        /* 0x0700 */
        __u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */
};
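
/*
 * The layout above is offset-sensitive: scb_s and mcck_info must match their
 * offsets in struct sie_page, and the reserved field pads the structure so
 * that it fills exactly one page - kvm_s390_handle_vsie() asserts this with
 * a BUILD_BUG_ON(sizeof(struct vsie_page) != PAGE_SIZE).
 */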

/* trigger a validity icpt for the given scb */
static int set_validity_icpt(struct kvm_s390_sie_block *scb,
                             __u16 reason_code)
{
        scb->ipa = 0x1000;
        scb->ipb = ((__u32) reason_code) << 16;
        scb->icptcode = ICPT_VALIDITY;
        return 1;
}

/* mark the prefix as unmapped, this will block the VSIE */
static void prefix_unmapped(struct vsie_page *vsie_page)
{
        atomic_or(PROG_REQUEST, &vsie_page->scb_s.prog20);
}

/* mark the prefix as unmapped and wait until the VSIE has been left */
static void prefix_unmapped_sync(struct vsie_page *vsie_page)
{
        prefix_unmapped(vsie_page);
        if (vsie_page->scb_s.prog0c & PROG_IN_SIE)
                atomic_or(CPUSTAT_STOP_INT, &vsie_page->scb_s.cpuflags);
        while (vsie_page->scb_s.prog0c & PROG_IN_SIE)
                cpu_relax();
}
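
/*
 * Note on the protocol used above: PROG_REQUEST in prog20 prevents the
 * shadow scb from (re)entering SIE, while the stop interrupt kicks a CPU
 * that is currently running it. PROG_IN_SIE in prog0c is maintained by the
 * SIE instruction itself, so busy-waiting on it is enough to know the vsie
 * has been left.
 */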

/* mark the prefix as mapped, this will allow the VSIE to run */
static void prefix_mapped(struct vsie_page *vsie_page)
{
        atomic_andnot(PROG_REQUEST, &vsie_page->scb_s.prog20);
}

/* test if the prefix is mapped into the gmap shadow */
static int prefix_is_mapped(struct vsie_page *vsie_page)
{
        return !(atomic_read(&vsie_page->scb_s.prog20) & PROG_REQUEST);
}

/* copy the updated intervention request bits into the shadow scb */
static void update_intervention_requests(struct vsie_page *vsie_page)
{
        const int bits = CPUSTAT_STOP_INT | CPUSTAT_IO_INT | CPUSTAT_EXT_INT;
        int cpuflags;

        cpuflags = atomic_read(&vsie_page->scb_o->cpuflags);
        atomic_andnot(bits, &vsie_page->scb_s.cpuflags);
        atomic_or(cpuflags & bits, &vsie_page->scb_s.cpuflags);
}

/* shadow (filter and validate) the cpuflags */
static int prepare_cpuflags(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
        int newflags, cpuflags = atomic_read(&scb_o->cpuflags);

        /* we don't allow ESA/390 guests */
        if (!(cpuflags & CPUSTAT_ZARCH))
                return set_validity_icpt(scb_s, 0x0001U);

        if (cpuflags & (CPUSTAT_RRF | CPUSTAT_MCDS))
                return set_validity_icpt(scb_s, 0x0001U);
        else if (cpuflags & (CPUSTAT_SLSV | CPUSTAT_SLSR))
                return set_validity_icpt(scb_s, 0x0007U);

        /* intervention requests will be set later */
        newflags = CPUSTAT_ZARCH;
        if (cpuflags & CPUSTAT_GED && test_kvm_facility(vcpu->kvm, 8))
                newflags |= CPUSTAT_GED;
        if (cpuflags & CPUSTAT_GED2 && test_kvm_facility(vcpu->kvm, 78)) {
                if (cpuflags & CPUSTAT_GED)
                        return set_validity_icpt(scb_s, 0x0001U);
                newflags |= CPUSTAT_GED2;
        }
        if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_GPERE))
                newflags |= cpuflags & CPUSTAT_P;
        if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_GSLS))
                newflags |= cpuflags & CPUSTAT_SM;
        if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IBS))
                newflags |= cpuflags & CPUSTAT_IBS;
        if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_KSS))
                newflags |= cpuflags & CPUSTAT_KSS;

        atomic_set(&scb_s->cpuflags, newflags);
        return 0;
}

/*
 * Create a shadow copy of the crycb block and setup key wrapping, if
 * requested for guest 3 and enabled for guest 2.
 *
 * We only accept format-1 (no AP in g2), but convert it into format-2.
 * There is nothing to do for format-0.
 *
 * Returns: - 0 if shadowed or nothing to do
 *          - > 0 if control has to be given to guest 2
 */
static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
        const uint32_t crycbd_o = READ_ONCE(scb_o->crycbd);
        const u32 crycb_addr = crycbd_o & 0x7ffffff8U;
        unsigned long *b1, *b2;
        u8 ecb3_flags;

        scb_s->crycbd = 0;
        if (!(crycbd_o & vcpu->arch.sie_block->crycbd & CRYCB_FORMAT1))
                return 0;
        /* format-1 is supported with message-security-assist extension 3 */
        if (!test_kvm_facility(vcpu->kvm, 76))
                return 0;
        /* we may only allow it if enabled for guest 2 */
        ecb3_flags = scb_o->ecb3 & vcpu->arch.sie_block->ecb3 &
                     (ECB3_AES | ECB3_DEA);
        if (!ecb3_flags)
                return 0;

        if ((crycb_addr & PAGE_MASK) != ((crycb_addr + 128) & PAGE_MASK))
                return set_validity_icpt(scb_s, 0x003CU);
        else if (!crycb_addr)
                return set_validity_icpt(scb_s, 0x0039U);

        /* copy only the wrapping keys */
        if (read_guest_real(vcpu, crycb_addr + 72,
                            vsie_page->crycb.dea_wrapping_key_mask, 56))
                return set_validity_icpt(scb_s, 0x0035U);

        scb_s->ecb3 |= ecb3_flags;
        scb_s->crycbd = ((__u32)(__u64) &vsie_page->crycb) | CRYCB_FORMAT1 |
                        CRYCB_FORMAT2;

        /* xor both blocks in one run */
        b1 = (unsigned long *) vsie_page->crycb.dea_wrapping_key_mask;
        b2 = (unsigned long *)
                vcpu->kvm->arch.crypto.crycb->dea_wrapping_key_mask;
        /* as 56%8 == 0, bitmap_xor won't overwrite any data */
        bitmap_xor(b1, b1, b2, BITS_PER_BYTE * 56);
        return 0;
}
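
/*
 * The read above copies 56 bytes starting at crycb offset 72, which covers
 * the DEA (24 bytes) and AES (32 bytes) wrapping key masks that sit adjacent
 * in the crycb. The XOR then folds g2's effective masks into g3's requested
 * masks, so the wrapping keys seen by g3 stay consistent across the nesting
 * levels; as the original comment notes, 56 is a multiple of
 * sizeof(unsigned long), so bitmap_xor() stays within the buffer.
 */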

/* shadow (round up/down) the ibc to avoid validity icpt */
static void prepare_ibc(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
        /* READ_ONCE does not work on bitfields - use a temporary variable */
        const uint32_t __new_ibc = scb_o->ibc;
        const uint32_t new_ibc = READ_ONCE(__new_ibc) & 0x0fffU;
        __u64 min_ibc = (sclp.ibc >> 16) & 0x0fffU;

        scb_s->ibc = 0;
        /* ibc installed in g2 and requested for g3 */
        if (vcpu->kvm->arch.model.ibc && new_ibc) {
                scb_s->ibc = new_ibc;
                /* take care of the minimum ibc level of the machine */
                if (scb_s->ibc < min_ibc)
                        scb_s->ibc = min_ibc;
                /* take care of the maximum ibc level set for the guest */
                if (scb_s->ibc > vcpu->kvm->arch.model.ibc)
                        scb_s->ibc = vcpu->kvm->arch.model.ibc;
        }
}
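
/*
 * Illustration with made-up values: given a machine minimum ibc of 0x0901
 * and a guest-2 model ibc of 0x0a03, a g3 request of 0x0801 is raised to
 * 0x0901 and a request of 0x0b00 is lowered to 0x0a03, so SIE never sees an
 * ibc outside the range it accepts.
 */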

/* unshadow the scb, copying parameters back to the real scb */
static void unshadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;

        /* interception */
        scb_o->icptcode = scb_s->icptcode;
        scb_o->icptstatus = scb_s->icptstatus;
        scb_o->ipa = scb_s->ipa;
        scb_o->ipb = scb_s->ipb;
        scb_o->gbea = scb_s->gbea;

        /* timer */
        scb_o->cputm = scb_s->cputm;
        scb_o->ckc = scb_s->ckc;
        scb_o->todpr = scb_s->todpr;

        /* guest state */
        scb_o->gpsw = scb_s->gpsw;
        scb_o->gg14 = scb_s->gg14;
        scb_o->gg15 = scb_s->gg15;
        memcpy(scb_o->gcr, scb_s->gcr, 128);
        scb_o->pp = scb_s->pp;

        /* branch prediction */
        if (test_kvm_facility(vcpu->kvm, 82)) {
                scb_o->fpf &= ~FPF_BPBC;
                scb_o->fpf |= scb_s->fpf & FPF_BPBC;
        }

        /* interrupt intercept */
        switch (scb_s->icptcode) {
        case ICPT_PROGI:
        case ICPT_INSTPROGI:
        case ICPT_EXTINT:
                memcpy((void *)((u64)scb_o + 0xc0),
                       (void *)((u64)scb_s + 0xc0), 0xf0 - 0xc0);
                break;
        case ICPT_PARTEXEC:
                /* MVPG only */
                memcpy((void *)((u64)scb_o + 0xc0),
                       (void *)((u64)scb_s + 0xc0), 0xd0 - 0xc0);
                break;
        }

        if (scb_s->ihcpu != 0xffffU)
                scb_o->ihcpu = scb_s->ihcpu;
}

/*
 * Setup the shadow scb by copying and checking the relevant parts of the g2
 * provided scb.
 *
 * Returns: - 0 if the scb has been shadowed
 *          - > 0 if control has to be given to guest 2
 */
static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        /* READ_ONCE does not work on bitfields - use a temporary variable */
        const uint32_t __new_prefix = scb_o->prefix;
        const uint32_t new_prefix = READ_ONCE(__new_prefix);
        const bool wants_tx = READ_ONCE(scb_o->ecb) & ECB_TE;
        bool had_tx = scb_s->ecb & ECB_TE;
        unsigned long new_mso = 0;
        int rc;

        /* make sure we don't have any leftovers when reusing the scb */
        scb_s->icptcode = 0;
        scb_s->eca = 0;
        scb_s->ecb = 0;
        scb_s->ecb2 = 0;
        scb_s->ecb3 = 0;
        scb_s->ecd = 0;
        scb_s->fac = 0;
        scb_s->fpf = 0;

        rc = prepare_cpuflags(vcpu, vsie_page);
        if (rc)
                goto out;

        /* timer */
        scb_s->cputm = scb_o->cputm;
        scb_s->ckc = scb_o->ckc;
        scb_s->todpr = scb_o->todpr;
        scb_s->epoch = scb_o->epoch;

        /* guest state */
        scb_s->gpsw = scb_o->gpsw;
        scb_s->gg14 = scb_o->gg14;
        scb_s->gg15 = scb_o->gg15;
        memcpy(scb_s->gcr, scb_o->gcr, 128);
        scb_s->pp = scb_o->pp;

        /* interception / execution handling */
        scb_s->gbea = scb_o->gbea;
        scb_s->lctl = scb_o->lctl;
        scb_s->svcc = scb_o->svcc;
        scb_s->ictl = scb_o->ictl;
        /*
         * SKEY handling functions can't deal with false setting of PTE invalid
         * bits. Therefore we cannot provide interpretation and would later
         * have to provide own emulation handlers.
         */
        if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_KSS))
                scb_s->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

        scb_s->icpua = scb_o->icpua;

        if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_SM))
                new_mso = READ_ONCE(scb_o->mso) & 0xfffffffffff00000UL;
        /* if the hva of the prefix changes, we have to remap the prefix */
        if (scb_s->mso != new_mso || scb_s->prefix != new_prefix)
                prefix_unmapped(vsie_page);
        /* SIE will do mso/msl validity and exception checks for us */
        scb_s->msl = scb_o->msl & 0xfffffffffff00000UL;
        scb_s->mso = new_mso;
        scb_s->prefix = new_prefix;

        /* We definitely have to flush the TLB if this scb never ran */
        if (scb_s->ihcpu != 0xffffU)
                scb_s->ihcpu = scb_o->ihcpu;

        /* MVPG and Protection Exception Interpretation are always available */
        scb_s->eca |= scb_o->eca & (ECA_MVPGI | ECA_PROTEXCI);
        /* Host-protection-interruption introduced with ESOP */
        if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_ESOP))
                scb_s->ecb |= scb_o->ecb & ECB_HOSTPROTINT;
        /* transactional execution */
        if (test_kvm_facility(vcpu->kvm, 73) && wants_tx) {
                /* remap the prefix if tx is toggled on */
                if (!had_tx)
                        prefix_unmapped(vsie_page);
                scb_s->ecb |= ECB_TE;
        }
        /* branch prediction */
        if (test_kvm_facility(vcpu->kvm, 82))
                scb_s->fpf |= scb_o->fpf & FPF_BPBC;
        /* SIMD */
        if (test_kvm_facility(vcpu->kvm, 129)) {
                scb_s->eca |= scb_o->eca & ECA_VX;
                scb_s->ecd |= scb_o->ecd & ECD_HOSTREGMGMT;
        }
        /* Run-time-Instrumentation */
        if (test_kvm_facility(vcpu->kvm, 64))
                scb_s->ecb3 |= scb_o->ecb3 & ECB3_RI;
        /* Instruction Execution Prevention */
        if (test_kvm_facility(vcpu->kvm, 130))
                scb_s->ecb2 |= scb_o->ecb2 & ECB2_IEP;
        /* Guarded Storage */
        if (test_kvm_facility(vcpu->kvm, 133)) {
                scb_s->ecb |= scb_o->ecb & ECB_GS;
                scb_s->ecd |= scb_o->ecd & ECD_HOSTREGMGMT;
        }
        if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIIF))
                scb_s->eca |= scb_o->eca & ECA_SII;
        if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IB))
                scb_s->eca |= scb_o->eca & ECA_IB;
        if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI))
                scb_s->eca |= scb_o->eca & ECA_CEI;
        /* Epoch Extension */
        if (test_kvm_facility(vcpu->kvm, 139))
                scb_s->ecd |= scb_o->ecd & ECD_MEF;

        prepare_ibc(vcpu, vsie_page);
        rc = shadow_crycb(vcpu, vsie_page);
out:
        if (rc)
                unshadow_scb(vcpu, vsie_page);
        return rc;
}

void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
                                 unsigned long end)
{
        struct kvm *kvm = gmap->private;
        struct vsie_page *cur;
        unsigned long prefix;
        struct page *page;
        int i;

        if (!gmap_is_shadow(gmap))
                return;
        if (start >= 1UL << 31)
                /* We are only interested in prefix pages */
                return;

        /*
         * Only new shadow blocks are added to the list during runtime,
         * therefore we can safely reference them all the time.
         */
        for (i = 0; i < kvm->arch.vsie.page_count; i++) {
                page = READ_ONCE(kvm->arch.vsie.pages[i]);
                if (!page)
                        continue;
                cur = page_to_virt(page);
                if (READ_ONCE(cur->gmap) != gmap)
                        continue;
                prefix = cur->scb_s.prefix << GUEST_PREFIX_SHIFT;
                /* with mso/msl, the prefix lies at an offset */
                prefix += cur->scb_s.mso;
                if (prefix <= end && start <= prefix + 2 * PAGE_SIZE - 1)
                        prefix_unmapped_sync(cur);
        }
}
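
/*
 * The check against prefix + 2 * PAGE_SIZE - 1 covers both pages of the
 * 8 KB prefix area: map_prefix() below always shadows the first page and,
 * with transactional execution enabled, the second one as well.
 */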

/*
 * Map the first prefix page and if tx is enabled also the second prefix page.
 *
 * The prefix will be protected, a gmap notifier will inform about unmaps.
 * The shadow scb must not be executed until the prefix is remapped, this is
 * guaranteed by properly handling PROG_REQUEST.
 *
 * Returns: - 0 if successfully mapped or already mapped
 *          - > 0 if control has to be given to guest 2
 *          - -EAGAIN if the caller can retry immediately
 *          - -ENOMEM if out of memory
 */
static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        u64 prefix = scb_s->prefix << GUEST_PREFIX_SHIFT;
        int rc;

        if (prefix_is_mapped(vsie_page))
                return 0;

        /* mark it as mapped so we can catch any concurrent unmappers */
        prefix_mapped(vsie_page);

        /* with mso/msl, the prefix lies at offset *mso* */
        prefix += scb_s->mso;

        rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
        if (!rc && (scb_s->ecb & ECB_TE))
                rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
                                           prefix + PAGE_SIZE);
        /*
         * We don't have to mprotect, we will be called for all unshadows.
         * SIE will detect if protection applies and trigger a validity.
         */
        if (rc)
                prefix_unmapped(vsie_page);
        if (rc > 0 || rc == -EFAULT)
                rc = set_validity_icpt(scb_s, 0x0037U);
        return rc;
}

/*
 * Pin the guest page given by gpa and set hpa to the pinned host address.
 * Will always be pinned writable.
 *
 * Returns: - 0 on success
 *          - -EINVAL if the gpa is not valid guest storage
 *          - -ENOMEM if out of memory
 */
static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa)
{
        struct page *page;
        hva_t hva;
        int rc;

        hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
        if (kvm_is_error_hva(hva))
                return -EINVAL;
        rc = get_user_pages_fast(hva, 1, 1, &page);
        if (rc < 0)
                return rc;
        else if (rc != 1)
                return -ENOMEM;
        *hpa = (hpa_t) page_to_virt(page) + (gpa & ~PAGE_MASK);
        return 0;
}
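
/*
 * Only a single page is pinned here (nr_pages == 1 and write == 1 in the
 * get_user_pages_fast() call), which is why every caller has to guarantee
 * that the pinned block never crosses a page boundary - see the alignment
 * checks in pin_blocks() below.
 */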

/* Unpins a page previously pinned via pin_guest_page, marking it as dirty. */
static void unpin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t hpa)
{
        struct page *page;

        page = virt_to_page(hpa);
        set_page_dirty_lock(page);
        put_page(page);
        /* mark the page always as dirty for migration */
        mark_page_dirty(kvm, gpa_to_gfn(gpa));
}

/* unpin all blocks previously pinned by pin_blocks(), marking them dirty */
static void unpin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        hpa_t hpa;
        gpa_t gpa;

        hpa = (u64) scb_s->scaoh << 32 | scb_s->scaol;
        if (hpa) {
                gpa = scb_o->scaol & ~0xfUL;
                if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_64BSCAO))
                        gpa |= (u64) scb_o->scaoh << 32;
                unpin_guest_page(vcpu->kvm, gpa, hpa);
                scb_s->scaol = 0;
                scb_s->scaoh = 0;
        }

        hpa = scb_s->itdba;
        if (hpa) {
                gpa = scb_o->itdba & ~0xffUL;
                unpin_guest_page(vcpu->kvm, gpa, hpa);
                scb_s->itdba = 0;
        }

        hpa = scb_s->gvrd;
        if (hpa) {
                gpa = scb_o->gvrd & ~0x1ffUL;
                unpin_guest_page(vcpu->kvm, gpa, hpa);
                scb_s->gvrd = 0;
        }

        hpa = scb_s->riccbd;
        if (hpa) {
                gpa = scb_o->riccbd & ~0x3fUL;
                unpin_guest_page(vcpu->kvm, gpa, hpa);
                scb_s->riccbd = 0;
        }

        hpa = scb_s->sdnxo;
        if (hpa) {
                gpa = scb_o->sdnxo;
                unpin_guest_page(vcpu->kvm, gpa, hpa);
                scb_s->sdnxo = 0;
        }
}

/*
 * Instead of shadowing some blocks, we can simply forward them because the
 * addresses in the scb are 64 bit long.
 *
 * This works as long as the data lies in one page. If blocks ever exceed one
 * page, we have to fall back to shadowing.
 *
 * As we reuse the sca, the vcpu pointers contained in it are invalid. We must
 * therefore not enable any facilities that access these pointers (e.g. SIGPIF).
 *
 * Returns: - 0 if all blocks were pinned.
 *          - > 0 if control has to be given to guest 2
 *          - -ENOMEM if out of memory
 */
static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        hpa_t hpa;
        gpa_t gpa;
        int rc = 0;

        gpa = READ_ONCE(scb_o->scaol) & ~0xfUL;
        if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_64BSCAO))
                gpa |= (u64) READ_ONCE(scb_o->scaoh) << 32;
        if (gpa) {
                if (!(gpa & ~0x1fffUL))
                        rc = set_validity_icpt(scb_s, 0x0038U);
                else if ((gpa & ~0x1fffUL) == kvm_s390_get_prefix(vcpu))
                        rc = set_validity_icpt(scb_s, 0x0011U);
                else if ((gpa & PAGE_MASK) !=
                         ((gpa + sizeof(struct bsca_block) - 1) & PAGE_MASK))
                        rc = set_validity_icpt(scb_s, 0x003bU);
                if (!rc) {
                        rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
                        if (rc == -EINVAL)
                                rc = set_validity_icpt(scb_s, 0x0034U);
                }
                if (rc)
                        goto unpin;
                scb_s->scaoh = (u32)((u64)hpa >> 32);
                scb_s->scaol = (u32)(u64)hpa;
        }

        gpa = READ_ONCE(scb_o->itdba) & ~0xffUL;
        if (gpa && (scb_s->ecb & ECB_TE)) {
                if (!(gpa & ~0x1fffUL)) {
                        rc = set_validity_icpt(scb_s, 0x0080U);
                        goto unpin;
                }
                /* 256 bytes cannot cross page boundaries */
                rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
                if (rc == -EINVAL)
                        rc = set_validity_icpt(scb_s, 0x0080U);
                if (rc)
                        goto unpin;
                scb_s->itdba = hpa;
        }

        gpa = READ_ONCE(scb_o->gvrd) & ~0x1ffUL;
        if (gpa && (scb_s->eca & ECA_VX) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
                if (!(gpa & ~0x1fffUL)) {
                        rc = set_validity_icpt(scb_s, 0x1310U);
                        goto unpin;
                }
                /*
                 * 512 bytes of vector registers cannot cross page boundaries;
                 * if this block gets bigger, we have to shadow it.
                 */
                rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
                if (rc == -EINVAL)
                        rc = set_validity_icpt(scb_s, 0x1310U);
                if (rc)
                        goto unpin;
                scb_s->gvrd = hpa;
        }

        gpa = READ_ONCE(scb_o->riccbd) & ~0x3fUL;
        if (gpa && (scb_s->ecb3 & ECB3_RI)) {
                if (!(gpa & ~0x1fffUL)) {
                        rc = set_validity_icpt(scb_s, 0x0043U);
                        goto unpin;
                }
                /* 64 bytes cannot cross page boundaries */
                rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
                if (rc == -EINVAL)
                        rc = set_validity_icpt(scb_s, 0x0043U);
                /* Validity 0x0044 will be checked by SIE */
                if (rc)
                        goto unpin;
                scb_s->riccbd = hpa;
        }

        if ((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
                unsigned long sdnxc;

                gpa = READ_ONCE(scb_o->sdnxo) & ~0xfUL;
                sdnxc = READ_ONCE(scb_o->sdnxo) & 0xfUL;
                if (!gpa || !(gpa & ~0x1fffUL)) {
                        rc = set_validity_icpt(scb_s, 0x10b0U);
                        goto unpin;
                }
                if (sdnxc < 6 || sdnxc > 12) {
                        rc = set_validity_icpt(scb_s, 0x10b1U);
                        goto unpin;
                }
                if (gpa & ((1 << sdnxc) - 1)) {
                        rc = set_validity_icpt(scb_s, 0x10b2U);
                        goto unpin;
                }
                /*
                 * Due to alignment rules (checked above) this cannot
                 * cross page boundaries
                 */
                rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
                if (rc == -EINVAL)
                        rc = set_validity_icpt(scb_s, 0x10b0U);
                if (rc)
                        goto unpin;
                scb_s->sdnxo = hpa | sdnxc;
        }
        return 0;
unpin:
        unpin_blocks(vcpu, vsie_page);
        return rc;
}
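
/*
 * A note on the recurring "!(gpa & ~0x1fffUL)" tests above: they are true
 * exactly when gpa lies below 0x2000, i.e. within the first two pages of
 * guest storage (the low-core area), which must not be used for any of
 * these blocks and is therefore answered with a validity intercept.
 */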

/* unpin the scb provided by guest 2, marking it as dirty */
static void unpin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
                      gpa_t gpa)
{
        hpa_t hpa = (hpa_t) vsie_page->scb_o;

        if (hpa)
                unpin_guest_page(vcpu->kvm, gpa, hpa);
        vsie_page->scb_o = NULL;
}

/*
 * Pin the scb at gpa provided by guest 2 at vsie_page->scb_o.
 *
 * Returns: - 0 if the scb was pinned.
 *          - > 0 if control has to be given to guest 2
 *          - -ENOMEM if out of memory
 */
static int pin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
                   gpa_t gpa)
{
        hpa_t hpa;
        int rc;

        rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
        if (rc == -EINVAL) {
                rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
                if (!rc)
                        rc = 1;
        }
        if (!rc)
                vsie_page->scb_o = (struct kvm_s390_sie_block *) hpa;
        return rc;
}

/*
 * Inject a fault into guest 2.
 *
 * Returns: - > 0 if control has to be given to guest 2
 *            < 0 if an error occurred during injection.
 */
static int inject_fault(struct kvm_vcpu *vcpu, __u16 code, __u64 vaddr,
                        bool write_flag)
{
        struct kvm_s390_pgm_info pgm = {
                .code = code,
                .trans_exc_code =
                        /* 0-51: virtual address */
                        (vaddr & 0xfffffffffffff000UL) |
                        /* 52-53: store / fetch */
                        (((unsigned int) !write_flag) + 1) << 10,
                        /* 62-63: asce id (always primary == 0) */
                .exc_access_id = 0, /* always primary */
                .op_access_id = 0, /* not MVPG */
        };
        int rc;

        if (code == PGM_PROTECTION)
                pgm.trans_exc_code |= 0x4UL;

        rc = kvm_s390_inject_prog_irq(vcpu, &pgm);
        return rc ? rc : 1;
}

/*
 * Handle a fault during vsie execution on a gmap shadow.
 *
 * Returns: - 0 if the fault was resolved
 *          - > 0 if control has to be given to guest 2
 *          - < 0 if an error occurred
 */
static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
        int rc;

        if (current->thread.gmap_int_code == PGM_PROTECTION)
                /* we can directly forward all protection exceptions */
                return inject_fault(vcpu, PGM_PROTECTION,
                                    current->thread.gmap_addr, 1);

        rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
                                   current->thread.gmap_addr);
        if (rc > 0) {
                rc = inject_fault(vcpu, rc,
                                  current->thread.gmap_addr,
                                  current->thread.gmap_write_flag);
                if (rc >= 0)
                        vsie_page->fault_addr = current->thread.gmap_addr;
        }
        return rc;
}

/*
 * Retry the previous fault that required guest 2 intervention. This avoids
 * one superfluous SIE re-entry and direct exit.
 *
 * Will ignore any errors. The next SIE fault will do proper fault handling.
 */
static void handle_last_fault(struct kvm_vcpu *vcpu,
                              struct vsie_page *vsie_page)
{
        if (vsie_page->fault_addr)
                kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
                                      vsie_page->fault_addr);
        vsie_page->fault_addr = 0;
}

static inline void clear_vsie_icpt(struct vsie_page *vsie_page)
{
        vsie_page->scb_s.icptcode = 0;
}

/* rewind the psw and clear the vsie icpt, so we can retry execution */
static void retry_vsie_icpt(struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        int ilen = insn_length(scb_s->ipa >> 8);

        /* take care of EXECUTE instructions */
        if (scb_s->icptstatus & 1) {
                ilen = (scb_s->icptstatus >> 4) & 0x6;
                if (!ilen)
                        ilen = 4;
        }
        scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, ilen);
        clear_vsie_icpt(vsie_page);
}
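
/*
 * Background for the EXECUTE special case above: if bit 0 of icptstatus is
 * set, the intercepted instruction was the target of an EXECUTE-type
 * instruction, so the PSW has to be rewound by the length of that
 * EXECUTE-type instruction (as recorded in icptstatus), not by the length
 * derived from the target's opcode in ipa.
 */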

/*
 * Try to shadow + enable the guest 2 provided facility list.
 * Retry instruction execution if enabled for and provided by guest 2.
 *
 * Returns: - 0 if handled (retry or guest 2 icpt)
 *          - > 0 if control has to be given to guest 2
 */
static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        __u32 fac = READ_ONCE(vsie_page->scb_o->fac) & 0x7ffffff8U;

        if (fac && test_kvm_facility(vcpu->kvm, 7)) {
                retry_vsie_icpt(vsie_page);
                if (read_guest_real(vcpu, fac, &vsie_page->fac,
                                    sizeof(vsie_page->fac)))
                        return set_validity_icpt(scb_s, 0x1090U);
                scb_s->fac = (__u32)(__u64) &vsie_page->fac;
        }
        return 0;
}
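
/*
 * In other words: the facility list provided by g2 is copied into the
 * vsie_page and the shadow scb's fac pointer is redirected to that copy,
 * after the PSW has been rewound - g3 then simply re-executes STFLE, this
 * time interpreted by SIE against the shadowed list. Facility 7 is the
 * store-facility-list-extended (STFLE) facility this depends on.
 */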

/*
 * Run the vsie on a shadow scb and a shadow gmap, without any further
 * sanity checks, handling SIE faults.
 *
 * Returns: - 0 everything went fine
 *          - > 0 if control has to be given to guest 2
 *          - < 0 if an error occurred
 */
static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
        int guest_bp_isolation;
        int rc;

        handle_last_fault(vcpu, vsie_page);

        if (need_resched())
                schedule();
        if (test_cpu_flag(CIF_MCCK_PENDING))
                s390_handle_mcck();

        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);

        /* save current guest state of bp isolation override */
        guest_bp_isolation = test_thread_flag(TIF_ISOLATE_BP_GUEST);

        /*
         * The guest is running with BPBC, so we have to force it on for our
         * nested guest. This is done by enabling BPBC globally, so the BPBC
         * control in the SCB (which the nested guest can modify) is simply
         * ignored.
         */
        if (test_kvm_facility(vcpu->kvm, 82) &&
            vcpu->arch.sie_block->fpf & FPF_BPBC)
                set_thread_flag(TIF_ISOLATE_BP_GUEST);

        local_irq_disable();
        guest_enter_irqoff();
        local_irq_enable();

        rc = sie64a(scb_s, vcpu->run->s.regs.gprs);

        local_irq_disable();
        guest_exit_irqoff();
        local_irq_enable();

        /* restore guest state for bp isolation override */
        if (!guest_bp_isolation)
                clear_thread_flag(TIF_ISOLATE_BP_GUEST);

        vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

        if (rc == -EINTR) {
                VCPU_EVENT(vcpu, 3, "%s", "machine check");
                kvm_s390_reinject_machine_check(vcpu, &vsie_page->mcck_info);
                return 0;
        }

        if (rc > 0)
                rc = 0; /* we could still have an icpt */
        else if (rc == -EFAULT)
                return handle_fault(vcpu, vsie_page);

        switch (scb_s->icptcode) {
        case ICPT_INST:
                if (scb_s->ipa == 0xb2b0) /* STFLE */
                        rc = handle_stfle(vcpu, vsie_page);
                break;
        case ICPT_STOP:
                /* stop not requested by g2 - must have been a kick */
                if (!(atomic_read(&scb_o->cpuflags) & CPUSTAT_STOP_INT))
                        clear_vsie_icpt(vsie_page);
                break;
        case ICPT_VALIDITY:
                if ((scb_s->ipa & 0xf000) != 0xf000)
                        scb_s->ipa += 0x1000;
                break;
        }
        return rc;
}

static void release_gmap_shadow(struct vsie_page *vsie_page)
{
        if (vsie_page->gmap)
                gmap_put(vsie_page->gmap);
        WRITE_ONCE(vsie_page->gmap, NULL);
        prefix_unmapped(vsie_page);
}

static int acquire_gmap_shadow(struct kvm_vcpu *vcpu,
                               struct vsie_page *vsie_page)
{
        unsigned long asce;
        union ctlreg0 cr0;
        struct gmap *gmap;
        int edat;

        asce = vcpu->arch.sie_block->gcr[1];
        cr0.val = vcpu->arch.sie_block->gcr[0];
        edat = cr0.edat && test_kvm_facility(vcpu->kvm, 8);
        edat += edat && test_kvm_facility(vcpu->kvm, 78);

        /*
         * ASCE or EDAT could have changed since last icpt, or the gmap
         * we're holding has been unshadowed. If the gmap is still valid,
         * we can safely reuse it.
         */
        if (vsie_page->gmap && gmap_shadow_valid(vsie_page->gmap, asce, edat))
                return 0;

        /* release the old shadow - if any, and mark the prefix as unmapped */
        release_gmap_shadow(vsie_page);
        gmap = gmap_shadow(vcpu->arch.gmap, asce, edat);
        if (IS_ERR(gmap))
                return PTR_ERR(gmap);
        gmap->private = vcpu->kvm;
        WRITE_ONCE(vsie_page->gmap, gmap);
        return 0;
}
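
/*
 * The edat computation above encodes the supported DAT level as 0, 1 or 2:
 * it becomes 1 only if g2 runs with CR0.edat set and facility 8 (EDAT-1) is
 * available, and 2 only if, on top of that, facility 78 (EDAT-2) is
 * available as well.
 */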

/*
 * Register the shadow scb at the VCPU, e.g. for kicking out of vsie.
 */
static void register_shadow_scb(struct kvm_vcpu *vcpu,
                                struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;

        WRITE_ONCE(vcpu->arch.vsie_block, &vsie_page->scb_s);
        /*
         * External calls have to lead to a kick of the vcpu and therefore of
         * the vsie -> simulate a wait state.
         */
        atomic_or(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
        /*
         * We have to adjust the g3 epoch by the g2 epoch. The epoch will
         * automatically be adjusted on tod clock changes via kvm_sync_clock.
         */
        preempt_disable();
        scb_s->epoch += vcpu->kvm->arch.epoch;

        if (scb_s->ecd & ECD_MEF) {
                scb_s->epdx += vcpu->kvm->arch.epdx;
                if (scb_s->epoch < vcpu->kvm->arch.epoch)
                        scb_s->epdx += 1;
        }

        preempt_enable();
}
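
/*
 * The "scb_s->epoch < vcpu->kvm->arch.epoch" test right after the 64-bit
 * addition detects unsigned wraparound: if the sum overflowed, a carry is
 * propagated into the epoch extension (epdx) provided by the multiple-epoch
 * facility.
 */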

/*
 * Unregister a shadow scb from a VCPU.
 */
static void unregister_shadow_scb(struct kvm_vcpu *vcpu)
{
        atomic_andnot(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
        WRITE_ONCE(vcpu->arch.vsie_block, NULL);
}

/*
 * Run the vsie on a shadowed scb, managing the gmap shadow, handling
 * prefix pages and faults.
 *
 * Returns: - 0 if no errors occurred
 *          - > 0 if control has to be given to guest 2
 *          - -ENOMEM if out of memory
 */
static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
        struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
        int rc = 0;

        while (1) {
                rc = acquire_gmap_shadow(vcpu, vsie_page);
                if (!rc)
                        rc = map_prefix(vcpu, vsie_page);
                if (!rc) {
                        gmap_enable(vsie_page->gmap);
                        update_intervention_requests(vsie_page);
                        rc = do_vsie_run(vcpu, vsie_page);
                        gmap_enable(vcpu->arch.gmap);
                }
                atomic_andnot(PROG_BLOCK_SIE, &scb_s->prog20);

                if (rc == -EAGAIN)
                        rc = 0;
                if (rc || scb_s->icptcode || signal_pending(current) ||
                    kvm_s390_vcpu_has_irq(vcpu, 0))
                        break;
        }

        if (rc == -EFAULT) {
                /*
                 * Addressing exceptions are always presented as intercepts.
                 * As addressing exceptions are suppressing and our guest 3 PSW
                 * points at the responsible instruction, we have to
                 * forward the PSW and set the ilc. If we can't read the guest 3
                 * instruction, we can use an arbitrary ilc. Let's always use
                 * ilen = 4 for now, so we can avoid reading in guest 3 virtual
                 * memory. (we could also fake the shadow so the hardware
                 * handles it).
                 */
                scb_s->icptcode = ICPT_PROGI;
                scb_s->iprcc = PGM_ADDRESSING;
                scb_s->pgmilc = 4;
                scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, 4);
                rc = 1;
        }
        return rc;
}

/*
 * Get or create a vsie page for a scb address.
 *
 * Returns: - address of a vsie page (cached or new one)
 *          - NULL if the same scb address is already used by another VCPU
 *          - ERR_PTR(-ENOMEM) if out of memory
 */
static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
{
        struct vsie_page *vsie_page;
        struct page *page;
        int nr_vcpus;

        rcu_read_lock();
        page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9);
        rcu_read_unlock();
        if (page) {
                if (page_ref_inc_return(page) == 2)
                        return page_to_virt(page);
                page_ref_dec(page);
        }

        /*
         * We want at least #online_vcpus shadows, so every VCPU can execute
         * the VSIE in parallel.
         */
        nr_vcpus = atomic_read(&kvm->online_vcpus);

        mutex_lock(&kvm->arch.vsie.mutex);
        if (kvm->arch.vsie.page_count < nr_vcpus) {
                page = alloc_page(GFP_KERNEL | __GFP_ZERO | GFP_DMA);
                if (!page) {
                        mutex_unlock(&kvm->arch.vsie.mutex);
                        return ERR_PTR(-ENOMEM);
                }
                page_ref_inc(page);
                kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = page;
                kvm->arch.vsie.page_count++;
        } else {
                /* reuse an existing entry that belongs to nobody */
                while (true) {
                        page = kvm->arch.vsie.pages[kvm->arch.vsie.next];
                        if (page_ref_inc_return(page) == 2)
                                break;
                        page_ref_dec(page);
                        kvm->arch.vsie.next++;
                        kvm->arch.vsie.next %= nr_vcpus;
                }
                radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9);
        }
        page->index = addr;
        /* double use of the same address */
        if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, page)) {
                page_ref_dec(page);
                mutex_unlock(&kvm->arch.vsie.mutex);
                return NULL;
        }
        mutex_unlock(&kvm->arch.vsie.mutex);

        vsie_page = page_to_virt(page);
        memset(&vsie_page->scb_s, 0, sizeof(struct kvm_s390_sie_block));
        release_gmap_shadow(vsie_page);
        vsie_page->fault_addr = 0;
        vsie_page->scb_s.ihcpu = 0xffffU;
        return vsie_page;
}
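
/*
 * Two implementation details worth spelling out: a cached page is "free"
 * while only the pages array references it (refcount 1), so the atomic
 * page_ref_inc_return() == 2 check doubles as an ownership claim. And since
 * scb addresses are 512-byte aligned, the low 9 bits are always zero;
 * shifting them away (addr >> 9) yields a denser radix tree key.
 */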

/* put a vsie page acquired via get_vsie_page */
static void put_vsie_page(struct kvm *kvm, struct vsie_page *vsie_page)
{
        struct page *page = pfn_to_page(__pa(vsie_page) >> PAGE_SHIFT);

        page_ref_dec(page);
}

int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
{
        struct vsie_page *vsie_page;
        unsigned long scb_addr;
        int rc;

        vcpu->stat.instruction_sie++;
        if (!test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIEF2))
                return -EOPNOTSUPP;
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);

        BUILD_BUG_ON(sizeof(struct vsie_page) != PAGE_SIZE);
        scb_addr = kvm_s390_get_base_disp_s(vcpu, NULL);

        /* 512 byte alignment */
        if (unlikely(scb_addr & 0x1ffUL))
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);

        if (signal_pending(current) || kvm_s390_vcpu_has_irq(vcpu, 0))
                return 0;

        vsie_page = get_vsie_page(vcpu->kvm, scb_addr);
        if (IS_ERR(vsie_page))
                return PTR_ERR(vsie_page);
        else if (!vsie_page)
                /* double use of sie control block - simply do nothing */
                return 0;

        rc = pin_scb(vcpu, vsie_page, scb_addr);
        if (rc)
                goto out_put;
        rc = shadow_scb(vcpu, vsie_page);
        if (rc)
                goto out_unpin_scb;
        rc = pin_blocks(vcpu, vsie_page);
        if (rc)
                goto out_unshadow;
        register_shadow_scb(vcpu, vsie_page);
        rc = vsie_run(vcpu, vsie_page);
        unregister_shadow_scb(vcpu);
        unpin_blocks(vcpu, vsie_page);
out_unshadow:
        unshadow_scb(vcpu, vsie_page);
out_unpin_scb:
        unpin_scb(vcpu, vsie_page, scb_addr);
out_put:
        put_vsie_page(vcpu->kvm, vsie_page);

        return rc < 0 ? rc : 0;
}
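
/*
 * To summarize the lifecycle above: pin the g2 scb, shadow it, pin the
 * satellite blocks, register the shadow scb for kicks, run the vsie loop,
 * and unwind in exactly the reverse order. Positive return codes from the
 * helpers mean "give control back to guest 2" and are folded into the final
 * "return rc < 0 ? rc : 0".
 */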

/* Init the vsie data structures. To be called when a vm is initialized. */
void kvm_s390_vsie_init(struct kvm *kvm)
{
        mutex_init(&kvm->arch.vsie.mutex);
        INIT_RADIX_TREE(&kvm->arch.vsie.addr_to_page, GFP_KERNEL);
}

/* Destroy the vsie data structures. To be called when a vm is destroyed. */
void kvm_s390_vsie_destroy(struct kvm *kvm)
{
        struct vsie_page *vsie_page;
        struct page *page;
        int i;

        mutex_lock(&kvm->arch.vsie.mutex);
        for (i = 0; i < kvm->arch.vsie.page_count; i++) {
                page = kvm->arch.vsie.pages[i];
                kvm->arch.vsie.pages[i] = NULL;
                vsie_page = page_to_virt(page);
                release_gmap_shadow(vsie_page);
                /* free the radix tree entry */
                radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9);
                __free_page(page);
        }
        kvm->arch.vsie.page_count = 0;
        mutex_unlock(&kvm->arch.vsie.mutex);
}

void kvm_s390_vsie_kick(struct kvm_vcpu *vcpu)
{
        struct kvm_s390_sie_block *scb = READ_ONCE(vcpu->arch.vsie_block);

        /*
         * Even if the VCPU lets go of the shadow sie block reference, it is
         * still valid in the cache. So we can safely kick it.
         */
        if (scb) {
                atomic_or(PROG_BLOCK_SIE, &scb->prog20);
                if (scb->prog0c & PROG_IN_SIE)
                        atomic_or(CPUSTAT_STOP_INT, &scb->cpuflags);
        }
}