// SPDX-License-Identifier: GPL-2.0
/*
 * kvm nested virtualization support for s390x
 *
 * Copyright IBM Corp. 2016, 2018
 *
 *    Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
 */
#include <linux/vmalloc.h>
#include <linux/kvm_host.h>
#include <linux/bug.h>
#include <linux/list.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>

#include <asm/gmap.h>
#include <asm/mmu_context.h>
#include <asm/sclp.h>
#include <asm/nmi.h>
#include <asm/dis.h>
#include "kvm-s390.h"
#include "gaccess.h"
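
/*
 * A vsie_page bundles the shadow SIE control block with all per-VSIE state
 * and must occupy exactly one page; kvm_s390_handle_vsie() asserts this via
 * BUILD_BUG_ON.
 */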
struct vsie_page {
	struct kvm_s390_sie_block scb_s;	/* 0x0000 */
	/*
	 * the backup info for machine check. ensure it's at
	 * the same offset as that in struct sie_page!
	 */
	struct mcck_volatile_info mcck_info;	/* 0x0200 */
	/*
	 * The pinned original scb. Be aware that other VCPUs can modify
	 * it while we read from it. Values that are used for conditions or
	 * are reused conditionally, should be accessed via READ_ONCE.
	 */
	struct kvm_s390_sie_block *scb_o;	/* 0x0218 */
	/* the shadow gmap in use by the vsie_page */
	struct gmap *gmap;			/* 0x0220 */
	/* address of the last reported fault to guest2 */
	unsigned long fault_addr;		/* 0x0228 */
	/* calculated guest addresses of satellite control blocks */
	gpa_t sca_gpa;				/* 0x0230 */
	gpa_t itdba_gpa;			/* 0x0238 */
	gpa_t gvrd_gpa;				/* 0x0240 */
	gpa_t riccbd_gpa;			/* 0x0248 */
	gpa_t sdnx_gpa;				/* 0x0250 */
	__u8 reserved[0x0700 - 0x0258];		/* 0x0258 */
	struct kvm_s390_crypto_cb crycb;	/* 0x0700 */
	__u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE];	/* 0x0800 */
};

/* trigger a validity icpt for the given scb */
static int set_validity_icpt(struct kvm_s390_sie_block *scb,
			     __u16 reason_code)
{
	scb->ipa = 0x1000;
	scb->ipb = ((__u32) reason_code) << 16;
	scb->icptcode = ICPT_VALIDITY;
	return 1;
}

/* mark the prefix as unmapped, this will block the VSIE */
static void prefix_unmapped(struct vsie_page *vsie_page)
{
	atomic_or(PROG_REQUEST, &vsie_page->scb_s.prog20);
}

/* mark the prefix as unmapped and wait until the VSIE has been left */
static void prefix_unmapped_sync(struct vsie_page *vsie_page)
{
	prefix_unmapped(vsie_page);
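	/*
	 * If the VSIE is currently executing (PROG_IN_SIE), request a STOP
	 * intervention so that SIE exits and the busy-wait below is bounded.
	 */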
	if (vsie_page->scb_s.prog0c & PROG_IN_SIE)
		atomic_or(CPUSTAT_STOP_INT, &vsie_page->scb_s.cpuflags);
	while (vsie_page->scb_s.prog0c & PROG_IN_SIE)
		cpu_relax();
}

/* mark the prefix as mapped, this will allow the VSIE to run */
static void prefix_mapped(struct vsie_page *vsie_page)
{
	atomic_andnot(PROG_REQUEST, &vsie_page->scb_s.prog20);
}

/* test if the prefix is mapped into the gmap shadow */
static int prefix_is_mapped(struct vsie_page *vsie_page)
{
	return !(atomic_read(&vsie_page->scb_s.prog20) & PROG_REQUEST);
}

/* copy the updated intervention request bits into the shadow scb */
static void update_intervention_requests(struct vsie_page *vsie_page)
{
	const int bits = CPUSTAT_STOP_INT | CPUSTAT_IO_INT | CPUSTAT_EXT_INT;
	int cpuflags;

	cpuflags = atomic_read(&vsie_page->scb_o->cpuflags);
	atomic_andnot(bits, &vsie_page->scb_s.cpuflags);
	atomic_or(cpuflags & bits, &vsie_page->scb_s.cpuflags);
}

/* shadow (filter and validate) the cpuflags */
static int prepare_cpuflags(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
	int newflags, cpuflags = atomic_read(&scb_o->cpuflags);

	/* we don't allow ESA/390 guests */
	if (!(cpuflags & CPUSTAT_ZARCH))
		return set_validity_icpt(scb_s, 0x0001U);

	if (cpuflags & (CPUSTAT_RRF | CPUSTAT_MCDS))
		return set_validity_icpt(scb_s, 0x0001U);
	else if (cpuflags & (CPUSTAT_SLSV | CPUSTAT_SLSR))
		return set_validity_icpt(scb_s, 0x0007U);

	/* intervention requests will be set later */
	newflags = CPUSTAT_ZARCH;
	if (cpuflags & CPUSTAT_GED && test_kvm_facility(vcpu->kvm, 8))
		newflags |= CPUSTAT_GED;
	if (cpuflags & CPUSTAT_GED2 && test_kvm_facility(vcpu->kvm, 78)) {
		if (cpuflags & CPUSTAT_GED)
			return set_validity_icpt(scb_s, 0x0001U);
		newflags |= CPUSTAT_GED2;
	}
	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_GPERE))
		newflags |= cpuflags & CPUSTAT_P;
	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_GSLS))
		newflags |= cpuflags & CPUSTAT_SM;
	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IBS))
		newflags |= cpuflags & CPUSTAT_IBS;
	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_KSS))
		newflags |= cpuflags & CPUSTAT_KSS;

	atomic_set(&scb_s->cpuflags, newflags);
	return 0;
}

/*
 * Create a shadow copy of the crycb block and setup key wrapping, if
 * requested for guest 3 and enabled for guest 2.
 *
 * We only accept format-1 (no AP in g2), but convert it into format-2.
 * There is nothing to do for format-0.
 *
 * Returns: - 0 if shadowed or nothing to do
 *          - > 0 if control has to be given to guest 2
 */
static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
	const uint32_t crycbd_o = READ_ONCE(scb_o->crycbd);
	const u32 crycb_addr = crycbd_o & 0x7ffffff8U;
	unsigned long *b1, *b2;
	u8 ecb3_flags;

	scb_s->crycbd = 0;
	if (!(crycbd_o & vcpu->arch.sie_block->crycbd & CRYCB_FORMAT1))
		return 0;
	/* format-1 is supported with message-security-assist extension 3 */
	if (!test_kvm_facility(vcpu->kvm, 76))
		return 0;
	/* we may only allow it if enabled for guest 2 */
	ecb3_flags = scb_o->ecb3 & vcpu->arch.sie_block->ecb3 &
		     (ECB3_AES | ECB3_DEA);
	if (!ecb3_flags)
		return 0;

	if ((crycb_addr & PAGE_MASK) != ((crycb_addr + 128) & PAGE_MASK))
		return set_validity_icpt(scb_s, 0x003CU);
	else if (!crycb_addr)
		return set_validity_icpt(scb_s, 0x0039U);
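
	/*
	 * The DEA and AES wrapping key masks together occupy bytes 72-127 of
	 * the crycb, so the 128-byte page-boundary check above is sufficient.
	 */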
	/* copy only the wrapping keys */
	if (read_guest_real(vcpu, crycb_addr + 72,
			    vsie_page->crycb.dea_wrapping_key_mask, 56))
		return set_validity_icpt(scb_s, 0x0035U);

	scb_s->ecb3 |= ecb3_flags;
	scb_s->crycbd = ((__u32)(__u64) &vsie_page->crycb) | CRYCB_FORMAT1 |
			CRYCB_FORMAT2;

	/* xor both blocks in one run */
	b1 = (unsigned long *) vsie_page->crycb.dea_wrapping_key_mask;
	b2 = (unsigned long *)
	     vcpu->kvm->arch.crypto.crycb->dea_wrapping_key_mask;
	/* as 56%8 == 0, bitmap_xor won't overwrite any data */
	bitmap_xor(b1, b1, b2, BITS_PER_BYTE * 56);
	return 0;
}

/* shadow (round up/down) the ibc to avoid validity icpt */
static void prepare_ibc(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
	/* READ_ONCE does not work on bitfields - use a temporary variable */
	const uint32_t __new_ibc = scb_o->ibc;
	const uint32_t new_ibc = READ_ONCE(__new_ibc) & 0x0fffU;
	__u64 min_ibc = (sclp.ibc >> 16) & 0x0fffU;

	scb_s->ibc = 0;
	/* ibc installed in g2 and requested for g3 */
	if (vcpu->kvm->arch.model.ibc && new_ibc) {
		scb_s->ibc = new_ibc;
		/* take care of the minimum ibc level of the machine */
		if (scb_s->ibc < min_ibc)
			scb_s->ibc = min_ibc;
		/* take care of the maximum ibc level set for the guest */
		if (scb_s->ibc > vcpu->kvm->arch.model.ibc)
			scb_s->ibc = vcpu->kvm->arch.model.ibc;
	}
}

/* unshadow the scb, copying parameters back to the real scb */
static void unshadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;

	/* interception */
	scb_o->icptcode = scb_s->icptcode;
	scb_o->icptstatus = scb_s->icptstatus;
	scb_o->ipa = scb_s->ipa;
	scb_o->ipb = scb_s->ipb;
	scb_o->gbea = scb_s->gbea;

	/* timer */
	scb_o->cputm = scb_s->cputm;
	scb_o->ckc = scb_s->ckc;
	scb_o->todpr = scb_s->todpr;

	/* guest state */
	scb_o->gpsw = scb_s->gpsw;
	scb_o->gg14 = scb_s->gg14;
	scb_o->gg15 = scb_s->gg15;
	memcpy(scb_o->gcr, scb_s->gcr, 128);
	scb_o->pp = scb_s->pp;

	/* branch prediction */
	if (test_kvm_facility(vcpu->kvm, 82)) {
		scb_o->fpf &= ~FPF_BPBC;
		scb_o->fpf |= scb_s->fpf & FPF_BPBC;
	}

	/* interrupt intercept */
	switch (scb_s->icptcode) {
	case ICPT_PROGI:
	case ICPT_INSTPROGI:
	case ICPT_EXTINT:
		memcpy((void *)((u64)scb_o + 0xc0),
		       (void *)((u64)scb_s + 0xc0), 0xf0 - 0xc0);
		break;
	case ICPT_PARTEXEC:
		/* MVPG only */
		memcpy((void *)((u64)scb_o + 0xc0),
		       (void *)((u64)scb_s + 0xc0), 0xd0 - 0xc0);
		break;
	}
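
	/* ihcpu == 0xffff means a TLB flush is still pending; don't copy it back */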
	if (scb_s->ihcpu != 0xffffU)
		scb_o->ihcpu = scb_s->ihcpu;
}

/*
 * Setup the shadow scb by copying and checking the relevant parts of the g2
 * provided scb.
 *
 * Returns: - 0 if the scb has been shadowed
 *          - > 0 if control has to be given to guest 2
 */
static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	/* READ_ONCE does not work on bitfields - use a temporary variable */
	const uint32_t __new_prefix = scb_o->prefix;
	const uint32_t new_prefix = READ_ONCE(__new_prefix);
	const bool wants_tx = READ_ONCE(scb_o->ecb) & ECB_TE;
	bool had_tx = scb_s->ecb & ECB_TE;
	unsigned long new_mso = 0;
	int rc;

	/* make sure we don't have any leftovers when reusing the scb */
	scb_s->icptcode = 0;
	scb_s->eca = 0;
	scb_s->ecb = 0;
	scb_s->ecb2 = 0;
	scb_s->ecb3 = 0;
	scb_s->ecd = 0;
	scb_s->fac = 0;
	scb_s->fpf = 0;

	rc = prepare_cpuflags(vcpu, vsie_page);
	if (rc)
		goto out;

	/* timer */
	scb_s->cputm = scb_o->cputm;
	scb_s->ckc = scb_o->ckc;
	scb_s->todpr = scb_o->todpr;
	scb_s->epoch = scb_o->epoch;

	/* guest state */
	scb_s->gpsw = scb_o->gpsw;
	scb_s->gg14 = scb_o->gg14;
	scb_s->gg15 = scb_o->gg15;
	memcpy(scb_s->gcr, scb_o->gcr, 128);
	scb_s->pp = scb_o->pp;

	/* interception / execution handling */
	scb_s->gbea = scb_o->gbea;
	scb_s->lctl = scb_o->lctl;
	scb_s->svcc = scb_o->svcc;
	scb_s->ictl = scb_o->ictl;
	/*
	 * SKEY handling functions can't deal with false setting of PTE invalid
	 * bits. Therefore we cannot provide interpretation and would later
	 * have to provide own emulation handlers.
	 */
	if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_KSS))
		scb_s->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	scb_s->icpua = scb_o->icpua;

	if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_SM))
		new_mso = READ_ONCE(scb_o->mso) & 0xfffffffffff00000UL;
	/* if the hva of the prefix changes, we have to remap the prefix */
	if (scb_s->mso != new_mso || scb_s->prefix != new_prefix)
		prefix_unmapped(vsie_page);
	/* SIE will do mso/msl validity and exception checks for us */
	scb_s->msl = scb_o->msl & 0xfffffffffff00000UL;
	scb_s->mso = new_mso;
	scb_s->prefix = new_prefix;

	/* We have to definitely flush the tlb if this scb never ran */
	if (scb_s->ihcpu != 0xffffU)
		scb_s->ihcpu = scb_o->ihcpu;

	/* MVPG and Protection Exception Interpretation are always available */
	scb_s->eca |= scb_o->eca & (ECA_MVPGI | ECA_PROTEXCI);
	/* Host-protection-interruption introduced with ESOP */
	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_ESOP))
		scb_s->ecb |= scb_o->ecb & ECB_HOSTPROTINT;
	/* transactional execution */
	if (test_kvm_facility(vcpu->kvm, 73) && wants_tx) {
		/* remap the prefix if tx is toggled on */
		if (!had_tx)
			prefix_unmapped(vsie_page);
		scb_s->ecb |= ECB_TE;
	}
	/* branch prediction */
	if (test_kvm_facility(vcpu->kvm, 82))
		scb_s->fpf |= scb_o->fpf & FPF_BPBC;
	/* SIMD */
	if (test_kvm_facility(vcpu->kvm, 129)) {
		scb_s->eca |= scb_o->eca & ECA_VX;
		scb_s->ecd |= scb_o->ecd & ECD_HOSTREGMGMT;
	}
	/* Run-time-Instrumentation */
	if (test_kvm_facility(vcpu->kvm, 64))
		scb_s->ecb3 |= scb_o->ecb3 & ECB3_RI;
	/* Instruction Execution Prevention */
	if (test_kvm_facility(vcpu->kvm, 130))
		scb_s->ecb2 |= scb_o->ecb2 & ECB2_IEP;
	/* Guarded Storage */
	if (test_kvm_facility(vcpu->kvm, 133)) {
		scb_s->ecb |= scb_o->ecb & ECB_GS;
		scb_s->ecd |= scb_o->ecd & ECD_HOSTREGMGMT;
	}
	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIIF))
		scb_s->eca |= scb_o->eca & ECA_SII;
	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IB))
		scb_s->eca |= scb_o->eca & ECA_IB;
	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI))
		scb_s->eca |= scb_o->eca & ECA_CEI;
	/* Epoch Extension */
	if (test_kvm_facility(vcpu->kvm, 139))
		scb_s->ecd |= scb_o->ecd & ECD_MEF;
	/* etoken */
	if (test_kvm_facility(vcpu->kvm, 156))
		scb_s->ecd |= scb_o->ecd & ECD_ETOKENF;

	prepare_ibc(vcpu, vsie_page);
	rc = shadow_crycb(vcpu, vsie_page);
out:
	if (rc)
		unshadow_scb(vcpu, vsie_page);
	return rc;
}

void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
				 unsigned long end)
{
	struct kvm *kvm = gmap->private;
	struct vsie_page *cur;
	unsigned long prefix;
	struct page *page;
	int i;

	if (!gmap_is_shadow(gmap))
		return;
	if (start >= 1UL << 31)
		/* We are only interested in prefix pages */
		return;

	/*
	 * Only new shadow blocks are added to the list during runtime,
	 * therefore we can safely reference them all the time.
	 */
	for (i = 0; i < kvm->arch.vsie.page_count; i++) {
		page = READ_ONCE(kvm->arch.vsie.pages[i]);
		if (!page)
			continue;
		cur = page_to_virt(page);
		if (READ_ONCE(cur->gmap) != gmap)
			continue;
		prefix = cur->scb_s.prefix << GUEST_PREFIX_SHIFT;
		/* with mso/msl, the prefix lies at an offset */
		prefix += cur->scb_s.mso;
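		/*
		 * map_prefix() maps up to two pages (the second one only with
		 * TX), so compare against the full two-page prefix area.
		 */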
		if (prefix <= end && start <= prefix + 2 * PAGE_SIZE - 1)
			prefix_unmapped_sync(cur);
	}
}

/*
 * Map the first prefix page and if tx is enabled also the second prefix page.
 *
 * The prefix will be protected, a gmap notifier will inform about unmaps.
 * The shadow scb must not be executed until the prefix is remapped, this is
 * guaranteed by properly handling PROG_REQUEST.
 *
 * Returns: - 0 if successfully mapped or already mapped
 *          - > 0 if control has to be given to guest 2
 *          - -EAGAIN if the caller can retry immediately
 *          - -ENOMEM if out of memory
 */
static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	u64 prefix = scb_s->prefix << GUEST_PREFIX_SHIFT;
	int rc;

	if (prefix_is_mapped(vsie_page))
		return 0;

	/* mark it as mapped so we can catch any concurrent unmappers */
	prefix_mapped(vsie_page);

	/* with mso/msl, the prefix lies at offset *mso* */
	prefix += scb_s->mso;

	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
	if (!rc && (scb_s->ecb & ECB_TE))
		rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
					   prefix + PAGE_SIZE);
	/*
	 * We don't have to mprotect, we will be called for all unshadows.
	 * SIE will detect if protection applies and trigger a validity.
	 */
	if (rc)
		prefix_unmapped(vsie_page);
	if (rc > 0 || rc == -EFAULT)
		rc = set_validity_icpt(scb_s, 0x0037U);
	return rc;
}

/*
 * Pin the guest page given by gpa and set hpa to the pinned host address.
 * Will always be pinned writable.
 *
 * Returns: - 0 on success
 *          - -EINVAL if the gpa is not valid guest storage
 */
static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa)
{
	struct page *page;

	page = gfn_to_page(kvm, gpa_to_gfn(gpa));
	if (is_error_page(page))
		return -EINVAL;
	*hpa = (hpa_t) page_to_virt(page) + (gpa & ~PAGE_MASK);
	return 0;
}

/* Unpins a page previously pinned via pin_guest_page, marking it as dirty. */
static void unpin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t hpa)
{
	kvm_release_pfn_dirty(hpa >> PAGE_SHIFT);
	/* mark the page always as dirty for migration */
	mark_page_dirty(kvm, gpa_to_gfn(gpa));
}

/* unpin all blocks previously pinned by pin_blocks(), marking them dirty */
static void unpin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	hpa_t hpa;

	hpa = (u64) scb_s->scaoh << 32 | scb_s->scaol;
	if (hpa) {
		unpin_guest_page(vcpu->kvm, vsie_page->sca_gpa, hpa);
		vsie_page->sca_gpa = 0;
		scb_s->scaol = 0;
		scb_s->scaoh = 0;
	}

	hpa = scb_s->itdba;
	if (hpa) {
		unpin_guest_page(vcpu->kvm, vsie_page->itdba_gpa, hpa);
		vsie_page->itdba_gpa = 0;
		scb_s->itdba = 0;
	}

	hpa = scb_s->gvrd;
	if (hpa) {
		unpin_guest_page(vcpu->kvm, vsie_page->gvrd_gpa, hpa);
		vsie_page->gvrd_gpa = 0;
		scb_s->gvrd = 0;
	}

	hpa = scb_s->riccbd;
	if (hpa) {
		unpin_guest_page(vcpu->kvm, vsie_page->riccbd_gpa, hpa);
		vsie_page->riccbd_gpa = 0;
		scb_s->riccbd = 0;
	}

	hpa = scb_s->sdnxo;
	if (hpa) {
		unpin_guest_page(vcpu->kvm, vsie_page->sdnx_gpa, hpa);
		vsie_page->sdnx_gpa = 0;
		scb_s->sdnxo = 0;
	}
}

/*
 * Instead of shadowing some blocks, we can simply forward them because the
 * addresses in the scb are 64 bit long.
 *
 * This works as long as the data lies in one page. If blocks ever exceed one
 * page, we have to fall back to shadowing.
 *
 * As we reuse the sca, the vcpu pointers contained in it are invalid. We must
 * therefore not enable any facilities that access these pointers (e.g. SIGPIF).
 *
 * Returns: - 0 if all blocks were pinned.
 *          - > 0 if control has to be given to guest 2
 *          - -ENOMEM if out of memory
 */
static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	hpa_t hpa;
	gpa_t gpa;
	int rc = 0;

	gpa = READ_ONCE(scb_o->scaol) & ~0xfUL;
	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_64BSCAO))
		gpa |= (u64) READ_ONCE(scb_o->scaoh) << 32;
	if (gpa) {
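		/*
		 * Validity checks: the SCA must not lie in the first two
		 * pages, must not overlap this VCPU's prefix area, and (as a
		 * bsca_block) must not cross a page boundary.
		 */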
		if (gpa < 2 * PAGE_SIZE)
			rc = set_validity_icpt(scb_s, 0x0038U);
		else if ((gpa & ~0x1fffUL) == kvm_s390_get_prefix(vcpu))
			rc = set_validity_icpt(scb_s, 0x0011U);
		else if ((gpa & PAGE_MASK) !=
			 ((gpa + sizeof(struct bsca_block) - 1) & PAGE_MASK))
			rc = set_validity_icpt(scb_s, 0x003bU);
		if (!rc) {
			rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
			if (rc)
				rc = set_validity_icpt(scb_s, 0x0034U);
		}
		if (rc)
			goto unpin;
		vsie_page->sca_gpa = gpa;
		scb_s->scaoh = (u32)((u64)hpa >> 32);
		scb_s->scaol = (u32)(u64)hpa;
	}

	gpa = READ_ONCE(scb_o->itdba) & ~0xffUL;
	if (gpa && (scb_s->ecb & ECB_TE)) {
		if (gpa < 2 * PAGE_SIZE) {
			rc = set_validity_icpt(scb_s, 0x0080U);
			goto unpin;
		}
		/* 256 bytes cannot cross page boundaries */
		rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
		if (rc) {
			rc = set_validity_icpt(scb_s, 0x0080U);
			goto unpin;
		}
		vsie_page->itdba_gpa = gpa;
		scb_s->itdba = hpa;
	}

	gpa = READ_ONCE(scb_o->gvrd) & ~0x1ffUL;
	if (gpa && (scb_s->eca & ECA_VX) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
		if (gpa < 2 * PAGE_SIZE) {
			rc = set_validity_icpt(scb_s, 0x1310U);
			goto unpin;
		}
		/*
		 * 512 bytes vector registers cannot cross page boundaries
		 * if this block gets bigger, we have to shadow it.
		 */
		rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
		if (rc) {
			rc = set_validity_icpt(scb_s, 0x1310U);
			goto unpin;
		}
		vsie_page->gvrd_gpa = gpa;
		scb_s->gvrd = hpa;
	}

	gpa = READ_ONCE(scb_o->riccbd) & ~0x3fUL;
	if (gpa && (scb_s->ecb3 & ECB3_RI)) {
		if (gpa < 2 * PAGE_SIZE) {
			rc = set_validity_icpt(scb_s, 0x0043U);
			goto unpin;
		}
		/* 64 bytes cannot cross page boundaries */
		rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
		if (rc) {
			rc = set_validity_icpt(scb_s, 0x0043U);
			goto unpin;
		}
		/* Validity 0x0044 will be checked by SIE */
		vsie_page->riccbd_gpa = gpa;
		scb_s->riccbd = hpa;
	}

	if (((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) ||
	    (scb_s->ecd & ECD_ETOKENF)) {
		unsigned long sdnxc;

		gpa = READ_ONCE(scb_o->sdnxo) & ~0xfUL;
		sdnxc = READ_ONCE(scb_o->sdnxo) & 0xfUL;
		if (!gpa || gpa < 2 * PAGE_SIZE) {
			rc = set_validity_icpt(scb_s, 0x10b0U);
			goto unpin;
		}
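		/* sdnxc is the power-of-2 size/alignment: 64 bytes to 4 KB */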
		if (sdnxc < 6 || sdnxc > 12) {
			rc = set_validity_icpt(scb_s, 0x10b1U);
			goto unpin;
		}
		if (gpa & ((1 << sdnxc) - 1)) {
			rc = set_validity_icpt(scb_s, 0x10b2U);
			goto unpin;
		}
		/* Due to alignment rules (checked above) this cannot
		 * cross page boundaries
		 */
		rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
		if (rc) {
			rc = set_validity_icpt(scb_s, 0x10b0U);
			goto unpin;
		}
		vsie_page->sdnx_gpa = gpa;
		scb_s->sdnxo = hpa | sdnxc;
	}
	return 0;
unpin:
	unpin_blocks(vcpu, vsie_page);
	return rc;
}

/* unpin the scb provided by guest 2, marking it as dirty */
static void unpin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
		      gpa_t gpa)
{
	hpa_t hpa = (hpa_t) vsie_page->scb_o;

	if (hpa)
		unpin_guest_page(vcpu->kvm, gpa, hpa);
	vsie_page->scb_o = NULL;
}

/*
 * Pin the scb at gpa provided by guest 2 at vsie_page->scb_o.
 *
 * Returns: - 0 if the scb was pinned.
 *          - > 0 if control has to be given to guest 2
 */
static int pin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
		   gpa_t gpa)
{
	hpa_t hpa;
	int rc;

	rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
	if (rc) {
		rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
		WARN_ON_ONCE(rc);
		return 1;
	}
	vsie_page->scb_o = (struct kvm_s390_sie_block *) hpa;
	return 0;
}

/*
 * Inject a fault into guest 2.
 *
 * Returns: - > 0 if control has to be given to guest 2
 *            < 0 if an error occurred during injection.
 */
static int inject_fault(struct kvm_vcpu *vcpu, __u16 code, __u64 vaddr,
			bool write_flag)
{
	struct kvm_s390_pgm_info pgm = {
		.code = code,
		.trans_exc_code =
			/* 0-51: virtual address */
			(vaddr & 0xfffffffffffff000UL) |
			/* 52-53: store / fetch */
			(((unsigned int) !write_flag) + 1) << 10,
			/* 62-63: asce id (always primary == 0) */
		.exc_access_id = 0, /* always primary */
		.op_access_id = 0, /* not MVPG */
	};
	int rc;

	if (code == PGM_PROTECTION)
		pgm.trans_exc_code |= 0x4UL;

	rc = kvm_s390_inject_prog_irq(vcpu, &pgm);
	return rc ? rc : 1;
}

/*
 * Handle a fault during vsie execution on a gmap shadow.
 *
 * Returns: - 0 if the fault was resolved
 *          - > 0 if control has to be given to guest 2
 *          - < 0 if an error occurred
 */
static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	int rc;

	if (current->thread.gmap_int_code == PGM_PROTECTION)
		/* we can directly forward all protection exceptions */
		return inject_fault(vcpu, PGM_PROTECTION,
				    current->thread.gmap_addr, 1);

	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
				   current->thread.gmap_addr);
	if (rc > 0) {
		rc = inject_fault(vcpu, rc,
				  current->thread.gmap_addr,
				  current->thread.gmap_write_flag);
		if (rc >= 0)
			vsie_page->fault_addr = current->thread.gmap_addr;
	}
	return rc;
}

/*
 * Retry the previous fault that required guest 2 intervention. This avoids
 * one superfluous SIE re-entry and direct exit.
 *
 * Will ignore any errors. The next SIE fault will do proper fault handling.
 */
static void handle_last_fault(struct kvm_vcpu *vcpu,
			      struct vsie_page *vsie_page)
{
	if (vsie_page->fault_addr)
		kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
				      vsie_page->fault_addr);
	vsie_page->fault_addr = 0;
}

static inline void clear_vsie_icpt(struct vsie_page *vsie_page)
{
	vsie_page->scb_s.icptcode = 0;
}

/* rewind the psw and clear the vsie icpt, so we can retry execution */
static void retry_vsie_icpt(struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	int ilen = insn_length(scb_s->ipa >> 8);

	/* take care of EXECUTE instructions */
	if (scb_s->icptstatus & 1) {
		ilen = (scb_s->icptstatus >> 4) & 0x6;
		if (!ilen)
			ilen = 4;
	}
	scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, ilen);
	clear_vsie_icpt(vsie_page);
}

/*
 * Try to shadow + enable the guest 2 provided facility list.
 * Retry instruction execution if enabled for and provided by guest 2.
 *
 * Returns: - 0 if handled (retry or guest 2 icpt)
 *          - > 0 if control has to be given to guest 2
 */
static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	__u32 fac = READ_ONCE(vsie_page->scb_o->fac) & 0x7ffffff8U;
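
	/* only shadow if a list was provided and g2 has stfle (facility 7) */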
	if (fac && test_kvm_facility(vcpu->kvm, 7)) {
		retry_vsie_icpt(vsie_page);
		if (read_guest_real(vcpu, fac, &vsie_page->fac,
				    sizeof(vsie_page->fac)))
			return set_validity_icpt(scb_s, 0x1090U);
		scb_s->fac = (__u32)(__u64) &vsie_page->fac;
	}
	return 0;
}

/*
 * Run the vsie on a shadow scb and a shadow gmap, without any further
 * sanity checks, handling SIE faults.
 *
 * Returns: - 0 everything went fine
 *          - > 0 if control has to be given to guest 2
 *          - < 0 if an error occurred
 */
static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
	__releases(vcpu->kvm->srcu)
	__acquires(vcpu->kvm->srcu)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
	int guest_bp_isolation;
	int rc;

	handle_last_fault(vcpu, vsie_page);

	if (need_resched())
		schedule();
	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);

	/* save current guest state of bp isolation override */
	guest_bp_isolation = test_thread_flag(TIF_ISOLATE_BP_GUEST);

	/*
	 * The guest is running with BPBC, so we have to force it on for our
	 * nested guest. This is done by enabling BPBC globally, so the BPBC
	 * control in the SCB (which the nested guest can modify) is simply
	 * ignored.
	 */
	if (test_kvm_facility(vcpu->kvm, 82) &&
	    vcpu->arch.sie_block->fpf & FPF_BPBC)
		set_thread_flag(TIF_ISOLATE_BP_GUEST);

	local_irq_disable();
	guest_enter_irqoff();
	local_irq_enable();

	rc = sie64a(scb_s, vcpu->run->s.regs.gprs);

	local_irq_disable();
	guest_exit_irqoff();
	local_irq_enable();

	/* restore guest state for bp isolation override */
	if (!guest_bp_isolation)
		clear_thread_flag(TIF_ISOLATE_BP_GUEST);

	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	if (rc == -EINTR) {
		VCPU_EVENT(vcpu, 3, "%s", "machine check");
		kvm_s390_reinject_machine_check(vcpu, &vsie_page->mcck_info);
		return 0;
	}

	if (rc > 0)
		rc = 0; /* we could still have an icpt */
	else if (rc == -EFAULT)
		return handle_fault(vcpu, vsie_page);

	switch (scb_s->icptcode) {
	case ICPT_INST:
		if (scb_s->ipa == 0xb2b0)
			rc = handle_stfle(vcpu, vsie_page);
		break;
	case ICPT_STOP:
		/* stop not requested by g2 - must have been a kick */
		if (!(atomic_read(&scb_o->cpuflags) & CPUSTAT_STOP_INT))
			clear_vsie_icpt(vsie_page);
		break;
	case ICPT_VALIDITY:
		if ((scb_s->ipa & 0xf000) != 0xf000)
			scb_s->ipa += 0x1000;
		break;
	}
	return rc;
}

static void release_gmap_shadow(struct vsie_page *vsie_page)
{
	if (vsie_page->gmap)
		gmap_put(vsie_page->gmap);
	WRITE_ONCE(vsie_page->gmap, NULL);
	prefix_unmapped(vsie_page);
}

static int acquire_gmap_shadow(struct kvm_vcpu *vcpu,
			       struct vsie_page *vsie_page)
{
	unsigned long asce;
	union ctlreg0 cr0;
	struct gmap *gmap;
	int edat;

	asce = vcpu->arch.sie_block->gcr[1];
	cr0.val = vcpu->arch.sie_block->gcr[0];
	edat = cr0.edat && test_kvm_facility(vcpu->kvm, 8);
	edat += edat && test_kvm_facility(vcpu->kvm, 78);

	/*
	 * ASCE or EDAT could have changed since last icpt, or the gmap
	 * we're holding has been unshadowed. If the gmap is still valid,
	 * we can safely reuse it.
	 */
	if (vsie_page->gmap && gmap_shadow_valid(vsie_page->gmap, asce, edat))
		return 0;

	/* release the old shadow - if any, and mark the prefix as unmapped */
	release_gmap_shadow(vsie_page);
	gmap = gmap_shadow(vcpu->arch.gmap, asce, edat);
	if (IS_ERR(gmap))
		return PTR_ERR(gmap);
	gmap->private = vcpu->kvm;
	WRITE_ONCE(vsie_page->gmap, gmap);
	return 0;
}

/*
 * Register the shadow scb at the VCPU, e.g. for kicking out of vsie.
 */
static void register_shadow_scb(struct kvm_vcpu *vcpu,
				struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;

	WRITE_ONCE(vcpu->arch.vsie_block, &vsie_page->scb_s);
	/*
	 * External calls have to lead to a kick of the vcpu and
	 * therefore the vsie -> Simulate Wait state.
	 */
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT);
	/*
	 * We have to adjust the g3 epoch by the g2 epoch. The epoch will
	 * automatically be adjusted on tod clock changes via kvm_sync_clock.
	 */
	preempt_disable();
	scb_s->epoch += vcpu->kvm->arch.epoch;

	if (scb_s->ecd & ECD_MEF) {
		scb_s->epdx += vcpu->kvm->arch.epdx;
		if (scb_s->epoch < vcpu->kvm->arch.epoch)
			scb_s->epdx += 1;
	}

	preempt_enable();
}

/*
 * Unregister a shadow scb from a VCPU.
 */
static void unregister_shadow_scb(struct kvm_vcpu *vcpu)
{
	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT);
	WRITE_ONCE(vcpu->arch.vsie_block, NULL);
}

/*
 * Run the vsie on a shadowed scb, managing the gmap shadow, handling
 * prefix pages and faults.
 *
 * Returns: - 0 if no errors occurred
 *          - > 0 if control has to be given to guest 2
 *          - -ENOMEM if out of memory
 */
static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	int rc = 0;

	while (1) {
		rc = acquire_gmap_shadow(vcpu, vsie_page);
		if (!rc)
			rc = map_prefix(vcpu, vsie_page);
		if (!rc) {
			gmap_enable(vsie_page->gmap);
			update_intervention_requests(vsie_page);
			rc = do_vsie_run(vcpu, vsie_page);
			gmap_enable(vcpu->arch.gmap);
		}
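		/* a kick via kvm_s390_vsie_kick() may have blocked us */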
		atomic_andnot(PROG_BLOCK_SIE, &scb_s->prog20);

		if (rc == -EAGAIN)
			rc = 0;
		if (rc || scb_s->icptcode || signal_pending(current) ||
		    kvm_s390_vcpu_has_irq(vcpu, 0))
			break;
	}

	if (rc == -EFAULT) {
		/*
		 * Addressing exceptions are always presented as intercepts.
		 * As addressing exceptions are suppressing and our guest 3 PSW
		 * points at the responsible instruction, we have to
		 * forward the PSW and set the ilc. If we can't read guest 3
		 * instruction, we can use an arbitrary ilc. Let's always use
		 * ilen = 4 for now, so we can avoid reading in guest 3 virtual
		 * memory. (we could also fake the shadow so the hardware
		 * handles it).
		 */
		scb_s->icptcode = ICPT_PROGI;
		scb_s->iprcc = PGM_ADDRESSING;
		scb_s->pgmilc = 4;
		scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, 4);
	}
	return rc;
}

/*
 * Get or create a vsie page for a scb address.
 *
 * Returns: - address of a vsie page (cached or new one)
 *          - NULL if the same scb address is already used by another VCPU
 *          - ERR_PTR(-ENOMEM) if out of memory
 */
static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
{
	struct vsie_page *vsie_page;
	struct page *page;
	int nr_vcpus;
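
	/* scb addresses are 512-byte aligned, so addr >> 9 is a unique index */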
	rcu_read_lock();
	page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9);
	rcu_read_unlock();
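	/* an unused cached page has a ref count of 1; 2 means we now own it */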
	if (page) {
		if (page_ref_inc_return(page) == 2)
			return page_to_virt(page);
		page_ref_dec(page);
	}

	/*
	 * We want at least #online_vcpus shadows, so every VCPU can execute
	 * the VSIE in parallel.
	 */
	nr_vcpus = atomic_read(&kvm->online_vcpus);

	mutex_lock(&kvm->arch.vsie.mutex);
	if (kvm->arch.vsie.page_count < nr_vcpus) {
		page = alloc_page(GFP_KERNEL | __GFP_ZERO | GFP_DMA);
		if (!page) {
			mutex_unlock(&kvm->arch.vsie.mutex);
			return ERR_PTR(-ENOMEM);
		}
		page_ref_inc(page);
		kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = page;
		kvm->arch.vsie.page_count++;
	} else {
		/* reuse an existing entry that belongs to nobody */
		while (true) {
			page = kvm->arch.vsie.pages[kvm->arch.vsie.next];
			if (page_ref_inc_return(page) == 2)
				break;
			page_ref_dec(page);
			kvm->arch.vsie.next++;
			kvm->arch.vsie.next %= nr_vcpus;
		}
		radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9);
	}
	page->index = addr;
	/* double use of the same address */
	if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, page)) {
		page_ref_dec(page);
		mutex_unlock(&kvm->arch.vsie.mutex);
		return NULL;
	}
	mutex_unlock(&kvm->arch.vsie.mutex);

	vsie_page = page_to_virt(page);
	memset(&vsie_page->scb_s, 0, sizeof(struct kvm_s390_sie_block));
	release_gmap_shadow(vsie_page);
	vsie_page->fault_addr = 0;
	vsie_page->scb_s.ihcpu = 0xffffU;
	return vsie_page;
}

/* put a vsie page acquired via get_vsie_page */
static void put_vsie_page(struct kvm *kvm, struct vsie_page *vsie_page)
{
	struct page *page = pfn_to_page(__pa(vsie_page) >> PAGE_SHIFT);

	page_ref_dec(page);
}

int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
{
	struct vsie_page *vsie_page;
	unsigned long scb_addr;
	int rc;

	vcpu->stat.instruction_sie++;
	if (!test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIEF2))
		return -EOPNOTSUPP;
	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);

	BUILD_BUG_ON(sizeof(struct vsie_page) != PAGE_SIZE);
	scb_addr = kvm_s390_get_base_disp_s(vcpu, NULL);

	/* 512 byte alignment */
	if (unlikely(scb_addr & 0x1ffUL))
		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);

	if (signal_pending(current) || kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;

	vsie_page = get_vsie_page(vcpu->kvm, scb_addr);
	if (IS_ERR(vsie_page))
		return PTR_ERR(vsie_page);
	else if (!vsie_page)
		/* double use of sie control block - simply do nothing */
		return 0;

	rc = pin_scb(vcpu, vsie_page, scb_addr);
	if (rc)
		goto out_put;
	rc = shadow_scb(vcpu, vsie_page);
	if (rc)
		goto out_unpin_scb;
	rc = pin_blocks(vcpu, vsie_page);
	if (rc)
		goto out_unshadow;
	register_shadow_scb(vcpu, vsie_page);
	rc = vsie_run(vcpu, vsie_page);
	unregister_shadow_scb(vcpu);
	unpin_blocks(vcpu, vsie_page);
out_unshadow:
	unshadow_scb(vcpu, vsie_page);
out_unpin_scb:
	unpin_scb(vcpu, vsie_page, scb_addr);
out_put:
	put_vsie_page(vcpu->kvm, vsie_page);

	return rc < 0 ? rc : 0;
}

/* Init the vsie data structures. To be called when a vm is initialized. */
void kvm_s390_vsie_init(struct kvm *kvm)
{
	mutex_init(&kvm->arch.vsie.mutex);
	INIT_RADIX_TREE(&kvm->arch.vsie.addr_to_page, GFP_KERNEL);
}

/* Destroy the vsie data structures. To be called when a vm is destroyed. */
void kvm_s390_vsie_destroy(struct kvm *kvm)
{
	struct vsie_page *vsie_page;
	struct page *page;
	int i;

	mutex_lock(&kvm->arch.vsie.mutex);
	for (i = 0; i < kvm->arch.vsie.page_count; i++) {
		page = kvm->arch.vsie.pages[i];
		kvm->arch.vsie.pages[i] = NULL;
		vsie_page = page_to_virt(page);
		release_gmap_shadow(vsie_page);
		/* free the radix tree entry */
		radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9);
		__free_page(page);
	}
	kvm->arch.vsie.page_count = 0;
	mutex_unlock(&kvm->arch.vsie.mutex);
}

void kvm_s390_vsie_kick(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_sie_block *scb = READ_ONCE(vcpu->arch.vsie_block);

	/*
	 * Even if the VCPU lets go of the shadow sie block reference, it is
	 * still valid in the cache. So we can safely kick it.
	 */
	if (scb) {
		atomic_or(PROG_BLOCK_SIE, &scb->prog20);
		if (scb->prog0c & PROG_IN_SIE)
			atomic_or(CPUSTAT_STOP_INT, &scb->cpuflags);
	}
}