/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (C) 2010 Andreas Tobler
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/rmlock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/vmmeter.h>

#include <dev/ofw/openfirm.h>
#include <machine/ofw_machdep.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/uma.h>

#include <powerpc/aim/mmu_oea64.h>

#include "phyp-hvcall.h"
#define MMU_PHYP_DEBUG 0
#define MMU_PHYP_ID "mmu_phyp: "
#if MMU_PHYP_DEBUG
#define dprintf(fmt, ...) printf(fmt, ## __VA_ARGS__)
#define dprintf0(fmt, ...) dprintf(MMU_PHYP_ID fmt, ## __VA_ARGS__)
#else
#define dprintf(fmt, args...) do { ; } while (0)
#define dprintf0(fmt, args...) do { ; } while (0)
#endif
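
/*
 * Read-mostly lock serializing page table eviction: the ordinary
 * synch/clear/insert paths take it shared, while a thread evicting a
 * victim from a full PTEG takes it exclusively.
 */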
static struct rmlock mphyp_eviction_lock;

/*
 * Kernel MMU interface
 */
static void	mphyp_install(void);
static void	mphyp_bootstrap(vm_offset_t kernelstart,
		    vm_offset_t kernelend);
static void	mphyp_cpu_bootstrap(int ap);
static void	*mphyp_dump_pmap(void *ctx, void *buf,
		    u_long *nbytes);
static int64_t	mphyp_pte_synch(struct pvo_entry *pvo);
static int64_t	mphyp_pte_clear(struct pvo_entry *pvo, uint64_t ptebit);
static int64_t	mphyp_pte_unset(struct pvo_entry *pvo);
static int64_t	mphyp_pte_insert(struct pvo_entry *pvo);
static int64_t	mphyp_pte_unset_sp(struct pvo_entry *pvo);
static int64_t	mphyp_pte_insert_sp(struct pvo_entry *pvo);
static int64_t	mphyp_pte_replace_sp(struct pvo_entry *pvo);

static struct pmap_funcs mphyp_methods = {
	.install = mphyp_install,
	.bootstrap = mphyp_bootstrap,
	.cpu_bootstrap = mphyp_cpu_bootstrap,
	.dumpsys_dump_pmap = mphyp_dump_pmap,
};

static struct moea64_funcs mmu_phyp_funcs = {
	.pte_synch = mphyp_pte_synch,
	.pte_clear = mphyp_pte_clear,
	.pte_unset = mphyp_pte_unset,
	.pte_insert = mphyp_pte_insert,
	.pte_unset_sp = mphyp_pte_unset_sp,
	.pte_insert_sp = mphyp_pte_insert_sp,
	.pte_replace_sp = mphyp_pte_replace_sp,
};

MMU_DEF_INHERIT(pseries_mmu, "mmu_phyp", mphyp_methods, oea64_mmu);
static int brokenkvm = 0;

static void
print_kvm_bug_warning(void *data)
{

	if (brokenkvm)
		printf("WARNING: Running on a broken hypervisor that does "
		    "not support mandatory H_CLEAR_MOD and H_CLEAR_REF "
		    "hypercalls. Performance will be suboptimal.\n");
}

SYSINIT(kvmbugwarn1, SI_SUB_COPYRIGHT, SI_ORDER_THIRD + 1,
    print_kvm_bug_warning, NULL);
SYSINIT(kvmbugwarn2, SI_SUB_LAST, SI_ORDER_THIRD + 1, print_kvm_bug_warning,
    NULL);
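
/*
 * Point the common moea64 pmap layer at the hypervisor-backed PTE
 * manipulation routines below.
 */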
static void
mphyp_install(void)
{

	moea64_ops = &mmu_phyp_funcs;
}
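
/*
 * Bootstrap the MMU under the pseries hypervisor: size the page table
 * from the "ibm,pft-size" OF property, evict any stale PTEs left over
 * from firmware or a previous kernel, and scan "ibm,segment-page-sizes"
 * for large-page support.
 */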
static void
mphyp_bootstrap(vm_offset_t kernelstart, vm_offset_t kernelend)
{
	uint64_t final_pteg_count = 0;
	char buf[8];
	uint32_t prop[2];
	uint32_t nptlp, shift = 0, slb_encoding = 0;
	uint32_t lp_size, lp_encoding;
	struct lpte old;
	uint64_t vsid;
	phandle_t dev, node, root;
	int idx, len, res;
	bool has_lp;

	rm_init(&mphyp_eviction_lock, "pte eviction");

	moea64_early_bootstrap(kernelstart, kernelend);

	root = OF_peer(0);

	dev = OF_child(root);
	while (dev != 0) {
		res = OF_getprop(dev, "name", buf, sizeof(buf));
		if (res > 0 && strcmp(buf, "cpus") == 0)
			break;
		dev = OF_peer(dev);
	}

	node = OF_child(dev);
	while (node != 0) {
		res = OF_getprop(node, "device_type", buf, sizeof(buf));
		if (res > 0 && strcmp(buf, "cpu") == 0)
			break;
		node = OF_peer(node);
	}

	res = OF_getencprop(node, "ibm,pft-size", prop, sizeof(prop));
	if (res <= 0)
		panic("mmu_phyp: unknown PFT size");
	final_pteg_count = 1 << prop[1];

	res = OF_getencprop(node, "ibm,slb-size", prop, sizeof(prop[0]));
	if (res > 0)
		n_slbs = prop[0];
	dprintf0("slb-size=%i\n", n_slbs);

	moea64_pteg_count = final_pteg_count / sizeof(struct lpteg);

	/* Clear any old page table entries */
	for (idx = 0; idx < moea64_pteg_count * 8; idx++) {
		phyp_pft_hcall(H_READ, 0, idx, 0, 0, &old.pte_hi,
		    &old.pte_lo, &old.pte_lo);
		vsid = (old.pte_hi << (ADDR_API_SHFT64 - ADDR_PIDX_SHFT)) >> 28;
		if (vsid == VSID_VRMA || vsid == 0 /* Older VRMA */)
			continue;

		if (old.pte_hi & LPTE_VALID)
			phyp_hcall(H_REMOVE, 0, idx, 0);
	}

	/*
	 * Scan the large page size property for PAPR compatible machines.
	 * See PAPR D.5 Changes to Section 5.1.4, 'CPU Node Properties'
	 * for the encoding of the property.
	 */
	len = OF_getproplen(node, "ibm,segment-page-sizes");
	if (len > 0) {
		/*
		 * We have to use a variable length array on the stack
		 * since we have very limited stack space.
		 */
		pcell_t arr[len / sizeof(cell_t)];
		res = OF_getencprop(node, "ibm,segment-page-sizes", arr,
		    sizeof(arr));
		len /= 4;
		idx = 0;
		has_lp = false;
		while (len > 0) {
			shift = arr[idx];
			slb_encoding = arr[idx + 1];
			nptlp = arr[idx + 2];

			dprintf0("Segment Page Size: "
			    "%uKB, slb_enc=0x%X: {size, encoding}[%u] =",
			    shift > 10 ? 1 << (shift - 10) : 0,
			    slb_encoding, nptlp);

			idx += 3;
			len -= 3;
			while (len > 0 && nptlp) {
				lp_size = arr[idx];
				lp_encoding = arr[idx + 1];

				dprintf(" {%uKB, 0x%X}",
				    lp_size > 10 ? 1 << (lp_size - 10) : 0,
				    lp_encoding);

				if (slb_encoding == SLBV_L && lp_encoding == 0)
					has_lp = true;

				if (slb_encoding == SLB_PGSZ_4K_4K &&
				    lp_encoding == LP_4K_16M)
					moea64_has_lp_4k_16m = true;

				idx += 2;
				len -= 2;
				nptlp--;
			}
			dprintf("\n");
			if (has_lp && moea64_has_lp_4k_16m)
				break;
		}

		if (has_lp) {
			moea64_large_page_shift = shift;
			moea64_large_page_size = 1ULL << lp_size;
			moea64_large_page_mask = moea64_large_page_size - 1;
			hw_direct_map = 1;
			printf(MMU_PHYP_ID
			    "Support for hugepages of %uKB detected\n",
			    moea64_large_page_shift > 10 ?
			    1 << (moea64_large_page_shift - 10) : 0);
		} else {
			moea64_large_page_size = 0;
			moea64_large_page_shift = 0;
			moea64_large_page_mask = 0;
			hw_direct_map = 0;
			printf(MMU_PHYP_ID
			    "Support for hugepages not found\n");
		}
	}

	moea64_mid_bootstrap(kernelstart, kernelend);
	moea64_late_bootstrap(kernelstart, kernelend);

	/* Test for broken versions of KVM that don't conform to the spec */
	if (phyp_hcall(H_CLEAR_MOD, 0, 0) == H_FUNCTION)
		brokenkvm = 1;
}
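
/*
 * Per-CPU bring-up: flush the SLB and reinstall the kernel's entries
 * from the per-CPU shadow copy kept in aim.slb.
 */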
static void
mphyp_cpu_bootstrap(int ap)
{
	struct slb *slb = PCPU_GET(aim.slb);
	register_t seg0;
	int i;

	/*
	 * Install kernel SLB entries
	 */
	__asm __volatile ("slbia");
	__asm __volatile ("slbmfee %0,%1; slbie %0;" : "=r"(seg0) : "r"(0));
	for (i = 0; i < 64; i++) {
		if (!(slb[i].slbe & SLBE_VALID))
			continue;

		__asm __volatile ("slbmte %0, %1" ::
		    "r"(slb[i].slbv), "r"(slb[i].slbe));
	}
}
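
/*
 * Read the PTE back from the hypervisor with H_READ and return its
 * REF/CHG bits, or -1 if the slot no longer holds a valid mapping for
 * this PVO.
 */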
static int64_t
mphyp_pte_synch(struct pvo_entry *pvo)
{
	struct lpte pte;
	uint64_t junk;

	__asm __volatile("ptesync");
	phyp_pft_hcall(H_READ, 0, pvo->pvo_pte.slot, 0, 0, &pte.pte_hi,
	    &pte.pte_lo, &junk);
	if ((pte.pte_hi & LPTE_AVPN_MASK) !=
	    ((pvo->pvo_vpn >> (ADDR_API_SHFT64 - ADDR_PIDX_SHFT)) &
	    LPTE_AVPN_MASK))
		return (-1);
	if (!(pte.pte_hi & LPTE_VALID))
		return (-1);

	return (pte.pte_lo & (LPTE_CHG | LPTE_REF));
}
static int64_t
mphyp_pte_clear(struct pvo_entry *pvo, uint64_t ptebit)
{
	struct rm_priotracker track;
	int64_t refchg;
	uint64_t ptelo, junk;
	int err;

	/*
	 * This involves two steps (synch and clear) so we need the entry
	 * not to change in the middle. We are protected against deliberate
	 * unset by virtue of holding the pmap lock. Protection against
	 * incidental unset (page table eviction) comes from holding the
	 * shared eviction lock.
	 */
	PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
	rm_rlock(&mphyp_eviction_lock, &track);

	refchg = mphyp_pte_synch(pvo);
	if (refchg < 0) {
		rm_runlock(&mphyp_eviction_lock, &track);
		return (refchg);
	}

	if (brokenkvm) {
		/*
		 * No way to clear either bit, which is total madness.
		 * Pessimistically claim that, once modified, it stays so
		 * forever and that it is never referenced.
		 */
		rm_runlock(&mphyp_eviction_lock, &track);
		return (refchg & ~LPTE_REF);
	}

	if (ptebit & LPTE_CHG) {
		err = phyp_pft_hcall(H_CLEAR_MOD, 0, pvo->pvo_pte.slot, 0, 0,
		    &ptelo, &junk, &junk);
		KASSERT(err == H_SUCCESS,
		    ("Error clearing page change bit: %d", err));
		refchg |= (ptelo & LPTE_CHG);
	}
	if (ptebit & LPTE_REF) {
		err = phyp_pft_hcall(H_CLEAR_REF, 0, pvo->pvo_pte.slot, 0, 0,
		    &ptelo, &junk, &junk);
		KASSERT(err == H_SUCCESS,
		    ("Error clearing page reference bit: %d", err));
		refchg |= (ptelo & LPTE_REF);
	}

	rm_runlock(&mphyp_eviction_lock, &track);

	return (refchg);
}
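
/*
 * Remove the PTE with H_REMOVE, matched by AVPN so we never remove a
 * slot that has since been recycled, and hand back its REF/CHG bits.
 */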
static int64_t
mphyp_pte_unset(struct pvo_entry *pvo)
{
	struct lpte pte;
	uint64_t junk;
	int err;

	PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);

	moea64_pte_from_pvo(pvo, &pte);

	err = phyp_pft_hcall(H_REMOVE, H_AVPN, pvo->pvo_pte.slot,
	    pte.pte_hi & LPTE_AVPN_MASK, 0, &pte.pte_hi, &pte.pte_lo,
	    &junk);
	KASSERT(err == H_SUCCESS || err == H_NOT_FOUND,
	    ("Error removing page: %d", err));

	if (err == H_NOT_FOUND) {
		STAT_MOEA64(moea64_pte_overflow--);
		return (-1);
	}

	return (pte.pte_lo & (LPTE_REF | LPTE_CHG));
}
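
/*
 * Pick an eviction victim within a PTEG. Starting at a pseudo-random
 * slot derived from the timebase, skip wired and superpage entries and
 * prefer one that is invalid or has not been referenced lately.
 * Returns the chosen slot, or -1 if every slot is pinned.
 */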
static uintptr_t
mphyp_pte_spillable_ident(uintptr_t ptegbase, struct lpte *to_evict)
{
	uint64_t slot, junk, k;
	struct lpte pt;
	int i, j;

	/* Start at a random slot */
	i = mftb() % 8;
	k = -1;
	for (j = 0; j < 8; j++) {
		slot = ptegbase + (i + j) % 8;
		phyp_pft_hcall(H_READ, 0, slot, 0, 0, &pt.pte_hi,
		    &pt.pte_lo, &junk);

		if ((pt.pte_hi & (LPTE_WIRED | LPTE_BIG)) != 0)
			continue;

		/* This is a candidate, so remember it */
		k = slot;

		/* Try to get a page that has not been used lately */
		if (!(pt.pte_hi & LPTE_VALID) || !(pt.pte_lo & LPTE_REF)) {
			memcpy(to_evict, &pt, sizeof(struct lpte));
			return (k);
		}
	}

	if (k == -1)
		return (k);

	phyp_pft_hcall(H_READ, 0, k, 0, 0, &to_evict->pte_hi,
	    &to_evict->pte_lo, &junk);
	return (k);
}
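
/*
 * Try to H_ENTER the PTE into a free slot, first in the primary and
 * then in the secondary hash PTEG. Returns 0 on success and -1 if both
 * PTEGs are full. The caller must hold the eviction lock.
 */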
static __inline int64_t
mphyp_pte_insert_locked(struct pvo_entry *pvo, struct lpte *pte)
{
	struct lpte evicted;
	uint64_t index, junk;
	int64_t result;

	/*
	 * First try primary hash.
	 */
	pvo->pvo_pte.slot &= ~7UL; /* Base slot address */
	result = phyp_pft_hcall(H_ENTER, 0, pvo->pvo_pte.slot, pte->pte_hi,
	    pte->pte_lo, &index, &evicted.pte_lo, &junk);
	if (result == H_SUCCESS) {
		pvo->pvo_pte.slot = index;
		return (0);
	}
	KASSERT(result == H_PTEG_FULL, ("Page insertion error: %ld "
	    "(ptegidx: %#zx/%#lx, PTE %#lx/%#lx)", result, pvo->pvo_pte.slot,
	    moea64_pteg_count, pte->pte_hi, pte->pte_lo));

	/*
	 * Next try secondary hash.
	 */
	pvo->pvo_vaddr ^= PVO_HID;
	pte->pte_hi ^= LPTE_HID;
	pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3);

	result = phyp_pft_hcall(H_ENTER, 0, pvo->pvo_pte.slot,
	    pte->pte_hi, pte->pte_lo, &index, &evicted.pte_lo, &junk);
	if (result == H_SUCCESS) {
		pvo->pvo_pte.slot = index;
		return (0);
	}
	KASSERT(result == H_PTEG_FULL, ("Secondary page insertion error: %ld",
	    result));

	return (-1);
}
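
/*
 * Both PTEGs were full: find a spillable victim (trying the primary
 * and then the secondary PTEG), remove it, and insert the new PTE into
 * the freed slot with H_EXACT. The caller must hold the eviction lock
 * exclusively.
 */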
static __inline int64_t
mphyp_pte_evict_and_insert_locked(struct pvo_entry *pvo, struct lpte *pte)
{
	struct lpte evicted;
	uint64_t index, junk, lastptelo;
	int64_t result;

	evicted.pte_hi = 0;

	index = mphyp_pte_spillable_ident(pvo->pvo_pte.slot, &evicted);
	if (index == -1L) {
		/* Try other hash table? */
		pvo->pvo_vaddr ^= PVO_HID;
		pte->pte_hi ^= LPTE_HID;
		pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3);
		index = mphyp_pte_spillable_ident(pvo->pvo_pte.slot,
		    &evicted);
	}

	if (index == -1L) {
		/* No freeable slots in either PTEG? We're hosed. */
		rm_wunlock(&mphyp_eviction_lock);
		panic("mphyp_pte_insert: overflow");
		return (-1);
	}

	/* Victim acquired: update page before waving goodbye */
	if (evicted.pte_hi & LPTE_VALID) {
		result = phyp_pft_hcall(H_REMOVE, H_AVPN, index,
		    evicted.pte_hi & LPTE_AVPN_MASK, 0, &junk, &lastptelo,
		    &junk);
		STAT_MOEA64(moea64_pte_overflow++);
		KASSERT(result == H_SUCCESS || result == H_NOT_FOUND,
		    ("Error evicting page: %d", (int)result));
	}

	/*
	 * Set the new PTE.
	 */
	result = phyp_pft_hcall(H_ENTER, H_EXACT, index, pte->pte_hi,
	    pte->pte_lo, &index, &evicted.pte_lo, &junk);

	pvo->pvo_pte.slot = index;
	if (result == H_SUCCESS)
		return (0);

	rm_wunlock(&mphyp_eviction_lock);
	panic("Page replacement error: %ld", result);
	return (result);
}
static int64_t
mphyp_pte_insert(struct pvo_entry *pvo)
{
	struct rm_priotracker track;
	int64_t ret;
	struct lpte pte;

	PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);

	/* Initialize PTE */
	moea64_pte_from_pvo(pvo, &pte);

	/* Make sure further insertion is locked out during evictions */
	rm_rlock(&mphyp_eviction_lock, &track);

	ret = mphyp_pte_insert_locked(pvo, &pte);
	rm_runlock(&mphyp_eviction_lock, &track);

	if (ret == -1) {
		/*
		 * Out of luck. Find a PTE to sacrifice.
		 */

		/* Lock out all insertions for a bit */
		rm_wlock(&mphyp_eviction_lock);
		ret = mphyp_pte_evict_and_insert_locked(pvo, &pte);
		rm_wunlock(&mphyp_eviction_lock); /* All clear */
	}

	return (ret);
}
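
/*
 * Dump-time iterator: read a block's worth of PTEs from the hypervisor
 * page table via H_READ into the supplied buffer, advancing dctx->ptex.
 * Returns NULL once the table has been exhausted.
 */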
static void *
mphyp_dump_pmap(void *ctx, void *buf, u_long *nbytes)
{
	struct dump_context *dctx;
	struct lpte p, *pbuf;
	int bufidx;
	uint64_t junk;
	u_long ptex, ptex_end;

	dctx = (struct dump_context *)ctx;
	pbuf = (struct lpte *)buf;
	bufidx = 0;
	ptex = dctx->ptex;
	ptex_end = ptex + dctx->blksz / sizeof(struct lpte);
	ptex_end = MIN(ptex_end, dctx->ptex_end);
	*nbytes = (ptex_end - ptex) * sizeof(struct lpte);

	if (*nbytes == 0)
		return (NULL);

	for (; ptex < ptex_end; ptex++) {
		phyp_pft_hcall(H_READ, 0, ptex, 0, 0,
		    &p.pte_hi, &p.pte_lo, &junk);
		pbuf[bufidx++] = p;
	}

	dctx->ptex = ptex;
	return (buf);
}
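
/*
 * Superpage variants: walk every constituent PVO in the HPT_SP_SIZE
 * VA range and apply the corresponding single-page operation,
 * accumulating REF/CHG bits along the way.
 */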
static int64_t
mphyp_pte_unset_sp(struct pvo_entry *pvo)
{
	struct lpte pte;
	uint64_t junk, refchg;
	int err;
	vm_offset_t eva;
	pmap_t pm;

	pm = pvo->pvo_pmap;
	PMAP_LOCK_ASSERT(pm, MA_OWNED);
	KASSERT((PVO_VADDR(pvo) & HPT_SP_MASK) == 0,
	    ("%s: va %#jx unaligned", __func__, (uintmax_t)PVO_VADDR(pvo)));

	refchg = 0;
	eva = PVO_VADDR(pvo) + HPT_SP_SIZE;

	for (; pvo != NULL && PVO_VADDR(pvo) < eva;
	    pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) {
		moea64_pte_from_pvo(pvo, &pte);

		err = phyp_pft_hcall(H_REMOVE, H_AVPN, pvo->pvo_pte.slot,
		    pte.pte_hi & LPTE_AVPN_MASK, 0, &pte.pte_hi,
		    &pte.pte_lo, &junk);
		KASSERT(err == H_SUCCESS || err == H_NOT_FOUND,
		    ("Error removing page: %d", err));

		if (err == H_NOT_FOUND)
			STAT_MOEA64(moea64_pte_overflow--);
		refchg |= pte.pte_lo & (LPTE_REF | LPTE_CHG);
	}

	return (refchg);
}
static int64_t
mphyp_pte_insert_sp(struct pvo_entry *pvo)
{
	struct rm_priotracker track;
	int64_t ret;
	struct lpte pte;
	vm_offset_t eva;
	pmap_t pm;

	pm = pvo->pvo_pmap;
	PMAP_LOCK_ASSERT(pm, MA_OWNED);
	KASSERT((PVO_VADDR(pvo) & HPT_SP_MASK) == 0,
	    ("%s: va %#jx unaligned", __func__, (uintmax_t)PVO_VADDR(pvo)));

	eva = PVO_VADDR(pvo) + HPT_SP_SIZE;

	/* Make sure further insertion is locked out during evictions */
	rm_rlock(&mphyp_eviction_lock, &track);

	for (; pvo != NULL && PVO_VADDR(pvo) < eva;
	    pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) {
		/* Initialize PTE */
		moea64_pte_from_pvo(pvo, &pte);

		ret = mphyp_pte_insert_locked(pvo, &pte);
		if (ret == -1) {
			/*
			 * Out of luck. Find a PTE to sacrifice.
			 */

			/* Lock out all insertions for a bit */
			rm_runlock(&mphyp_eviction_lock, &track);
			rm_wlock(&mphyp_eviction_lock);
			mphyp_pte_evict_and_insert_locked(pvo, &pte);
			rm_wunlock(&mphyp_eviction_lock); /* All clear */
			rm_rlock(&mphyp_eviction_lock, &track);
		}
	}

	rm_runlock(&mphyp_eviction_lock, &track);

	return (0);
}
static int64_t
mphyp_pte_replace_sp(struct pvo_entry *pvo)
{
	int64_t refchg;

	refchg = mphyp_pte_unset_sp(pvo);
	mphyp_pte_insert_sp(pvo);

	return (refchg);
}