pmap_32.c

/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (C) 2020 Justin Hibbits
 * Copyright (C) 2007-2009 Semihalf, Rafal Jaworowski <raj@semihalf.com>
 * Copyright (C) 2006 Semihalf, Marian Balakowicz <m8@semihalf.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
 * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Some hw specific parts of this pmap were derived or influenced
 * by NetBSD's ibm4xx pmap module. More generic code is shared with
 * a few other pmap modules from the FreeBSD tree.
 */

/*
 * VM layout notes:
 *
 * Kernel and user threads run within one common virtual address space
 * defined by AS=0.
 *
 * 32-bit pmap:
 * Virtual address space layout:
 * -----------------------------
 * 0x0000_0000 - 0x7fff_ffff : user process
 * 0x8000_0000 - 0xbfff_ffff : pmap_mapdev()-ed area (PCI/PCIE etc.)
 * 0xc000_0000 - 0xffff_efff : KVA
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_kstack_pages.h"

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/malloc.h>
#include <sys/ktr.h>
#include <sys/proc.h>
#include <sys/user.h>
#include <sys/queue.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kerneldump.h>
#include <sys/linker.h>
#include <sys/msgbuf.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/vm_param.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_phys.h>
#include <vm/vm_pagequeue.h>
#include <vm/uma.h>

#include <machine/_inttypes.h>
#include <machine/cpu.h>
#include <machine/pcb.h>
#include <machine/platform.h>

#include <machine/tlb.h>
#include <machine/spr.h>
#include <machine/md_var.h>
#include <machine/mmuvar.h>
#include <machine/pmap.h>
#include <machine/pte.h>

#include <ddb/ddb.h>
#define PRI0ptrX	"08x"

/* Reserved KVA space and mutex for mmu_booke_zero_page. */
static vm_offset_t zero_page_va;
static struct mtx zero_page_mutex;

/* Reserved KVA space and mutex for mmu_booke_copy_page. */
static vm_offset_t copy_page_src_va;
static vm_offset_t copy_page_dst_va;
static struct mtx copy_page_mutex;

static vm_offset_t kernel_ptbl_root;
static unsigned int kernel_ptbls;	/* Number of KVA ptbls. */

/**************************************************************************/
/* PMAP */
/**************************************************************************/

#define VM_MAPDEV_BASE	((vm_offset_t)VM_MAXUSER_ADDRESS + PAGE_SIZE)

static void tid_flush(tlbtid_t tid);
static unsigned long ilog2(unsigned long);

/**************************************************************************/
/* Page table management */
/**************************************************************************/

#define PMAP_ROOT_SIZE	(sizeof(pte_t**) * PDIR_NENTRIES)
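
/*
 * Two-level layout: pm_pdir is an array of PDIR_NENTRIES pointers, each of
 * which refers to a page table (ptbl) of PTBL_PAGES pages holding the PTEs
 * for one PDIR_SIZE-sized slice of the address space.
 */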
static void ptbl_init(void);
static struct ptbl_buf *ptbl_buf_alloc(void);
static void ptbl_buf_free(struct ptbl_buf *);
static void ptbl_free_pmap_ptbl(pmap_t, pte_t *);

static pte_t *ptbl_alloc(pmap_t, unsigned int, boolean_t);
static void ptbl_free(pmap_t, unsigned int);
static void ptbl_hold(pmap_t, unsigned int);
static int ptbl_unhold(pmap_t, unsigned int);

static vm_paddr_t pte_vatopa(pmap_t, vm_offset_t);
static int pte_enter(pmap_t, vm_page_t, vm_offset_t, uint32_t, boolean_t);
static int pte_remove(pmap_t, vm_offset_t, uint8_t);
static pte_t *pte_find(pmap_t, vm_offset_t);

struct ptbl_buf {
        TAILQ_ENTRY(ptbl_buf) link;	/* list link */
        vm_offset_t kva;		/* va of mapping */
};

/* Number of kva ptbl buffers, each covering one ptbl (PTBL_PAGES). */
#define PTBL_BUFS	(128 * 16)

/* ptbl free list and a lock used for access synchronization. */
static TAILQ_HEAD(, ptbl_buf) ptbl_buf_freelist;
static struct mtx ptbl_buf_freelist_lock;

/* Base address of kva space allocated for ptbl bufs. */
static vm_offset_t ptbl_buf_pool_vabase;

/* Pointer to ptbl_buf structures. */
static struct ptbl_buf *ptbl_bufs;
/**************************************************************************/
/* Page table related */
/**************************************************************************/

/* Initialize pool of kva ptbl buffers. */
static void
ptbl_init(void)
{
        int i;

        CTR3(KTR_PMAP, "%s: s (ptbl_bufs = 0x%08x size 0x%08x)", __func__,
            (uint32_t)ptbl_bufs, sizeof(struct ptbl_buf) * PTBL_BUFS);
        CTR3(KTR_PMAP, "%s: s (ptbl_buf_pool_vabase = 0x%08x size = 0x%08x)",
            __func__, ptbl_buf_pool_vabase, PTBL_BUFS * PTBL_PAGES * PAGE_SIZE);

        mtx_init(&ptbl_buf_freelist_lock, "ptbl bufs lock", NULL, MTX_DEF);
        TAILQ_INIT(&ptbl_buf_freelist);

        for (i = 0; i < PTBL_BUFS; i++) {
                ptbl_bufs[i].kva =
                    ptbl_buf_pool_vabase + i * PTBL_PAGES * PAGE_SIZE;
                TAILQ_INSERT_TAIL(&ptbl_buf_freelist, &ptbl_bufs[i], link);
        }
}

/* Get a ptbl_buf from the freelist. */
static struct ptbl_buf *
ptbl_buf_alloc(void)
{
        struct ptbl_buf *buf;

        mtx_lock(&ptbl_buf_freelist_lock);
        buf = TAILQ_FIRST(&ptbl_buf_freelist);
        if (buf != NULL)
                TAILQ_REMOVE(&ptbl_buf_freelist, buf, link);
        mtx_unlock(&ptbl_buf_freelist_lock);

        CTR2(KTR_PMAP, "%s: buf = %p", __func__, buf);

        return (buf);
}

/* Return a ptbl buf to the free pool. */
static void
ptbl_buf_free(struct ptbl_buf *buf)
{

        CTR2(KTR_PMAP, "%s: buf = %p", __func__, buf);

        mtx_lock(&ptbl_buf_freelist_lock);
        TAILQ_INSERT_TAIL(&ptbl_buf_freelist, buf, link);
        mtx_unlock(&ptbl_buf_freelist_lock);
}
/*
 * Search the pmap's list of allocated ptbl bufs for the buf that maps the
 * given ptbl and return it to the free pool.
 */
static void
ptbl_free_pmap_ptbl(pmap_t pmap, pte_t *ptbl)
{
        struct ptbl_buf *pbuf;

        CTR2(KTR_PMAP, "%s: ptbl = %p", __func__, ptbl);

        PMAP_LOCK_ASSERT(pmap, MA_OWNED);

        TAILQ_FOREACH(pbuf, &pmap->pm_ptbl_list, link)
                if (pbuf->kva == (vm_offset_t)ptbl) {
                        /* Remove from pmap ptbl buf list. */
                        TAILQ_REMOVE(&pmap->pm_ptbl_list, pbuf, link);

                        /* Free corresponding ptbl buf. */
                        ptbl_buf_free(pbuf);

                        break;
                }
}
/* Allocate page table. */
static pte_t *
ptbl_alloc(pmap_t pmap, unsigned int pdir_idx, boolean_t nosleep)
{
        vm_page_t mtbl[PTBL_PAGES];
        vm_page_t m;
        struct ptbl_buf *pbuf;
        unsigned int pidx;
        pte_t *ptbl;
        int i, j;

        CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap,
            (pmap == kernel_pmap), pdir_idx);

        KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)),
            ("ptbl_alloc: invalid pdir_idx"));
        KASSERT((pmap->pm_pdir[pdir_idx] == NULL),
            ("pte_alloc: valid ptbl entry exists!"));

        pbuf = ptbl_buf_alloc();
        if (pbuf == NULL)
                panic("pte_alloc: couldn't alloc kernel virtual memory");

        ptbl = (pte_t *)pbuf->kva;

        CTR2(KTR_PMAP, "%s: ptbl kva = %p", __func__, ptbl);

        for (i = 0; i < PTBL_PAGES; i++) {
                pidx = (PTBL_PAGES * pdir_idx) + i;
                while ((m = vm_page_alloc(NULL, pidx,
                    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
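                        /*
                         * Out of memory: either undo the partial allocation
                         * and fail (nosleep), or drop the locks and wait for
                         * free pages before retrying.
                         */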
                        if (nosleep) {
                                ptbl_free_pmap_ptbl(pmap, ptbl);
                                for (j = 0; j < i; j++)
                                        vm_page_free(mtbl[j]);
                                vm_wire_sub(i);
                                return (NULL);
                        }
                        PMAP_UNLOCK(pmap);
                        rw_wunlock(&pvh_global_lock);
                        vm_wait(NULL);
                        rw_wlock(&pvh_global_lock);
                        PMAP_LOCK(pmap);
                }
                mtbl[i] = m;
        }

        /* Map allocated pages into kernel_pmap. */
        mmu_booke_qenter((vm_offset_t)ptbl, mtbl, PTBL_PAGES);

        /* Zero whole ptbl. */
        bzero((caddr_t)ptbl, PTBL_PAGES * PAGE_SIZE);

        /* Add pbuf to the pmap ptbl bufs list. */
        TAILQ_INSERT_TAIL(&pmap->pm_ptbl_list, pbuf, link);

        return (ptbl);
}
/* Free ptbl pages and invalidate pdir entry. */
static void
ptbl_free(pmap_t pmap, unsigned int pdir_idx)
{
        pte_t *ptbl;
        vm_paddr_t pa;
        vm_offset_t va;
        vm_page_t m;
        int i;

        CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap,
            (pmap == kernel_pmap), pdir_idx);

        KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)),
            ("ptbl_free: invalid pdir_idx"));

        ptbl = pmap->pm_pdir[pdir_idx];

        CTR2(KTR_PMAP, "%s: ptbl = %p", __func__, ptbl);

        KASSERT((ptbl != NULL), ("ptbl_free: null ptbl"));

        /*
         * Invalidate the pdir entry as soon as possible, so that other CPUs
         * don't attempt to look up the page tables we are releasing.
         */
        mtx_lock_spin(&tlbivax_mutex);
        tlb_miss_lock();

        pmap->pm_pdir[pdir_idx] = NULL;

        tlb_miss_unlock();
        mtx_unlock_spin(&tlbivax_mutex);

        for (i = 0; i < PTBL_PAGES; i++) {
                va = ((vm_offset_t)ptbl + (i * PAGE_SIZE));
                pa = pte_vatopa(kernel_pmap, va);
                m = PHYS_TO_VM_PAGE(pa);
                vm_page_free_zero(m);
                vm_wire_sub(1);
                mmu_booke_kremove(va);
        }

        ptbl_free_pmap_ptbl(pmap, ptbl);
}
/*
 * Decrement ptbl pages hold count and attempt to free ptbl pages.
 * Called when removing pte entry from ptbl.
 *
 * Return 1 if ptbl pages were freed.
 */
static int
ptbl_unhold(pmap_t pmap, unsigned int pdir_idx)
{
        pte_t *ptbl;
        vm_paddr_t pa;
        vm_page_t m;
        int i;

        CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap,
            (pmap == kernel_pmap), pdir_idx);

        KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)),
            ("ptbl_unhold: invalid pdir_idx"));
        KASSERT((pmap != kernel_pmap),
            ("ptbl_unhold: unholding kernel ptbl!"));

        ptbl = pmap->pm_pdir[pdir_idx];

        //debugf("ptbl_unhold: ptbl = 0x%08x\n", (u_int32_t)ptbl);
        KASSERT(((vm_offset_t)ptbl >= VM_MIN_KERNEL_ADDRESS),
            ("ptbl_unhold: non kva ptbl"));

        /* decrement hold count */
        for (i = 0; i < PTBL_PAGES; i++) {
                pa = pte_vatopa(kernel_pmap,
                    (vm_offset_t)ptbl + (i * PAGE_SIZE));
                m = PHYS_TO_VM_PAGE(pa);
                m->ref_count--;
        }

        /*
         * Free ptbl pages if there are no pte entries in this ptbl.
         * ref_count has the same value for all ptbl pages, so check the last
         * page.
         */
        if (m->ref_count == 0) {
                ptbl_free(pmap, pdir_idx);

                //debugf("ptbl_unhold: e (freed ptbl)\n");
                return (1);
        }

        return (0);
}
/*
 * Increment hold count for ptbl pages. This routine is used when a new pte
 * entry is being inserted into the ptbl.
 */
static void
ptbl_hold(pmap_t pmap, unsigned int pdir_idx)
{
        vm_paddr_t pa;
        pte_t *ptbl;
        vm_page_t m;
        int i;

        CTR3(KTR_PMAP, "%s: pmap = %p pdir_idx = %d", __func__, pmap,
            pdir_idx);

        KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)),
            ("ptbl_hold: invalid pdir_idx"));
        KASSERT((pmap != kernel_pmap),
            ("ptbl_hold: holding kernel ptbl!"));

        ptbl = pmap->pm_pdir[pdir_idx];

        KASSERT((ptbl != NULL), ("ptbl_hold: null ptbl"));

        for (i = 0; i < PTBL_PAGES; i++) {
                pa = pte_vatopa(kernel_pmap,
                    (vm_offset_t)ptbl + (i * PAGE_SIZE));
                m = PHYS_TO_VM_PAGE(pa);
                m->ref_count++;
        }
}
/*
 * Clean pte entry, try to free page table page if requested.
 *
 * Return 1 if ptbl pages were freed, otherwise return 0.
 */
static int
pte_remove(pmap_t pmap, vm_offset_t va, uint8_t flags)
{
        unsigned int pdir_idx = PDIR_IDX(va);
        unsigned int ptbl_idx = PTBL_IDX(va);
        vm_page_t m;
        pte_t *ptbl;
        pte_t *pte;

        //int su = (pmap == kernel_pmap);
        //debugf("pte_remove: s (su = %d pmap = 0x%08x va = 0x%08x flags = %d)\n",
        //    su, (u_int32_t)pmap, va, flags);

        ptbl = pmap->pm_pdir[pdir_idx];
        KASSERT(ptbl, ("pte_remove: null ptbl"));

        pte = &ptbl[ptbl_idx];
        if (pte == NULL || !PTE_ISVALID(pte))
                return (0);

        if (PTE_ISWIRED(pte))
                pmap->pm_stats.wired_count--;

        /* Get vm_page_t for mapped pte. */
        m = PHYS_TO_VM_PAGE(PTE_PA(pte));

        /* Handle managed entry. */
        if (PTE_ISMANAGED(pte)) {
                if (PTE_ISMODIFIED(pte))
                        vm_page_dirty(m);

                if (PTE_ISREFERENCED(pte))
                        vm_page_aflag_set(m, PGA_REFERENCED);

                pv_remove(pmap, va, m);
        } else if (pmap == kernel_pmap && m && m->md.pv_tracked) {
                /*
                 * Always pv_insert()/pv_remove() on MPC85XX, in case DPAA is
                 * used. This is needed by the NCSW support code for fast
                 * VA<->PA translation.
                 */
                pv_remove(pmap, va, m);
                if (TAILQ_EMPTY(&m->md.pv_list))
                        m->md.pv_tracked = false;
        }

        mtx_lock_spin(&tlbivax_mutex);
        tlb_miss_lock();

        tlb0_flush_entry(va);
        *pte = 0;

        tlb_miss_unlock();
        mtx_unlock_spin(&tlbivax_mutex);

        pmap->pm_stats.resident_count--;

        if (flags & PTBL_UNHOLD) {
                //debugf("pte_remove: e (unhold)\n");
                return (ptbl_unhold(pmap, pdir_idx));
        }

        //debugf("pte_remove: e\n");
        return (0);
}
/*
 * Insert PTE for a given page and virtual address.
 */
static int
pte_enter(pmap_t pmap, vm_page_t m, vm_offset_t va, uint32_t flags,
    boolean_t nosleep)
{
        unsigned int pdir_idx = PDIR_IDX(va);
        unsigned int ptbl_idx = PTBL_IDX(va);
        pte_t *ptbl, *pte, pte_tmp;

        CTR4(KTR_PMAP, "%s: su = %d pmap = %p va = %p", __func__,
            pmap == kernel_pmap, pmap, va);

        /* Get the page table pointer. */
        ptbl = pmap->pm_pdir[pdir_idx];

        if (ptbl == NULL) {
                /* Allocate page table pages. */
                ptbl = ptbl_alloc(pmap, pdir_idx, nosleep);
                if (ptbl == NULL) {
                        KASSERT(nosleep, ("nosleep and NULL ptbl"));
                        return (ENOMEM);
                }
                pmap->pm_pdir[pdir_idx] = ptbl;
                pte = &ptbl[ptbl_idx];
        } else {
                /*
                 * Check if there is valid mapping for requested
                 * va, if there is, remove it.
                 */
                pte = &pmap->pm_pdir[pdir_idx][ptbl_idx];
                if (PTE_ISVALID(pte)) {
                        pte_remove(pmap, va, PTBL_HOLD);
                } else {
                        /*
                         * pte is not used, increment hold count
                         * for ptbl pages.
                         */
                        if (pmap != kernel_pmap)
                                ptbl_hold(pmap, pdir_idx);
                }
        }

        /*
         * Insert pv_entry into pv_list for mapped page if part of managed
         * memory.
         */
        if ((m->oflags & VPO_UNMANAGED) == 0) {
                flags |= PTE_MANAGED;

                /* Create and insert pv entry. */
                pv_insert(pmap, va, m);
        }

        pmap->pm_stats.resident_count++;

        pte_tmp = PTE_RPN_FROM_PA(VM_PAGE_TO_PHYS(m));
        pte_tmp |= (PTE_VALID | flags | PTE_PS_4KB); /* 4KB pages only */

        mtx_lock_spin(&tlbivax_mutex);
        tlb_miss_lock();

        tlb0_flush_entry(va);
        *pte = pte_tmp;

        tlb_miss_unlock();
        mtx_unlock_spin(&tlbivax_mutex);

        return (0);
}
/* Return the pa for the given pmap/va. */
static vm_paddr_t
pte_vatopa(pmap_t pmap, vm_offset_t va)
{
        vm_paddr_t pa = 0;
        pte_t *pte;

        pte = pte_find(pmap, va);
        if ((pte != NULL) && PTE_ISVALID(pte))
                pa = (PTE_PA(pte) | (va & PTE_PA_MASK));
        return (pa);
}

/* Get a pointer to a PTE in a page table. */
static pte_t *
pte_find(pmap_t pmap, vm_offset_t va)
{
        unsigned int pdir_idx = PDIR_IDX(va);
        unsigned int ptbl_idx = PTBL_IDX(va);

        KASSERT((pmap != NULL), ("pte_find: invalid pmap"));

        if (pmap->pm_pdir[pdir_idx])
                return (&(pmap->pm_pdir[pdir_idx][ptbl_idx]));

        return (NULL);
}
/* Get a pointer to a PTE in a page table, or the next closest (greater) one. */
static __inline pte_t *
pte_find_next(pmap_t pmap, vm_offset_t *pva)
{
        vm_offset_t va;
        pte_t **pdir;
        pte_t *pte;
        unsigned long i, j;

        KASSERT((pmap != NULL), ("pte_find: invalid pmap"));

        va = *pva;
        i = PDIR_IDX(va);
        j = PTBL_IDX(va);
        pdir = pmap->pm_pdir;
        for (; i < PDIR_NENTRIES; i++, j = 0) {
                if (pdir[i] == NULL)
                        continue;
                for (; j < PTBL_NENTRIES; j++) {
                        pte = &pdir[i][j];
                        if (!PTE_ISVALID(pte))
                                continue;
                        *pva = PDIR_SIZE * i + PAGE_SIZE * j;
                        return (pte);
                }
        }
        return (NULL);
}
/* Set up kernel page tables. */
static void
kernel_pte_alloc(vm_offset_t data_end, vm_offset_t addr)
{
        pte_t *pte;
        vm_offset_t va;
        vm_offset_t pdir_start;
        int i;

        kptbl_min = VM_MIN_KERNEL_ADDRESS / PDIR_SIZE;
        kernel_pmap->pm_pdir = (pte_t **)kernel_ptbl_root;

        pdir_start = kernel_ptbl_root + PDIR_NENTRIES * sizeof(pte_t);

        /* Initialize kernel pdir */
        for (i = 0; i < kernel_ptbls; i++) {
                kernel_pmap->pm_pdir[kptbl_min + i] =
                    (pte_t *)(pdir_start + (i * PAGE_SIZE * PTBL_PAGES));
        }

        /*
         * Fill in PTEs covering kernel code and data. They are not required
         * for address translation, as this area is covered by static TLB1
         * entries, but are needed for pte_vatopa() to work correctly with
         * kernel area addresses.
         */
        for (va = addr; va < data_end; va += PAGE_SIZE) {
                pte = &(kernel_pmap->pm_pdir[PDIR_IDX(va)][PTBL_IDX(va)]);
                powerpc_sync();
                *pte = PTE_RPN_FROM_PA(kernload + (va - kernstart));
                *pte |= PTE_M | PTE_SR | PTE_SW | PTE_SX | PTE_WIRED |
                    PTE_VALID | PTE_PS_4KB;
        }
}
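
/*
 * Reserve room starting at data_end for the ptbl_buf array, the kernel pdir
 * root, and the PTE tables covering the whole kernel KVA range.  Return the
 * new end of used memory.
 */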
static vm_offset_t
mmu_booke_alloc_kernel_pgtables(vm_offset_t data_end)
{
        /* Allocate space for ptbl_bufs. */
        ptbl_bufs = (struct ptbl_buf *)data_end;
        data_end += sizeof(struct ptbl_buf) * PTBL_BUFS;
        debugf(" ptbl_bufs at 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n",
            (uintptr_t)ptbl_bufs, data_end);

        data_end = round_page(data_end);

        kernel_ptbl_root = data_end;
        data_end += PDIR_NENTRIES * sizeof(pte_t*);

        /* Allocate PTE tables for kernel KVA. */
        kernel_ptbls = howmany(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
            PDIR_SIZE);
        data_end += kernel_ptbls * PTBL_PAGES * PAGE_SIZE;

        debugf(" kernel ptbls: %d\n", kernel_ptbls);
        debugf(" kernel pdir at %#jx end = %#jx\n",
            (uintmax_t)kernel_ptbl_root, (uintmax_t)data_end);

        return (data_end);
}
/*
 * Initialize a preallocated and zeroed pmap structure,
 * such as one in a vmspace structure.
 */
static int
mmu_booke_pinit(pmap_t pmap)
{
        int i;

        CTR4(KTR_PMAP, "%s: pmap = %p, proc %d '%s'", __func__, pmap,
            curthread->td_proc->p_pid, curthread->td_proc->p_comm);

        KASSERT((pmap != kernel_pmap), ("pmap_pinit: initializing kernel_pmap"));

        for (i = 0; i < MAXCPU; i++)
                pmap->pm_tid[i] = TID_NONE;
        CPU_ZERO(&kernel_pmap->pm_active);
        bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
        pmap->pm_pdir = uma_zalloc(ptbl_root_zone, M_WAITOK);
        bzero(pmap->pm_pdir, sizeof(pte_t *) * PDIR_NENTRIES);
        TAILQ_INIT(&pmap->pm_ptbl_list);

        return (1);
}
/*
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by mmu_booke_pinit is being released.
 * Should only be called if the map contains no valid mappings.
 */
static void
mmu_booke_release(pmap_t pmap)
{

        KASSERT(pmap->pm_stats.resident_count == 0,
            ("pmap_release: pmap resident count %ld != 0",
            pmap->pm_stats.resident_count));
        uma_zfree(ptbl_root_zone, pmap->pm_pdir);
}
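
/*
 * Make the instruction cache coherent with a range of another pmap's address
 * space.  If that pmap is not the currently active one, each page is
 * temporarily entered into the active pmap at a scratch address so that
 * __syncicache() can reach it.
 */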
static void
mmu_booke_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
{
        pte_t *pte;
        vm_paddr_t pa = 0;
        int sync_sz, valid;
        pmap_t pmap;
        vm_page_t m;
        vm_offset_t addr;
        int active;

        rw_wlock(&pvh_global_lock);
        pmap = PCPU_GET(curpmap);
        active = (pm == kernel_pmap || pm == pmap) ? 1 : 0;
        while (sz > 0) {
                PMAP_LOCK(pm);
                pte = pte_find(pm, va);
                valid = (pte != NULL && PTE_ISVALID(pte)) ? 1 : 0;
                if (valid)
                        pa = PTE_PA(pte);
                PMAP_UNLOCK(pm);
                sync_sz = PAGE_SIZE - (va & PAGE_MASK);
                sync_sz = min(sync_sz, sz);
                if (valid) {
                        if (!active) {
                                /* Create a mapping in the active pmap. */
                                addr = 0;
                                m = PHYS_TO_VM_PAGE(pa);
                                PMAP_LOCK(pmap);
                                pte_enter(pmap, m, addr,
                                    PTE_SR | PTE_VALID, FALSE);
                                addr += (va & PAGE_MASK);
                                __syncicache((void *)addr, sync_sz);
                                pte_remove(pmap, addr, PTBL_UNHOLD);
                                PMAP_UNLOCK(pmap);
                        } else
                                __syncicache((void *)va, sync_sz);
                }
                va += sync_sz;
                sz -= sync_sz;
        }
        rw_wunlock(&pvh_global_lock);
}
/*
 * mmu_booke_zero_page_area zeros the specified hardware page by
 * mapping it into virtual memory and using bzero to clear
 * its contents.
 *
 * off and size must reside within a single page.
 */
static void
mmu_booke_zero_page_area(vm_page_t m, int off, int size)
{
        vm_offset_t va;

        /* XXX KASSERT off and size are within a single page? */

        mtx_lock(&zero_page_mutex);
        va = zero_page_va;

        mmu_booke_kenter(va, VM_PAGE_TO_PHYS(m));
        bzero((caddr_t)va + off, size);
        mmu_booke_kremove(va);

        mtx_unlock(&zero_page_mutex);
}
/*
 * mmu_booke_zero_page zeros the specified hardware page.
 */
static void
mmu_booke_zero_page(vm_page_t m)
{
        vm_offset_t off, va;

        va = zero_page_va;
        mtx_lock(&zero_page_mutex);

        mmu_booke_kenter(va, VM_PAGE_TO_PHYS(m));
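
        /* Zero the page one cache block at a time using dcbz. */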
        for (off = 0; off < PAGE_SIZE; off += cacheline_size)
                __asm __volatile("dcbz 0,%0" :: "r"(va + off));

        mmu_booke_kremove(va);
        mtx_unlock(&zero_page_mutex);
}
/*
 * mmu_booke_copy_page copies the specified (machine independent) page by
 * mapping the page into virtual memory and using memcpy to copy the page,
 * one machine dependent page at a time.
 */
static void
mmu_booke_copy_page(vm_page_t sm, vm_page_t dm)
{
        vm_offset_t sva, dva;

        sva = copy_page_src_va;
        dva = copy_page_dst_va;

        mtx_lock(&copy_page_mutex);
        mmu_booke_kenter(sva, VM_PAGE_TO_PHYS(sm));
        mmu_booke_kenter(dva, VM_PAGE_TO_PHYS(dm));

        memcpy((caddr_t)dva, (caddr_t)sva, PAGE_SIZE);

        mmu_booke_kremove(dva);
        mmu_booke_kremove(sva);
        mtx_unlock(&copy_page_mutex);
}
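
/*
 * Copy xfersize bytes between two page arrays at the given offsets, crossing
 * page boundaries as needed, using the reserved copy-page KVA windows.
 */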
static inline void
mmu_booke_copy_pages(vm_page_t *ma, vm_offset_t a_offset,
    vm_page_t *mb, vm_offset_t b_offset, int xfersize)
{
        void *a_cp, *b_cp;
        vm_offset_t a_pg_offset, b_pg_offset;
        int cnt;

        mtx_lock(&copy_page_mutex);
        while (xfersize > 0) {
                a_pg_offset = a_offset & PAGE_MASK;
                cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
                mmu_booke_kenter(copy_page_src_va,
                    VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT]));
                a_cp = (char *)copy_page_src_va + a_pg_offset;
                b_pg_offset = b_offset & PAGE_MASK;
                cnt = min(cnt, PAGE_SIZE - b_pg_offset);
                mmu_booke_kenter(copy_page_dst_va,
                    VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT]));
                b_cp = (char *)copy_page_dst_va + b_pg_offset;
                bcopy(a_cp, b_cp, cnt);
                mmu_booke_kremove(copy_page_dst_va);
                mmu_booke_kremove(copy_page_src_va);
                a_offset += cnt;
                b_offset += cnt;
                xfersize -= cnt;
        }
        mtx_unlock(&copy_page_mutex);
}
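
/*
 * Map a single page at this CPU's quick-map address for short-lived access.
 * Runs inside a critical section, so the mapping is private to this CPU and
 * must be released with mmu_booke_quick_remove_page().
 */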
static vm_offset_t
mmu_booke_quick_enter_page(vm_page_t m)
{
        vm_paddr_t paddr;
        vm_offset_t qaddr;
        uint32_t flags;
        pte_t *pte;

        paddr = VM_PAGE_TO_PHYS(m);

        flags = PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID;
        flags |= tlb_calc_wimg(paddr, pmap_page_get_memattr(m)) << PTE_MAS2_SHIFT;
        flags |= PTE_PS_4KB;

        critical_enter();
        qaddr = PCPU_GET(qmap_addr);

        pte = pte_find(kernel_pmap, qaddr);

        KASSERT(*pte == 0, ("mmu_booke_quick_enter_page: PTE busy"));

        /*
         * XXX: tlbivax is broadcast to other cores, but qaddr should
         * not be present in other TLBs. Is there a better instruction
         * sequence to use? Or just forget it & use mmu_booke_kenter()...
         */
        __asm __volatile("tlbivax 0, %0" :: "r"(qaddr & MAS2_EPN_MASK));
        __asm __volatile("isync; msync");

        *pte = PTE_RPN_FROM_PA(paddr) | flags;

        /* Flush the real memory from the instruction cache. */
        if ((flags & (PTE_I | PTE_G)) == 0)
                __syncicache((void *)qaddr, PAGE_SIZE);

        return (qaddr);
}
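
/* Tear down a mapping created by mmu_booke_quick_enter_page(). */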
static void
mmu_booke_quick_remove_page(vm_offset_t addr)
{
        pte_t *pte;

        pte = pte_find(kernel_pmap, addr);

        KASSERT(PCPU_GET(qmap_addr) == addr,
            ("mmu_booke_quick_remove_page: invalid address"));
        KASSERT(*pte != 0,
            ("mmu_booke_quick_remove_page: PTE not in use"));

        *pte = 0;
        critical_exit();
}

/**************************************************************************/
/* TID handling */
/**************************************************************************/

/*
 * Return the largest uint value log such that 2^log <= num.
 */
static unsigned long
ilog2(unsigned long num)
{
        long lz;

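        /* cntlzw counts leading zeros; 31 - lz is the index of the MSB set. */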
        __asm ("cntlzw %0, %1" : "=r" (lz) : "r" (num));
        return (31 - lz);
}
/*
 * Invalidate all TLB0 entries which match the given TID. Note this is
 * dedicated for cases when invalidations should NOT be propagated to other
 * CPUs.
 */
static void
tid_flush(tlbtid_t tid)
{
        register_t msr;
        uint32_t mas0, mas1, mas2;
        int entry, way;

        /* Don't evict kernel translations */
        if (tid == TID_KERNEL)
                return;

        msr = mfmsr();
        __asm __volatile("wrteei 0");

        /*
         * Newer cores (e500mc and later) have tlbilx, which doesn't
         * broadcast, so use it for PID invalidation.
         */
        switch ((mfpvr() >> 16) & 0xffff) {
        case FSL_E500mc:
        case FSL_E5500:
        case FSL_E6500:
                mtspr(SPR_MAS6, tid << MAS6_SPID0_SHIFT);
                /* tlbilxpid */
                __asm __volatile("isync; .long 0x7c200024; isync; msync");
                __asm __volatile("wrtee %0" :: "r"(msr));
                return;
        }
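
        /*
         * Older cores: walk every way/entry of TLB0, read each entry back
         * with tlbre, and clear MAS1_VALID on the ones carrying this TID.
         */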
        for (way = 0; way < TLB0_WAYS; way++)
                for (entry = 0; entry < TLB0_ENTRIES_PER_WAY; entry++) {
                        mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way);
                        mtspr(SPR_MAS0, mas0);

                        mas2 = entry << MAS2_TLB0_ENTRY_IDX_SHIFT;
                        mtspr(SPR_MAS2, mas2);

                        __asm __volatile("isync; tlbre");

                        mas1 = mfspr(SPR_MAS1);

                        if (!(mas1 & MAS1_VALID))
                                continue;
                        if (((mas1 & MAS1_TID_MASK) >> MAS1_TID_SHIFT) != tid)
                                continue;
                        mas1 &= ~MAS1_VALID;
                        mtspr(SPR_MAS1, mas1);
                        __asm __volatile("isync; tlbwe; isync; msync");
                }
        __asm __volatile("wrtee %0" :: "r"(msr));
}