pmap.c

  1. /*-
  2. * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  3. *
  4. * Copyright (C) 2007-2009 Semihalf, Rafal Jaworowski <raj@semihalf.com>
  5. * Copyright (C) 2006 Semihalf, Marian Balakowicz <m8@semihalf.com>
  6. * All rights reserved.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. * 1. Redistributions of source code must retain the above copyright
  12. * notice, this list of conditions and the following disclaimer.
  13. * 2. Redistributions in binary form must reproduce the above copyright
  14. * notice, this list of conditions and the following disclaimer in the
  15. * documentation and/or other materials provided with the distribution.
  16. *
  17. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  18. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  19. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
  20. * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  21. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
  22. * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  23. * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  24. * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  25. * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  26. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27. *
  28. * Some hw specific parts of this pmap were derived or influenced
  29. * by NetBSD's ibm4xx pmap module. More generic code is shared with
  30. * a few other pmap modules from the FreeBSD tree.
  31. */
  32. /*
  33. * VM layout notes:
  34. *
  35. * Kernel and user threads run within one common virtual address space
  36. * defined by AS=0.
  37. *
  38. * 32-bit pmap:
  39. * Virtual address space layout:
  40. * -----------------------------
  41. * 0x0000_0000 - 0x7fff_ffff : user process
  42. * 0x8000_0000 - 0xbfff_ffff : pmap_mapdev()-ed area (PCI/PCIE etc.)
  43. * 0xc000_0000 - 0xc0ff_ffff : kernel reserved
  44. * 0xc000_0000 - data_end : kernel code+data, env, metadata etc.
  45. * 0xc100_0000 - 0xffff_ffff : KVA
  46. * 0xc100_0000 - 0xc100_3fff : reserved for page zero/copy
  47. * 0xc100_4000 - 0xc200_3fff : reserved for ptbl bufs
  48. * 0xc200_4000 - 0xc200_8fff : guard page + kstack0
  49. * 0xc200_9000 - 0xfeef_ffff : actual free KVA space
  50. *
  51. * 64-bit pmap:
  52. * Virtual address space layout:
  53. * -----------------------------
  54. * 0x0000_0000_0000_0000 - 0xbfff_ffff_ffff_ffff : user process
  55. * 0x0000_0000_0000_0000 - 0x8fff_ffff_ffff_ffff : text, data, heap, maps, libraries
  56. * 0x9000_0000_0000_0000 - 0xafff_ffff_ffff_ffff : mmio region
  57. * 0xb000_0000_0000_0000 - 0xbfff_ffff_ffff_ffff : stack
  58. * 0xc000_0000_0000_0000 - 0xcfff_ffff_ffff_ffff : kernel reserved
  59. * 0xc000_0000_0000_0000 - endkernel-1 : kernel code & data
  60. * endkernel - msgbufp-1 : flat device tree
  61. * msgbufp - kernel_pdir-1 : message buffer
  62. * kernel_pdir - kernel_pp2d-1 : kernel page directory
  63. * kernel_pp2d - . : kernel pointers to page directory
  64. * pmap_zero_copy_min - crashdumpmap-1 : reserved for page zero/copy
  65. * crashdumpmap - ptbl_buf_pool_vabase-1 : reserved for ptbl bufs
  66. * ptbl_buf_pool_vabase - virtual_avail-1 : user page directories and page tables
  67. * virtual_avail - 0xcfff_ffff_ffff_ffff : actual free KVA space
  68. * 0xd000_0000_0000_0000 - 0xdfff_ffff_ffff_ffff : coprocessor region
  69. * 0xe000_0000_0000_0000 - 0xefff_ffff_ffff_ffff : mmio region
  70. * 0xf000_0000_0000_0000 - 0xffff_ffff_ffff_ffff : direct map
  71. * 0xf000_0000_0000_0000 - +Maxmem : physmem map
  72. * - 0xffff_ffff_ffff_ffff : device direct map
  73. */
  74. #include <sys/cdefs.h>
  75. __FBSDID("$FreeBSD$");
  76. #include "opt_ddb.h"
  77. #include "opt_kstack_pages.h"
  78. #include <sys/param.h>
  79. #include <sys/conf.h>
  80. #include <sys/malloc.h>
  81. #include <sys/ktr.h>
  82. #include <sys/proc.h>
  83. #include <sys/user.h>
  84. #include <sys/queue.h>
  85. #include <sys/systm.h>
  86. #include <sys/kernel.h>
  87. #include <sys/kerneldump.h>
  88. #include <sys/linker.h>
  89. #include <sys/msgbuf.h>
  90. #include <sys/lock.h>
  91. #include <sys/mutex.h>
  92. #include <sys/rwlock.h>
  93. #include <sys/sched.h>
  94. #include <sys/smp.h>
  95. #include <sys/vmmeter.h>
  96. #include <vm/vm.h>
  97. #include <vm/vm_param.h>
  98. #include <vm/vm_page.h>
  99. #include <vm/vm_kern.h>
  100. #include <vm/vm_pageout.h>
  101. #include <vm/vm_extern.h>
  102. #include <vm/vm_object.h>
  103. #include <vm/vm_map.h>
  104. #include <vm/vm_pager.h>
  105. #include <vm/vm_phys.h>
  106. #include <vm/vm_pagequeue.h>
  107. #include <vm/vm_dumpset.h>
  108. #include <vm/uma.h>
  109. #include <machine/_inttypes.h>
  110. #include <machine/cpu.h>
  111. #include <machine/pcb.h>
  112. #include <machine/platform.h>
  113. #include <machine/tlb.h>
  114. #include <machine/spr.h>
  115. #include <machine/md_var.h>
  116. #include <machine/mmuvar.h>
  117. #include <machine/pmap.h>
  118. #include <machine/pte.h>
  119. #include <ddb/ddb.h>
  120. #define SPARSE_MAPDEV
  121. /* Use power-of-two mappings in mmu_booke_mapdev(), to save entries. */
  122. #define POW2_MAPPINGS
  123. #ifdef DEBUG
  124. #define debugf(fmt, args...) printf(fmt, ##args)
  125. #else
  126. #define debugf(fmt, args...)
  127. #endif
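/*
 * Illustrative example: with DEBUG defined, a call such as
 *	debugf("tlb0_entries = %u\n", tlb0_entries);
 * expands to printf(); without DEBUG it compiles away entirely.
 */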
  128. #ifdef __powerpc64__
  129. #define PRI0ptrX "016lx"
  130. #else
  131. #define PRI0ptrX "08x"
  132. #endif
  133. #define TODO panic("%s: not implemented", __func__);
  134. extern unsigned char _etext[];
  135. extern unsigned char _end[];
  136. extern uint32_t *bootinfo;
  137. vm_paddr_t kernload;
  138. vm_offset_t kernstart;
  139. vm_size_t kernsize;
  140. /* Message buffer and tables. */
  141. static vm_offset_t data_start;
  142. static vm_size_t data_end;
  143. /* Phys/avail memory regions. */
  144. static struct mem_region *availmem_regions;
  145. static int availmem_regions_sz;
  146. static struct mem_region *physmem_regions;
  147. static int physmem_regions_sz;
  148. #ifndef __powerpc64__
  149. /* Reserved KVA space and mutex for mmu_booke_zero_page. */
  150. static vm_offset_t zero_page_va;
  151. static struct mtx zero_page_mutex;
  152. /* Reserved KVA space and mutex for mmu_booke_copy_page. */
  153. static vm_offset_t copy_page_src_va;
  154. static vm_offset_t copy_page_dst_va;
  155. static struct mtx copy_page_mutex;
  156. #endif
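/*
 * The 64-bit pmap zeroes and copies pages through the direct map, so it
 * needs no reserved KVA or mutexes for those operations.
 */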
  157. static struct mtx tlbivax_mutex;
  158. /**************************************************************************/
  159. /* PMAP */
  160. /**************************************************************************/
  161. static int mmu_booke_enter_locked(pmap_t, vm_offset_t, vm_page_t,
  162. vm_prot_t, u_int flags, int8_t psind);
  163. unsigned int kptbl_min; /* Index of the first kernel ptbl. */
  164. static uma_zone_t ptbl_root_zone;
  165. /*
  166. * If user pmap is processed with mmu_booke_remove and the resident count
  167. * drops to 0, there are no more pages to remove, so we need not continue.
  168. */
  169. #define PMAP_REMOVE_DONE(pmap) \
  170. ((pmap) != kernel_pmap && (pmap)->pm_stats.resident_count == 0)
  171. #if defined(COMPAT_FREEBSD32) || !defined(__powerpc64__)
  172. extern int elf32_nxstack;
  173. #endif
  174. /**************************************************************************/
  175. /* TLB and TID handling */
  176. /**************************************************************************/
  177. /* Translation ID busy table */
  178. static volatile pmap_t tidbusy[MAXCPU][TID_MAX + 1];
  179. /*
  180. * TLB0 capabilities (entry, way numbers etc.). These can vary between e500
  181. * core revisions and should be read from h/w registers during early config.
  182. */
  183. uint32_t tlb0_entries;
  184. uint32_t tlb0_ways;
  185. uint32_t tlb0_entries_per_way;
  186. uint32_t tlb1_entries;
  187. #define TLB0_ENTRIES (tlb0_entries)
  188. #define TLB0_WAYS (tlb0_ways)
  189. #define TLB0_ENTRIES_PER_WAY (tlb0_entries_per_way)
  190. #define TLB1_ENTRIES (tlb1_entries)
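/*
 * The macros above wrap the TLB geometry probed from the TLB0CFG/TLB1CFG
 * SPRs at boot by tlb0_get_tlbconf() and tlb1_get_tlbconf() below.
 */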
  191. static tlbtid_t tid_alloc(struct pmap *);
  192. #ifdef DDB
  193. #ifdef __powerpc64__
  194. static void tlb_print_entry(int, uint32_t, uint64_t, uint32_t, uint32_t);
  195. #else
  196. static void tlb_print_entry(int, uint32_t, uint32_t, uint32_t, uint32_t);
  197. #endif
  198. #endif
  199. static void tlb1_read_entry(tlb_entry_t *, unsigned int);
  200. static void tlb1_write_entry(tlb_entry_t *, unsigned int);
  201. static int tlb1_iomapped(int, vm_paddr_t, vm_size_t, vm_offset_t *);
  202. static vm_size_t tlb1_mapin_region(vm_offset_t, vm_paddr_t, vm_size_t, int);
  203. static __inline uint32_t tlb_calc_wimg(vm_paddr_t pa, vm_memattr_t ma);
  204. static vm_size_t tsize2size(unsigned int);
  205. static unsigned int size2tsize(vm_size_t);
  206. static unsigned long ilog2(unsigned long);
  207. static void set_mas4_defaults(void);
  208. static inline void tlb0_flush_entry(vm_offset_t);
  209. static inline unsigned int tlb0_tableidx(vm_offset_t, unsigned int);
  210. /**************************************************************************/
  211. /* Page table management */
  212. /**************************************************************************/
  213. static struct rwlock_padalign pvh_global_lock;
  214. /* Data for the pv entry allocation mechanism */
  215. static uma_zone_t pvzone;
  216. static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
  217. #define PV_ENTRY_ZONE_MIN 2048 /* min pv entries in uma zone */
  218. #ifndef PMAP_SHPGPERPROC
  219. #define PMAP_SHPGPERPROC 200
  220. #endif
  221. static vm_paddr_t pte_vatopa(pmap_t, vm_offset_t);
  222. static int pte_enter(pmap_t, vm_page_t, vm_offset_t, uint32_t, boolean_t);
  223. static int pte_remove(pmap_t, vm_offset_t, uint8_t);
  224. static pte_t *pte_find(pmap_t, vm_offset_t);
  225. static void kernel_pte_alloc(vm_offset_t, vm_offset_t);
  226. static pv_entry_t pv_alloc(void);
  227. static void pv_free(pv_entry_t);
  228. static void pv_insert(pmap_t, vm_offset_t, vm_page_t);
  229. static void pv_remove(pmap_t, vm_offset_t, vm_page_t);
  230. static void booke_pmap_init_qpages(void);
  231. static inline void tlb_miss_lock(void);
  232. static inline void tlb_miss_unlock(void);
  233. #ifdef SMP
  234. extern tlb_entry_t __boot_tlb1[];
  235. void pmap_bootstrap_ap(volatile uint32_t *);
  236. #endif
  237. /*
  238. * Kernel MMU interface
  239. */
  240. static void mmu_booke_clear_modify(vm_page_t);
  241. static void mmu_booke_copy(pmap_t, pmap_t, vm_offset_t,
  242. vm_size_t, vm_offset_t);
  243. static void mmu_booke_copy_page(vm_page_t, vm_page_t);
  244. static void mmu_booke_copy_pages(vm_page_t *,
  245. vm_offset_t, vm_page_t *, vm_offset_t, int);
  246. static int mmu_booke_enter(pmap_t, vm_offset_t, vm_page_t,
  247. vm_prot_t, u_int flags, int8_t psind);
  248. static void mmu_booke_enter_object(pmap_t, vm_offset_t, vm_offset_t,
  249. vm_page_t, vm_prot_t);
  250. static void mmu_booke_enter_quick(pmap_t, vm_offset_t, vm_page_t,
  251. vm_prot_t);
  252. static vm_paddr_t mmu_booke_extract(pmap_t, vm_offset_t);
  253. static vm_page_t mmu_booke_extract_and_hold(pmap_t, vm_offset_t,
  254. vm_prot_t);
  255. static void mmu_booke_init(void);
  256. static boolean_t mmu_booke_is_modified(vm_page_t);
  257. static boolean_t mmu_booke_is_prefaultable(pmap_t, vm_offset_t);
  258. static boolean_t mmu_booke_is_referenced(vm_page_t);
  259. static int mmu_booke_ts_referenced(vm_page_t);
  260. static vm_offset_t mmu_booke_map(vm_offset_t *, vm_paddr_t, vm_paddr_t,
  261. int);
  262. static int mmu_booke_mincore(pmap_t, vm_offset_t,
  263. vm_paddr_t *);
  264. static void mmu_booke_object_init_pt(pmap_t, vm_offset_t,
  265. vm_object_t, vm_pindex_t, vm_size_t);
  266. static boolean_t mmu_booke_page_exists_quick(pmap_t, vm_page_t);
  267. static void mmu_booke_page_init(vm_page_t);
  268. static int mmu_booke_page_wired_mappings(vm_page_t);
  269. static int mmu_booke_pinit(pmap_t);
  270. static void mmu_booke_pinit0(pmap_t);
  271. static void mmu_booke_protect(pmap_t, vm_offset_t, vm_offset_t,
  272. vm_prot_t);
  273. static void mmu_booke_qenter(vm_offset_t, vm_page_t *, int);
  274. static void mmu_booke_qremove(vm_offset_t, int);
  275. static void mmu_booke_release(pmap_t);
  276. static void mmu_booke_remove(pmap_t, vm_offset_t, vm_offset_t);
  277. static void mmu_booke_remove_all(vm_page_t);
  278. static void mmu_booke_remove_write(vm_page_t);
  279. static void mmu_booke_unwire(pmap_t, vm_offset_t, vm_offset_t);
  280. static void mmu_booke_zero_page(vm_page_t);
  281. static void mmu_booke_zero_page_area(vm_page_t, int, int);
  282. static void mmu_booke_activate(struct thread *);
  283. static void mmu_booke_deactivate(struct thread *);
  284. static void mmu_booke_bootstrap(vm_offset_t, vm_offset_t);
  285. static void *mmu_booke_mapdev(vm_paddr_t, vm_size_t);
  286. static void *mmu_booke_mapdev_attr(vm_paddr_t, vm_size_t, vm_memattr_t);
  287. static void mmu_booke_unmapdev(vm_offset_t, vm_size_t);
  288. static vm_paddr_t mmu_booke_kextract(vm_offset_t);
  289. static void mmu_booke_kenter(vm_offset_t, vm_paddr_t);
  290. static void mmu_booke_kenter_attr(vm_offset_t, vm_paddr_t, vm_memattr_t);
  291. static void mmu_booke_kremove(vm_offset_t);
  292. static boolean_t mmu_booke_dev_direct_mapped(vm_paddr_t, vm_size_t);
  293. static void mmu_booke_sync_icache(pmap_t, vm_offset_t,
  294. vm_size_t);
  295. static void mmu_booke_dumpsys_map(vm_paddr_t pa, size_t,
  296. void **);
  297. static void mmu_booke_dumpsys_unmap(vm_paddr_t pa, size_t,
  298. void *);
  299. static void mmu_booke_scan_init(void);
  300. static vm_offset_t mmu_booke_quick_enter_page(vm_page_t m);
  301. static void mmu_booke_quick_remove_page(vm_offset_t addr);
  302. static int mmu_booke_change_attr(vm_offset_t addr,
  303. vm_size_t sz, vm_memattr_t mode);
  304. static int mmu_booke_decode_kernel_ptr(vm_offset_t addr,
  305. int *is_user, vm_offset_t *decoded_addr);
  306. static void mmu_booke_page_array_startup(long);
  307. static boolean_t mmu_booke_page_is_mapped(vm_page_t m);
  308. static struct pmap_funcs mmu_booke_methods = {
  309. /* pmap dispatcher interface */
  310. .clear_modify = mmu_booke_clear_modify,
  311. .copy = mmu_booke_copy,
  312. .copy_page = mmu_booke_copy_page,
  313. .copy_pages = mmu_booke_copy_pages,
  314. .enter = mmu_booke_enter,
  315. .enter_object = mmu_booke_enter_object,
  316. .enter_quick = mmu_booke_enter_quick,
  317. .extract = mmu_booke_extract,
  318. .extract_and_hold = mmu_booke_extract_and_hold,
  319. .init = mmu_booke_init,
  320. .is_modified = mmu_booke_is_modified,
  321. .is_prefaultable = mmu_booke_is_prefaultable,
  322. .is_referenced = mmu_booke_is_referenced,
  323. .ts_referenced = mmu_booke_ts_referenced,
  324. .map = mmu_booke_map,
  325. .mincore = mmu_booke_mincore,
  326. .object_init_pt = mmu_booke_object_init_pt,
  327. .page_exists_quick = mmu_booke_page_exists_quick,
  328. .page_init = mmu_booke_page_init,
  329. .page_wired_mappings = mmu_booke_page_wired_mappings,
  330. .pinit = mmu_booke_pinit,
  331. .pinit0 = mmu_booke_pinit0,
  332. .protect = mmu_booke_protect,
  333. .qenter = mmu_booke_qenter,
  334. .qremove = mmu_booke_qremove,
  335. .release = mmu_booke_release,
  336. .remove = mmu_booke_remove,
  337. .remove_all = mmu_booke_remove_all,
  338. .remove_write = mmu_booke_remove_write,
  339. .sync_icache = mmu_booke_sync_icache,
  340. .unwire = mmu_booke_unwire,
  341. .zero_page = mmu_booke_zero_page,
  342. .zero_page_area = mmu_booke_zero_page_area,
  343. .activate = mmu_booke_activate,
  344. .deactivate = mmu_booke_deactivate,
  345. .quick_enter_page = mmu_booke_quick_enter_page,
  346. .quick_remove_page = mmu_booke_quick_remove_page,
  347. .page_array_startup = mmu_booke_page_array_startup,
  348. .page_is_mapped = mmu_booke_page_is_mapped,
  349. /* Internal interfaces */
  350. .bootstrap = mmu_booke_bootstrap,
  351. .dev_direct_mapped = mmu_booke_dev_direct_mapped,
  352. .mapdev = mmu_booke_mapdev,
  353. .mapdev_attr = mmu_booke_mapdev_attr,
  354. .kenter = mmu_booke_kenter,
  355. .kenter_attr = mmu_booke_kenter_attr,
  356. .kextract = mmu_booke_kextract,
  357. .kremove = mmu_booke_kremove,
  358. .unmapdev = mmu_booke_unmapdev,
  359. .change_attr = mmu_booke_change_attr,
  360. .decode_kernel_ptr = mmu_booke_decode_kernel_ptr,
  361. /* dumpsys() support */
  362. .dumpsys_map_chunk = mmu_booke_dumpsys_map,
  363. .dumpsys_unmap_chunk = mmu_booke_dumpsys_unmap,
  364. .dumpsys_pa_init = mmu_booke_scan_init,
  365. };
  366. MMU_DEF(booke_mmu, MMU_TYPE_BOOKE, mmu_booke_methods);
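/*
 * MMU_DEF() registers this method table; the machine-independent pmap_*()
 * entry points dispatch through whichever pmap_funcs table the platform
 * installs for the active MMU type.
 */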
  367. #ifdef __powerpc64__
  368. #include "pmap_64.c"
  369. #else
  370. #include "pmap_32.c"
  371. #endif
  372. static vm_offset_t tlb1_map_base = VM_MAPDEV_BASE;
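/*
 * Compute the MAS2 WIMG bits for a mapping of physical address 'pa' with
 * memory attribute 'ma'.  For VM_MEMATTR_DEFAULT, addresses inside a known
 * physical memory region are mapped cacheable (_TLB_ENTRY_MEM); everything
 * else is treated as cache-inhibited, guarded I/O (_TLB_ENTRY_IO).
 */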
  373. static __inline uint32_t
  374. tlb_calc_wimg(vm_paddr_t pa, vm_memattr_t ma)
  375. {
  376. uint32_t attrib;
  377. int i;
  378. if (ma != VM_MEMATTR_DEFAULT) {
  379. switch (ma) {
  380. case VM_MEMATTR_UNCACHEABLE:
  381. return (MAS2_I | MAS2_G);
  382. case VM_MEMATTR_WRITE_COMBINING:
  383. case VM_MEMATTR_WRITE_BACK:
  384. case VM_MEMATTR_PREFETCHABLE:
  385. return (MAS2_I);
  386. case VM_MEMATTR_WRITE_THROUGH:
  387. return (MAS2_W | MAS2_M);
  388. case VM_MEMATTR_CACHEABLE:
  389. return (MAS2_M);
  390. }
  391. }
  392. /*
  393. * Assume the page is cache inhibited and access is guarded unless
  394. * it's in our available memory array.
  395. */
  396. attrib = _TLB_ENTRY_IO;
  397. for (i = 0; i < physmem_regions_sz; i++) {
  398. if ((pa >= physmem_regions[i].mr_start) &&
  399. (pa < (physmem_regions[i].mr_start +
  400. physmem_regions[i].mr_size))) {
  401. attrib = _TLB_ENTRY_MEM;
  402. break;
  403. }
  404. }
  405. return (attrib);
  406. }
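/*
 * Serialize against the TLB miss handlers of the other CPUs: take each
 * remote CPU's tlb_lock so no miss handler can walk or refill while the
 * caller rewrites PTEs and flushes TLB entries.  A no-op until SMP is up.
 */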
  407. static inline void
  408. tlb_miss_lock(void)
  409. {
  410. #ifdef SMP
  411. struct pcpu *pc;
  412. if (!smp_started)
  413. return;
  414. STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
  415. if (pc != pcpup) {
  416. CTR3(KTR_PMAP, "%s: tlb miss LOCK of CPU=%d, "
  417. "tlb_lock=%p", __func__, pc->pc_cpuid, pc->pc_booke.tlb_lock);
  418. KASSERT((pc->pc_cpuid != PCPU_GET(cpuid)),
  419. ("tlb_miss_lock: tried to lock self"));
  420. tlb_lock(pc->pc_booke.tlb_lock);
  421. CTR1(KTR_PMAP, "%s: locked", __func__);
  422. }
  423. }
  424. #endif
  425. }
  426. static inline void
  427. tlb_miss_unlock(void)
  428. {
  429. #ifdef SMP
  430. struct pcpu *pc;
  431. if (!smp_started)
  432. return;
  433. STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
  434. if (pc != pcpup) {
  435. CTR2(KTR_PMAP, "%s: tlb miss UNLOCK of CPU=%d",
  436. __func__, pc->pc_cpuid);
  437. tlb_unlock(pc->pc_booke.tlb_lock);
  438. CTR1(KTR_PMAP, "%s: unlocked", __func__);
  439. }
  440. }
  441. #endif
  442. }
  443. /* Read TLB0 geometry (number of entries and ways) from the TLB0CFG register. */
  444. static __inline void
  445. tlb0_get_tlbconf(void)
  446. {
  447. uint32_t tlb0_cfg;
  448. tlb0_cfg = mfspr(SPR_TLB0CFG);
  449. tlb0_entries = tlb0_cfg & TLBCFG_NENTRY_MASK;
  450. tlb0_ways = (tlb0_cfg & TLBCFG_ASSOC_MASK) >> TLBCFG_ASSOC_SHIFT;
  451. tlb0_entries_per_way = tlb0_entries / tlb0_ways;
  452. }
  453. /* Read the number of entries in TLB1 from the TLB1CFG register. */
  454. static __inline void
  455. tlb1_get_tlbconf(void)
  456. {
  457. uint32_t tlb1_cfg;
  458. tlb1_cfg = mfspr(SPR_TLB1CFG);
  459. tlb1_entries = tlb1_cfg & TLBCFG_NENTRY_MASK;
  460. }
  461. /**************************************************************************/
  462. /* Page table related */
  463. /**************************************************************************/
  464. /* Allocate pv_entry structure. */
  465. pv_entry_t
  466. pv_alloc(void)
  467. {
  468. pv_entry_t pv;
  469. pv_entry_count++;
  470. if (pv_entry_count > pv_entry_high_water)
  471. pagedaemon_wakeup(0); /* XXX powerpc NUMA */
  472. pv = uma_zalloc(pvzone, M_NOWAIT);
  473. return (pv);
  474. }
  475. /* Free pv_entry structure. */
  476. static __inline void
  477. pv_free(pv_entry_t pve)
  478. {
  479. pv_entry_count--;
  480. uma_zfree(pvzone, pve);
  481. }
  482. /* Allocate and initialize pv_entry structure. */
  483. static void
  484. pv_insert(pmap_t pmap, vm_offset_t va, vm_page_t m)
  485. {
  486. pv_entry_t pve;
  487. //int su = (pmap == kernel_pmap);
  488. //debugf("pv_insert: s (su = %d pmap = 0x%08x va = 0x%08x m = 0x%08x)\n", su,
  489. // (u_int32_t)pmap, va, (u_int32_t)m);
  490. pve = pv_alloc();
  491. if (pve == NULL)
  492. panic("pv_insert: no pv entries!");
  493. pve->pv_pmap = pmap;
  494. pve->pv_va = va;
  495. /* add to pv_list */
  496. PMAP_LOCK_ASSERT(pmap, MA_OWNED);
  497. rw_assert(&pvh_global_lock, RA_WLOCKED);
  498. TAILQ_INSERT_TAIL(&m->md.pv_list, pve, pv_link);
  499. //debugf("pv_insert: e\n");
  500. }
  501. /* Destroy pv entry. */
  502. static void
  503. pv_remove(pmap_t pmap, vm_offset_t va, vm_page_t m)
  504. {
  505. pv_entry_t pve;
  506. //int su = (pmap == kernel_pmap);
  507. //debugf("pv_remove: s (su = %d pmap = 0x%08x va = 0x%08x)\n", su, (u_int32_t)pmap, va);
  508. PMAP_LOCK_ASSERT(pmap, MA_OWNED);
  509. rw_assert(&pvh_global_lock, RA_WLOCKED);
  510. /* find pv entry */
  511. TAILQ_FOREACH(pve, &m->md.pv_list, pv_link) {
  512. if ((pmap == pve->pv_pmap) && (va == pve->pv_va)) {
  513. /* remove from pv_list */
  514. TAILQ_REMOVE(&m->md.pv_list, pve, pv_link);
  515. if (TAILQ_EMPTY(&m->md.pv_list))
  516. vm_page_aflag_clear(m, PGA_WRITEABLE);
  517. /* free pv entry struct */
  518. pv_free(pve);
  519. break;
  520. }
  521. }
  522. //debugf("pv_remove: e\n");
  523. }
  524. /**************************************************************************/
  525. /* PMAP related */
  526. /**************************************************************************/
  527. /*
  528. * This is called during booke_init, before the system is really initialized.
  529. */
  530. static void
  531. mmu_booke_bootstrap(vm_offset_t start, vm_offset_t kernelend)
  532. {
  533. vm_paddr_t phys_kernelend;
  534. struct mem_region *mp, *mp1;
  535. int cnt, i, j;
  536. vm_paddr_t s, e, sz;
  537. vm_paddr_t physsz, hwphyssz;
  538. u_int phys_avail_count;
  539. vm_size_t kstack0_sz;
  540. vm_paddr_t kstack0_phys;
  541. vm_offset_t kstack0;
  542. void *dpcpu;
  543. debugf("mmu_booke_bootstrap: entered\n");
  544. /* Set interesting system properties */
  545. #ifdef __powerpc64__
  546. hw_direct_map = 1;
  547. #else
  548. hw_direct_map = 0;
  549. #endif
  550. #if defined(COMPAT_FREEBSD32) || !defined(__powerpc64__)
  551. elf32_nxstack = 1;
  552. #endif
  553. /* Initialize invalidation mutex */
  554. mtx_init(&tlbivax_mutex, "tlbivax", NULL, MTX_SPIN);
  555. /* Read TLB0 size and associativity. */
  556. tlb0_get_tlbconf();
  557. /*
  558. * Align kernel start and end address (kernel image).
  559. * Note that kernel end does not necessarily relate to kernsize.
  560. * kernsize is the size of the kernel that is actually mapped.
  561. */
  562. data_start = round_page(kernelend);
  563. data_end = data_start;
  564. /* Allocate the dynamic per-cpu area. */
  565. dpcpu = (void *)data_end;
  566. data_end += DPCPU_SIZE;
  567. /* Allocate space for the message buffer. */
  568. msgbufp = (struct msgbuf *)data_end;
  569. data_end += msgbufsize;
  570. debugf(" msgbufp at 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n",
  571. (uintptr_t)msgbufp, data_end);
  572. data_end = round_page(data_end);
  573. data_end = round_page(mmu_booke_alloc_kernel_pgtables(data_end));
  574. /* Retrieve phys/avail mem regions */
  575. mem_regions(&physmem_regions, &physmem_regions_sz,
  576. &availmem_regions, &availmem_regions_sz);
  577. if (PHYS_AVAIL_ENTRIES < availmem_regions_sz)
  578. panic("mmu_booke_bootstrap: phys_avail too small");
  579. data_end = round_page(data_end);
  580. vm_page_array = (vm_page_t)data_end;
  581. /*
  582. * Get a rough idea (upper bound) on the size of the page array. The
  583. * vm_page_array will not handle any more pages than we have in the
  584. * avail_regions array, and most likely much less.
  585. */
  586. sz = 0;
  587. for (mp = availmem_regions; mp->mr_size; mp++) {
  588. sz += mp->mr_size;
  589. }
  590. sz = (round_page(sz) / (PAGE_SIZE + sizeof(struct vm_page)));
  591. data_end += round_page(sz * sizeof(struct vm_page));
  592. /* Pre-round up to 1MB. This wastes some space, but saves TLB entries */
  593. data_end = roundup2(data_end, 1 << 20);
  594. debugf(" data_end: 0x%"PRI0ptrX"\n", data_end);
  595. debugf(" kernstart: %#zx\n", kernstart);
  596. debugf(" kernsize: %#zx\n", kernsize);
  597. if (data_end - kernstart > kernsize) {
  598. kernsize += tlb1_mapin_region(kernstart + kernsize,
  599. kernload + kernsize, (data_end - kernstart) - kernsize,
  600. _TLB_ENTRY_MEM);
  601. }
  602. data_end = kernstart + kernsize;
  603. debugf(" updated data_end: 0x%"PRI0ptrX"\n", data_end);
  604. /*
  605. * Clear the structures - note we can only do it safely after the
  606. * possible additional TLB1 translations are in place (above) so that
  607. * the whole range up to the currently calculated 'data_end' is covered.
  608. */
  609. bzero((void *)data_start, data_end - data_start);
  610. dpcpu_init(dpcpu, 0);
  611. /*******************************************************/
  612. /* Set the start and end of kva. */
  613. /*******************************************************/
  614. virtual_avail = round_page(data_end);
  615. virtual_end = VM_MAX_KERNEL_ADDRESS;
  616. #ifndef __powerpc64__
  617. /* Allocate KVA space for page zero/copy operations. */
  618. zero_page_va = virtual_avail;
  619. virtual_avail += PAGE_SIZE;
  620. copy_page_src_va = virtual_avail;
  621. virtual_avail += PAGE_SIZE;
  622. copy_page_dst_va = virtual_avail;
  623. virtual_avail += PAGE_SIZE;
  624. debugf("zero_page_va = 0x%"PRI0ptrX"\n", zero_page_va);
  625. debugf("copy_page_src_va = 0x%"PRI0ptrX"\n", copy_page_src_va);
  626. debugf("copy_page_dst_va = 0x%"PRI0ptrX"\n", copy_page_dst_va);
  627. /* Initialize page zero/copy mutexes. */
  628. mtx_init(&zero_page_mutex, "mmu_booke_zero_page", NULL, MTX_DEF);
  629. mtx_init(&copy_page_mutex, "mmu_booke_copy_page", NULL, MTX_DEF);
  630. /* Allocate KVA space for ptbl bufs. */
  631. ptbl_buf_pool_vabase = virtual_avail;
  632. virtual_avail += PTBL_BUFS * PTBL_PAGES * PAGE_SIZE;
  633. debugf("ptbl_buf_pool_vabase = 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n",
  634. ptbl_buf_pool_vabase, virtual_avail);
  635. #endif
  636. /* Calculate corresponding physical addresses for the kernel region. */
  637. phys_kernelend = kernload + kernsize;
  638. debugf("kernel image and allocated data:\n");
  639. debugf(" kernload = 0x%09jx\n", (uintmax_t)kernload);
  640. debugf(" kernstart = 0x%"PRI0ptrX"\n", kernstart);
  641. debugf(" kernsize = 0x%"PRI0ptrX"\n", kernsize);
  642. /*
  643. * Remove kernel physical address range from avail regions list. Page
  644. * align all regions. Non-page aligned memory isn't very interesting
  645. * to us. Also, sort the entries for ascending addresses.
  646. */
  647. sz = 0;
  648. cnt = availmem_regions_sz;
  649. debugf("processing avail regions:\n");
  650. for (mp = availmem_regions; mp->mr_size; mp++) {
  651. s = mp->mr_start;
  652. e = mp->mr_start + mp->mr_size;
  653. debugf(" %09jx-%09jx -> ", (uintmax_t)s, (uintmax_t)e);
  654. /* Check whether this region holds all of the kernel. */
  655. if (s < kernload && e > phys_kernelend) {
  656. availmem_regions[cnt].mr_start = phys_kernelend;
  657. availmem_regions[cnt++].mr_size = e - phys_kernelend;
  658. e = kernload;
  659. }
  660. /* Look whether this region starts within the kernel. */
  661. if (s >= kernload && s < phys_kernelend) {
  662. if (e <= phys_kernelend)
  663. goto empty;
  664. s = phys_kernelend;
  665. }
  666. /* Now look whether this region ends within the kernel. */
  667. if (e > kernload && e <= phys_kernelend) {
  668. if (s >= kernload)
  669. goto empty;
  670. e = kernload;
  671. }
  672. /* Now page align the start and size of the region. */
  673. s = round_page(s);
  674. e = trunc_page(e);
  675. if (e < s)
  676. e = s;
  677. sz = e - s;
  678. debugf("%09jx-%09jx = %jx\n",
  679. (uintmax_t)s, (uintmax_t)e, (uintmax_t)sz);
  680. /* Check whether some memory is left here. */
  681. if (sz == 0) {
  682. empty:
  683. memmove(mp, mp + 1,
  684. (cnt - (mp - availmem_regions)) * sizeof(*mp));
  685. cnt--;
  686. mp--;
  687. continue;
  688. }
  689. /* Do an insertion sort. */
  690. for (mp1 = availmem_regions; mp1 < mp; mp1++)
  691. if (s < mp1->mr_start)
  692. break;
  693. if (mp1 < mp) {
  694. memmove(mp1 + 1, mp1, (char *)mp - (char *)mp1);
  695. mp1->mr_start = s;
  696. mp1->mr_size = sz;
  697. } else {
  698. mp->mr_start = s;
  699. mp->mr_size = sz;
  700. }
  701. }
  702. availmem_regions_sz = cnt;
  703. /*******************************************************/
  704. /* Steal physical memory for kernel stack from the end */
  705. /* of the first avail region */
  706. /*******************************************************/
  707. kstack0_sz = kstack_pages * PAGE_SIZE;
  708. kstack0_phys = availmem_regions[0].mr_start +
  709. availmem_regions[0].mr_size;
  710. kstack0_phys -= kstack0_sz;
  711. availmem_regions[0].mr_size -= kstack0_sz;
  712. /*******************************************************/
  713. /* Fill in phys_avail table, based on availmem_regions */
  714. /*******************************************************/
  715. phys_avail_count = 0;
  716. physsz = 0;
  717. hwphyssz = 0;
  718. TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz);
  719. debugf("fill in phys_avail:\n");
  720. for (i = 0, j = 0; i < availmem_regions_sz; i++, j += 2) {
  721. debugf(" region: 0x%jx - 0x%jx (0x%jx)\n",
  722. (uintmax_t)availmem_regions[i].mr_start,
  723. (uintmax_t)availmem_regions[i].mr_start +
  724. availmem_regions[i].mr_size,
  725. (uintmax_t)availmem_regions[i].mr_size);
  726. if (hwphyssz != 0 &&
  727. (physsz + availmem_regions[i].mr_size) >= hwphyssz) {
  728. debugf(" hw.physmem adjust\n");
  729. if (physsz < hwphyssz) {
  730. phys_avail[j] = availmem_regions[i].mr_start;
  731. phys_avail[j + 1] =
  732. availmem_regions[i].mr_start +
  733. hwphyssz - physsz;
  734. physsz = hwphyssz;
  735. phys_avail_count++;
  736. dump_avail[j] = phys_avail[j];
  737. dump_avail[j + 1] = phys_avail[j + 1];
  738. }
  739. break;
  740. }
  741. phys_avail[j] = availmem_regions[i].mr_start;
  742. phys_avail[j + 1] = availmem_regions[i].mr_start +
  743. availmem_regions[i].mr_size;
  744. phys_avail_count++;
  745. physsz += availmem_regions[i].mr_size;
  746. dump_avail[j] = phys_avail[j];
  747. dump_avail[j + 1] = phys_avail[j + 1];
  748. }
  749. physmem = btoc(physsz);
  750. /* Calculate the last available physical address. */
  751. for (i = 0; phys_avail[i + 2] != 0; i += 2)
  752. ;
  753. Maxmem = powerpc_btop(phys_avail[i + 1]);
  754. debugf("Maxmem = 0x%08lx\n", Maxmem);
  755. debugf("phys_avail_count = %d\n", phys_avail_count);
  756. debugf("physsz = 0x%09jx physmem = %jd (0x%09jx)\n",
  757. (uintmax_t)physsz, (uintmax_t)physmem, (uintmax_t)physmem);
  758. #ifdef __powerpc64__
  759. /*
  760. * Map the physical memory contiguously in TLB1.
  761. * Round so it fits into a single mapping.
  762. */
  763. tlb1_mapin_region(DMAP_BASE_ADDRESS, 0,
  764. phys_avail[i + 1], _TLB_ENTRY_MEM);
  765. #endif
  766. /*******************************************************/
  767. /* Initialize (statically allocated) kernel pmap. */
  768. /*******************************************************/
  769. PMAP_LOCK_INIT(kernel_pmap);
  770. debugf("kernel_pmap = 0x%"PRI0ptrX"\n", (uintptr_t)kernel_pmap);
  771. kernel_pte_alloc(virtual_avail, kernstart);
  772. for (i = 0; i < MAXCPU; i++) {
  773. kernel_pmap->pm_tid[i] = TID_KERNEL;
  774. /* Initialize each CPU's tidbusy entry 0 with kernel_pmap */
  775. tidbusy[i][TID_KERNEL] = kernel_pmap;
  776. }
  777. /* Mark kernel_pmap active on all CPUs */
  778. CPU_FILL(&kernel_pmap->pm_active);
  779. /*
  780. * Initialize the global pv list lock.
  781. */
  782. rw_init(&pvh_global_lock, "pmap pv global");
  783. /*******************************************************/
  784. /* Final setup */
  785. /*******************************************************/
  786. /* Enter kstack0 into kernel map, provide guard page */
  787. kstack0 = virtual_avail + KSTACK_GUARD_PAGES * PAGE_SIZE;
  788. thread0.td_kstack = kstack0;
  789. thread0.td_kstack_pages = kstack_pages;
  790. debugf("kstack_sz = 0x%08jx\n", (uintmax_t)kstack0_sz);
  791. debugf("kstack0_phys at 0x%09jx - 0x%09jx\n",
  792. (uintmax_t)kstack0_phys, (uintmax_t)kstack0_phys + kstack0_sz);
  793. debugf("kstack0 at 0x%"PRI0ptrX" - 0x%"PRI0ptrX"\n",
  794. kstack0, kstack0 + kstack0_sz);
  795. virtual_avail += KSTACK_GUARD_PAGES * PAGE_SIZE + kstack0_sz;
  796. for (i = 0; i < kstack_pages; i++) {
  797. mmu_booke_kenter(kstack0, kstack0_phys);
  798. kstack0 += PAGE_SIZE;
  799. kstack0_phys += PAGE_SIZE;
  800. }
  801. pmap_bootstrapped = 1;
  802. debugf("virtual_avail = %"PRI0ptrX"\n", virtual_avail);
  803. debugf("virtual_end = %"PRI0ptrX"\n", virtual_end);
  804. debugf("mmu_booke_bootstrap: exit\n");
  805. }
  806. #ifdef SMP
  807. void
  808. tlb1_ap_prep(void)
  809. {
  810. tlb_entry_t *e, tmp;
  811. unsigned int i;
  812. /* Prepare TLB1 image for AP processors */
  813. e = __boot_tlb1;
  814. for (i = 0; i < TLB1_ENTRIES; i++) {
  815. tlb1_read_entry(&tmp, i);
  816. if ((tmp.mas1 & MAS1_VALID) && (tmp.mas2 & _TLB_ENTRY_SHARED))
  817. memcpy(e++, &tmp, sizeof(tmp));
  818. }
  819. }
  820. void
  821. pmap_bootstrap_ap(volatile uint32_t *trcp __unused)
  822. {
  823. int i;
  824. /*
  825. * Finish TLB1 configuration: the BSP already set up its TLB1 and we
  826. * have the snapshot of its contents in the s/w __boot_tlb1[] table
  827. * created by tlb1_ap_prep(), so use these values directly to
  828. * (re)program AP's TLB1 hardware.
  829. *
  830. * Start at index 1 because index 0 has the kernel map.
  831. */
  832. for (i = 1; i < TLB1_ENTRIES; i++) {
  833. if (__boot_tlb1[i].mas1 & MAS1_VALID)
  834. tlb1_write_entry(&__boot_tlb1[i], i);
  835. }
  836. set_mas4_defaults();
  837. }
  838. #endif
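/*
 * Reserve one page of KVA per CPU for the temporary mappings used by
 * mmu_booke_quick_enter_page().
 */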
  839. static void
  840. booke_pmap_init_qpages(void)
  841. {
  842. struct pcpu *pc;
  843. int i;
  844. CPU_FOREACH(i) {
  845. pc = pcpu_find(i);
  846. pc->pc_qmap_addr = kva_alloc(PAGE_SIZE);
  847. if (pc->pc_qmap_addr == 0)
  848. panic("pmap_init_qpages: unable to allocate KVA");
  849. }
  850. }
  851. SYSINIT(qpages_init, SI_SUB_CPU, SI_ORDER_ANY, booke_pmap_init_qpages, NULL);
  852. /*
  853. * Get the physical page address for the given pmap/virtual address.
  854. */
  855. static vm_paddr_t
  856. mmu_booke_extract(pmap_t pmap, vm_offset_t va)
  857. {
  858. vm_paddr_t pa;
  859. PMAP_LOCK(pmap);
  860. pa = pte_vatopa(pmap, va);
  861. PMAP_UNLOCK(pmap);
  862. return (pa);
  863. }
  864. /*
  865. * Extract the physical page address associated with the given
  866. * kernel virtual address.
  867. */
  868. static vm_paddr_t
  869. mmu_booke_kextract(vm_offset_t va)
  870. {
  871. tlb_entry_t e;
  872. vm_paddr_t p = 0;
  873. int i;
  874. #ifdef __powerpc64__
  875. if (va >= DMAP_BASE_ADDRESS && va <= DMAP_MAX_ADDRESS)
  876. return (DMAP_TO_PHYS(va));
  877. #endif
  878. if (va >= VM_MIN_KERNEL_ADDRESS && va <= VM_MAX_KERNEL_ADDRESS)
  879. p = pte_vatopa(kernel_pmap, va);
  880. if (p == 0) {
  881. /* Check TLB1 mappings */
  882. for (i = 0; i < TLB1_ENTRIES; i++) {
  883. tlb1_read_entry(&e, i);
  884. if (!(e.mas1 & MAS1_VALID))
  885. continue;
  886. if (va >= e.virt && va < e.virt + e.size)
  887. return (e.phys + (va - e.virt));
  888. }
  889. }
  890. return (p);
  891. }
  892. /*
  893. * Initialize the pmap module.
  894. * Called by vm_init, to initialize any structures that the pmap
  895. * system needs to map virtual memory.
  896. */
  897. static void
  898. mmu_booke_init(void)
  899. {
  900. int shpgperproc = PMAP_SHPGPERPROC;
  901. /*
  902. * Initialize the address space (zone) for the pv entries. Set a
  903. * high water mark so that the system can recover from excessive
  904. * numbers of pv entries.
  905. */
  906. pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL,
  907. NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
  908. TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
  909. pv_entry_max = shpgperproc * maxproc + vm_cnt.v_page_count;
  910. TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
  911. pv_entry_high_water = 9 * (pv_entry_max / 10);
  912. uma_zone_reserve_kva(pvzone, pv_entry_max);
  913. /* Pre-fill pvzone with initial number of pv entries. */
  914. uma_prealloc(pvzone, PV_ENTRY_ZONE_MIN);
  915. /* Create a UMA zone for page table roots. */
  916. ptbl_root_zone = uma_zcreate("pmap root", PMAP_ROOT_SIZE,
  917. NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, UMA_ZONE_VM);
  918. /* Initialize ptbl allocation. */
  919. ptbl_init();
  920. }
  921. /*
  922. * Map a list of wired pages into kernel virtual address space. This is
  923. * intended for temporary mappings which do not need page modification or
  924. * references recorded. Existing mappings in the region are overwritten.
  925. */
  926. static void
  927. mmu_booke_qenter(vm_offset_t sva, vm_page_t *m, int count)
  928. {
  929. vm_offset_t va;
  930. va = sva;
  931. while (count-- > 0) {
  932. mmu_booke_kenter(va, VM_PAGE_TO_PHYS(*m));
  933. va += PAGE_SIZE;
  934. m++;
  935. }
  936. }
  937. /*
  938. * Remove page mappings from kernel virtual address space. Intended for
  939. * temporary mappings entered by mmu_booke_qenter.
  940. */
  941. static void
  942. mmu_booke_qremove(vm_offset_t sva, int count)
  943. {
  944. vm_offset_t va;
  945. va = sva;
  946. while (count-- > 0) {
  947. mmu_booke_kremove(va);
  948. va += PAGE_SIZE;
  949. }
  950. }
  951. /*
  952. * Map a wired page into kernel virtual address space.
  953. */
  954. static void
  955. mmu_booke_kenter(vm_offset_t va, vm_paddr_t pa)
  956. {
  957. mmu_booke_kenter_attr(va, pa, VM_MEMATTR_DEFAULT);
  958. }
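/*
 * Like mmu_booke_kenter(), but with a caller-supplied memory attribute that
 * is translated into WIMG bits by tlb_calc_wimg().
 */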
  959. static void
  960. mmu_booke_kenter_attr(vm_offset_t va, vm_paddr_t pa, vm_memattr_t ma)
  961. {
  962. uint32_t flags;
  963. pte_t *pte;
  964. KASSERT(((va >= VM_MIN_KERNEL_ADDRESS) &&
  965. (va <= VM_MAX_KERNEL_ADDRESS)), ("mmu_booke_kenter: invalid va"));
  966. flags = PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID;
  967. flags |= tlb_calc_wimg(pa, ma) << PTE_MAS2_SHIFT;
  968. flags |= PTE_PS_4KB;
  969. pte = pte_find(kernel_pmap, va);
  970. KASSERT((pte != NULL), ("mmu_booke_kenter: invalid va. NULL PTE"));
  971. mtx_lock_spin(&tlbivax_mutex);
  972. tlb_miss_lock();
  973. if (PTE_ISVALID(pte)) {
  974. CTR1(KTR_PMAP, "%s: replacing entry!", __func__);
  975. /* Flush entry from TLB0 */
  976. tlb0_flush_entry(va);
  977. }
  978. *pte = PTE_RPN_FROM_PA(pa) | flags;
  979. //debugf("mmu_booke_kenter: pdir_idx = %d ptbl_idx = %d va=0x%08x "
  980. // "pa=0x%08x rpn=0x%08x flags=0x%08x\n",
  981. // pdir_idx, ptbl_idx, va, pa, pte->rpn, pte->flags);
  982. /* Flush the real memory from the instruction cache. */
  983. if ((flags & (PTE_I | PTE_G)) == 0)
  984. __syncicache((void *)va, PAGE_SIZE);
  985. tlb_miss_unlock();
  986. mtx_unlock_spin(&tlbivax_mutex);
  987. }
  988. /*
  989. * Remove a page from kernel page table.
  990. */
  991. static void
  992. mmu_booke_kremove(vm_offset_t va)
  993. {
  994. pte_t *pte;
  995. CTR2(KTR_PMAP,"%s: s (va = 0x%"PRI0ptrX")\n", __func__, va);
  996. KASSERT(((va >= VM_MIN_KERNEL_ADDRESS) &&
  997. (va <= VM_MAX_KERNEL_ADDRESS)),
  998. ("mmu_booke_kremove: invalid va"));
  999. pte = pte_find(kernel_pmap, va);
  1000. if (!PTE_ISVALID(pte)) {
  1001. CTR1(KTR_PMAP, "%s: invalid pte", __func__);
  1002. return;
  1003. }
  1004. mtx_lock_spin(&tlbivax_mutex);
  1005. tlb_miss_lock();
  1006. /* Invalidate entry in TLB0, update PTE. */
  1007. tlb0_flush_entry(va);
  1008. *pte = 0;
  1009. tlb_miss_unlock();
  1010. mtx_unlock_spin(&tlbivax_mutex);
  1011. }
  1012. /*
  1013. * Figure out where a given kernel pointer (usually in a fault) points
  1014. * to from the VM's perspective, potentially remapping into userland's
  1015. * address space.
  1016. */
  1017. static int
  1018. mmu_booke_decode_kernel_ptr(vm_offset_t addr, int *is_user,
  1019. vm_offset_t *decoded_addr)
  1020. {
  1021. if (trunc_page(addr) <= VM_MAXUSER_ADDRESS)
  1022. *is_user = 1;
  1023. else
  1024. *is_user = 0;
  1025. *decoded_addr = addr;
  1026. return (0);
  1027. }
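/* A page is considered mapped if it has at least one pv entry. */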
  1028. static boolean_t
  1029. mmu_booke_page_is_mapped(vm_page_t m)
  1030. {
  1031. return (!TAILQ_EMPTY(&(m)->md.pv_list));
  1032. }
  1033. /*
  1034. * Initialize pmap associated with process 0.
  1035. */
  1036. static void
  1037. mmu_booke_pinit0(pmap_t pmap)
  1038. {
  1039. PMAP_LOCK_INIT(pmap);
  1040. mmu_booke_pinit(pmap);
  1041. PCPU_SET(curpmap, pmap);
  1042. }
  1043. /*
  1044. * Insert the given physical page at the specified virtual address in the
  1045. * target physical map with the protection requested. If specified the page
  1046. * will be wired down.
  1047. */
  1048. static int
  1049. mmu_booke_enter(pmap_t pmap, vm_offset_t va, vm_page_t m,
  1050. vm_prot_t prot, u_int flags, int8_t psind)
  1051. {
  1052. int error;
  1053. rw_wlock(&pvh_global_lock);
  1054. PMAP_LOCK(pmap);
  1055. error = mmu_booke_enter_locked(pmap, va, m, prot, flags, psind);
  1056. PMAP_UNLOCK(pmap);
  1057. rw_wunlock(&pvh_global_lock);
  1058. return (error);
  1059. }
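/*
 * Guts of mmu_booke_enter(); the caller holds the pmap lock and the global
 * pv list lock.
 */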
  1060. static int
  1061. mmu_booke_enter_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
  1062. vm_prot_t prot, u_int pmap_flags, int8_t psind __unused)
  1063. {
  1064. pte_t *pte;
  1065. vm_paddr_t pa;
  1066. pte_t flags;
  1067. int error, su, sync;
  1068. pa = VM_PAGE_TO_PHYS(m);
  1069. su = (pmap == kernel_pmap);
  1070. sync = 0;
  1071. //debugf("mmu_booke_enter_locked: s (pmap=0x%08x su=%d tid=%d m=0x%08x va=0x%08x "
  1072. // "pa=0x%08x prot=0x%08x flags=%#x)\n",
  1073. // (u_int32_t)pmap, su, pmap->pm_tid,
  1074. // (u_int32_t)m, va, pa, prot, flags);
  1075. if (su) {
  1076. KASSERT(((va >= virtual_avail) &&
  1077. (va <= VM_MAX_KERNEL_ADDRESS)),
  1078. ("mmu_booke_enter_locked: kernel pmap, non kernel va"));
  1079. } else {
  1080. KASSERT((va <= VM_MAXUSER_ADDRESS),
  1081. ("mmu_booke_enter_locked: user pmap, non user va"));
  1082. }
  1083. if ((m->oflags & VPO_UNMANAGED) == 0) {
  1084. if ((pmap_flags & PMAP_ENTER_QUICK_LOCKED) == 0)
  1085. VM_PAGE_OBJECT_BUSY_ASSERT(m);
  1086. else
  1087. VM_OBJECT_ASSERT_LOCKED(m->object);
  1088. }
  1089. PMAP_LOCK_ASSERT(pmap, MA_OWNED);
  1090. /*
  1091. * If there is an existing mapping, and the physical address has not
  1092. * changed, this must be a protection or wiring change.
  1093. */
  1094. if (((pte = pte_find(pmap, va)) != NULL) &&
  1095. (PTE_ISVALID(pte)) && (PTE_PA(pte) == pa)) {
  1096. /*
  1097. * Before actually updating pte->flags we calculate and
  1098. * prepare its new value in a helper var.
  1099. */
  1100. flags = *pte;
  1101. flags &= ~(PTE_UW | PTE_UX | PTE_SW | PTE_SX | PTE_MODIFIED);
  1102. /* Wiring change, just update stats. */
  1103. if ((pmap_flags & PMAP_ENTER_WIRED) != 0) {
  1104. if (!PTE_ISWIRED(pte)) {
  1105. flags |= PTE_WIRED;
  1106. pmap->pm_stats.wired_count++;
  1107. }
  1108. } else {
  1109. if (PTE_ISWIRED(pte)) {
  1110. flags &= ~PTE_WIRED;
  1111. pmap->pm_stats.wired_count--;
  1112. }
  1113. }
  1114. if (prot & VM_PROT_WRITE) {
  1115. /* Add write permissions. */
  1116. flags |= PTE_SW;
  1117. if (!su)
  1118. flags |= PTE_UW;
  1119. if ((flags & PTE_MANAGED) != 0)
  1120. vm_page_aflag_set(m, PGA_WRITEABLE);
  1121. } else {
  1122. /* Handle modified pages, sense modify status. */
  1123. /*
  1124. * The PTE_MODIFIED flag could be set by underlying
  1125. * TLB misses since we last read it (above), possibly
  1126. * other CPUs could update it so we check in the PTE
  1127. * directly rather than rely on that saved local flags
  1128. * copy.
  1129. */
  1130. if (PTE_ISMODIFIED(pte))
  1131. vm_page_dirty(m);
  1132. }
  1133. if (prot & VM_PROT_EXECUTE) {
  1134. flags |= PTE_SX;
  1135. if (!su)
  1136. flags |= PTE_UX;
  1137. /*
  1138. * Check existing flags for execute permissions: if we
  1139. * are turning execute permissions on, icache should
  1140. * be flushed.
  1141. */
  1142. if ((*pte & (PTE_UX | PTE_SX)) == 0)
  1143. sync++;
  1144. }
  1145. flags &= ~PTE_REFERENCED;
  1146. /*
  1147. * The new flags value is all calculated -- only now actually
  1148. * update the PTE.
  1149. */
  1150. mtx_lock_spin(&tlbivax_mutex);
  1151. tlb_miss_lock();
  1152. tlb0_flush_entry(va);
  1153. *pte &= ~PTE_FLAGS_MASK;
  1154. *pte |= flags;
  1155. tlb_miss_unlock();
  1156. mtx_unlock_spin(&tlbivax_mutex);
  1157. } else {
  1158. /*
  1159. * If there is an existing mapping, but it's for a different
  1160. * physical address, pte_enter() will delete the old mapping.
  1161. */
  1162. //if ((pte != NULL) && PTE_ISVALID(pte))
  1163. // debugf("mmu_booke_enter_locked: replace\n");
  1164. //else
  1165. // debugf("mmu_booke_enter_locked: new\n");
  1166. /* Now set up the flags and install the new mapping. */
  1167. flags = (PTE_SR | PTE_VALID);
  1168. flags |= PTE_M;
  1169. if (!su)
  1170. flags |= PTE_UR;
  1171. if (prot & VM_PROT_WRITE) {
  1172. flags |= PTE_SW;
  1173. if (!su)
  1174. flags |= PTE_UW;
  1175. if ((m->oflags & VPO_UNMANAGED) == 0)
  1176. vm_page_aflag_set(m, PGA_WRITEABLE);
  1177. }
  1178. if (prot & VM_PROT_EXECUTE) {
  1179. flags |= PTE_SX;
  1180. if (!su)
  1181. flags |= PTE_UX;
  1182. }
1183. /* If it's wired, update stats. */
  1184. if ((pmap_flags & PMAP_ENTER_WIRED) != 0)
  1185. flags |= PTE_WIRED;
  1186. error = pte_enter(pmap, m, va, flags,
  1187. (pmap_flags & PMAP_ENTER_NOSLEEP) != 0);
  1188. if (error != 0)
  1189. return (KERN_RESOURCE_SHORTAGE);
1190. if ((pmap_flags & PMAP_ENTER_WIRED) != 0)
  1191. pmap->pm_stats.wired_count++;
  1192. /* Flush the real memory from the instruction cache. */
  1193. if (prot & VM_PROT_EXECUTE)
  1194. sync++;
  1195. }
  1196. if (sync && (su || pmap == PCPU_GET(curpmap))) {
  1197. __syncicache((void *)va, PAGE_SIZE);
  1198. sync = 0;
  1199. }
  1200. return (KERN_SUCCESS);
  1201. }
  1202. /*
  1203. * Maps a sequence of resident pages belonging to the same object.
  1204. * The sequence begins with the given page m_start. This page is
  1205. * mapped at the given virtual address start. Each subsequent page is
  1206. * mapped at a virtual address that is offset from start by the same
  1207. * amount as the page is offset from m_start within the object. The
  1208. * last page in the sequence is the page with the largest offset from
  1209. * m_start that can be mapped at a virtual address less than the given
  1210. * virtual address end. Not every virtual page between start and end
  1211. * is mapped; only those for which a resident page exists with the
  1212. * corresponding offset from m_start are mapped.
  1213. */
  1214. static void
  1215. mmu_booke_enter_object(pmap_t pmap, vm_offset_t start,
  1216. vm_offset_t end, vm_page_t m_start, vm_prot_t prot)
  1217. {
  1218. vm_page_t m;
  1219. vm_pindex_t diff, psize;
  1220. VM_OBJECT_ASSERT_LOCKED(m_start->object);
  1221. psize = atop(end - start);
  1222. m = m_start;
  1223. rw_wlock(&pvh_global_lock);
  1224. PMAP_LOCK(pmap);
  1225. while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
  1226. mmu_booke_enter_locked(pmap, start + ptoa(diff), m,
  1227. prot & (VM_PROT_READ | VM_PROT_EXECUTE),
  1228. PMAP_ENTER_NOSLEEP | PMAP_ENTER_QUICK_LOCKED, 0);
  1229. m = TAILQ_NEXT(m, listq);
  1230. }
  1231. PMAP_UNLOCK(pmap);
  1232. rw_wunlock(&pvh_global_lock);
  1233. }
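/*
 * Illustrative note (editorial sketch, not additional driver logic): for a
 * hypothetical resident page m with m->pindex == m_start->pindex + 3, the
 * loop above computes diff = 3 and maps m at start + ptoa(3), i.e. three
 * pages past 'start', provided that offset is still below 'psize'.
 * Non-resident pages in the range are simply skipped, as described in the
 * comment preceding this function.
 */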
  1234. static void
  1235. mmu_booke_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m,
  1236. vm_prot_t prot)
  1237. {
  1238. rw_wlock(&pvh_global_lock);
  1239. PMAP_LOCK(pmap);
  1240. mmu_booke_enter_locked(pmap, va, m,
  1241. prot & (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP |
  1242. PMAP_ENTER_QUICK_LOCKED, 0);
  1243. PMAP_UNLOCK(pmap);
  1244. rw_wunlock(&pvh_global_lock);
  1245. }
  1246. /*
  1247. * Remove the given range of addresses from the specified map.
  1248. *
  1249. * It is assumed that the start and end are properly rounded to the page size.
  1250. */
  1251. static void
  1252. mmu_booke_remove(pmap_t pmap, vm_offset_t va, vm_offset_t endva)
  1253. {
  1254. pte_t *pte;
  1255. uint8_t hold_flag;
  1256. int su = (pmap == kernel_pmap);
  1257. //debugf("mmu_booke_remove: s (su = %d pmap=0x%08x tid=%d va=0x%08x endva=0x%08x)\n",
  1258. // su, (u_int32_t)pmap, pmap->pm_tid, va, endva);
  1259. if (su) {
  1260. KASSERT(((va >= virtual_avail) &&
  1261. (va <= VM_MAX_KERNEL_ADDRESS)),
  1262. ("mmu_booke_remove: kernel pmap, non kernel va"));
  1263. } else {
  1264. KASSERT((va <= VM_MAXUSER_ADDRESS),
  1265. ("mmu_booke_remove: user pmap, non user va"));
  1266. }
  1267. if (PMAP_REMOVE_DONE(pmap)) {
  1268. //debugf("mmu_booke_remove: e (empty)\n");
  1269. return;
  1270. }
  1271. hold_flag = PTBL_HOLD_FLAG(pmap);
  1272. //debugf("mmu_booke_remove: hold_flag = %d\n", hold_flag);
  1273. rw_wlock(&pvh_global_lock);
  1274. PMAP_LOCK(pmap);
  1275. for (; va < endva; va += PAGE_SIZE) {
  1276. pte = pte_find_next(pmap, &va);
  1277. if ((pte == NULL) || !PTE_ISVALID(pte))
  1278. break;
  1279. if (va >= endva)
  1280. break;
  1281. pte_remove(pmap, va, hold_flag);
  1282. }
  1283. PMAP_UNLOCK(pmap);
  1284. rw_wunlock(&pvh_global_lock);
  1285. //debugf("mmu_booke_remove: e\n");
  1286. }
  1287. /*
  1288. * Remove physical page from all pmaps in which it resides.
  1289. */
  1290. static void
  1291. mmu_booke_remove_all(vm_page_t m)
  1292. {
  1293. pv_entry_t pv, pvn;
  1294. uint8_t hold_flag;
  1295. rw_wlock(&pvh_global_lock);
  1296. TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_link, pvn) {
  1297. PMAP_LOCK(pv->pv_pmap);
  1298. hold_flag = PTBL_HOLD_FLAG(pv->pv_pmap);
  1299. pte_remove(pv->pv_pmap, pv->pv_va, hold_flag);
  1300. PMAP_UNLOCK(pv->pv_pmap);
  1301. }
  1302. vm_page_aflag_clear(m, PGA_WRITEABLE);
  1303. rw_wunlock(&pvh_global_lock);
  1304. }
  1305. /*
  1306. * Map a range of physical addresses into kernel virtual address space.
  1307. */
  1308. static vm_offset_t
  1309. mmu_booke_map(vm_offset_t *virt, vm_paddr_t pa_start,
  1310. vm_paddr_t pa_end, int prot)
  1311. {
  1312. vm_offset_t sva = *virt;
  1313. vm_offset_t va = sva;
  1314. #ifdef __powerpc64__
  1315. /* XXX: Handle memory not starting at 0x0. */
  1316. if (pa_end < ctob(Maxmem))
  1317. return (PHYS_TO_DMAP(pa_start));
  1318. #endif
  1319. while (pa_start < pa_end) {
  1320. mmu_booke_kenter(va, pa_start);
  1321. va += PAGE_SIZE;
  1322. pa_start += PAGE_SIZE;
  1323. }
  1324. *virt = va;
  1325. return (sva);
  1326. }
  1327. /*
1328. * The pmap must be activated before its address space can be accessed in any
  1329. * way.
  1330. */
  1331. static void
  1332. mmu_booke_activate(struct thread *td)
  1333. {
  1334. pmap_t pmap;
  1335. u_int cpuid;
  1336. pmap = &td->td_proc->p_vmspace->vm_pmap;
  1337. CTR5(KTR_PMAP, "%s: s (td = %p, proc = '%s', id = %d, pmap = 0x%"PRI0ptrX")",
  1338. __func__, td, td->td_proc->p_comm, td->td_proc->p_pid, pmap);
  1339. KASSERT((pmap != kernel_pmap), ("mmu_booke_activate: kernel_pmap!"));
  1340. sched_pin();
  1341. cpuid = PCPU_GET(cpuid);
  1342. CPU_SET_ATOMIC(cpuid, &pmap->pm_active);
  1343. PCPU_SET(curpmap, pmap);
  1344. if (pmap->pm_tid[cpuid] == TID_NONE)
  1345. tid_alloc(pmap);
  1346. /* Load PID0 register with pmap tid value. */
  1347. mtspr(SPR_PID0, pmap->pm_tid[cpuid]);
  1348. __asm __volatile("isync");
  1349. mtspr(SPR_DBCR0, td->td_pcb->pcb_cpu.booke.dbcr0);
  1350. sched_unpin();
  1351. CTR3(KTR_PMAP, "%s: e (tid = %d for '%s')", __func__,
  1352. pmap->pm_tid[PCPU_GET(cpuid)], td->td_proc->p_comm);
  1353. }
  1354. /*
  1355. * Deactivate the specified process's address space.
  1356. */
  1357. static void
  1358. mmu_booke_deactivate(struct thread *td)
  1359. {
  1360. pmap_t pmap;
  1361. pmap = &td->td_proc->p_vmspace->vm_pmap;
  1362. CTR5(KTR_PMAP, "%s: td=%p, proc = '%s', id = %d, pmap = 0x%"PRI0ptrX,
  1363. __func__, td, td->td_proc->p_comm, td->td_proc->p_pid, pmap);
  1364. td->td_pcb->pcb_cpu.booke.dbcr0 = mfspr(SPR_DBCR0);
  1365. CPU_CLR_ATOMIC(PCPU_GET(cpuid), &pmap->pm_active);
  1366. PCPU_SET(curpmap, NULL);
  1367. }
  1368. /*
  1369. * Copy the range specified by src_addr/len
  1370. * from the source map to the range dst_addr/len
  1371. * in the destination map.
  1372. *
  1373. * This routine is only advisory and need not do anything.
  1374. */
  1375. static void
  1376. mmu_booke_copy(pmap_t dst_pmap, pmap_t src_pmap,
  1377. vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr)
  1378. {
  1379. }
  1380. /*
  1381. * Set the physical protection on the specified range of this map as requested.
  1382. */
  1383. static void
  1384. mmu_booke_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
  1385. vm_prot_t prot)
  1386. {
  1387. vm_offset_t va;
  1388. vm_page_t m;
  1389. pte_t *pte;
  1390. if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
  1391. mmu_booke_remove(pmap, sva, eva);
  1392. return;
  1393. }
  1394. if (prot & VM_PROT_WRITE)
  1395. return;
  1396. PMAP_LOCK(pmap);
  1397. for (va = sva; va < eva; va += PAGE_SIZE) {
  1398. if ((pte = pte_find(pmap, va)) != NULL) {
  1399. if (PTE_ISVALID(pte)) {
  1400. m = PHYS_TO_VM_PAGE(PTE_PA(pte));
  1401. mtx_lock_spin(&tlbivax_mutex);
  1402. tlb_miss_lock();
  1403. /* Handle modified pages. */
  1404. if (PTE_ISMODIFIED(pte) && PTE_ISMANAGED(pte))
  1405. vm_page_dirty(m);
  1406. tlb0_flush_entry(va);
  1407. *pte &= ~(PTE_UW | PTE_SW | PTE_MODIFIED);
  1408. tlb_miss_unlock();
  1409. mtx_unlock_spin(&tlbivax_mutex);
  1410. }
  1411. }
  1412. }
  1413. PMAP_UNLOCK(pmap);
  1414. }
  1415. /*
  1416. * Clear the write and modified bits in each of the given page's mappings.
  1417. */
  1418. static void
  1419. mmu_booke_remove_write(vm_page_t m)
  1420. {
  1421. pv_entry_t pv;
  1422. pte_t *pte;
  1423. KASSERT((m->oflags & VPO_UNMANAGED) == 0,
  1424. ("mmu_booke_remove_write: page %p is not managed", m));
  1425. vm_page_assert_busied(m);
  1426. if (!pmap_page_is_write_mapped(m))
  1427. return;
  1428. rw_wlock(&pvh_global_lock);
  1429. TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
  1430. PMAP_LOCK(pv->pv_pmap);
  1431. if ((pte = pte_find(pv->pv_pmap, pv->pv_va)) != NULL) {
  1432. if (PTE_ISVALID(pte)) {
  1433. m = PHYS_TO_VM_PAGE(PTE_PA(pte));
  1434. mtx_lock_spin(&tlbivax_mutex);
  1435. tlb_miss_lock();
  1436. /* Handle modified pages. */
  1437. if (PTE_ISMODIFIED(pte))
  1438. vm_page_dirty(m);
  1439. /* Flush mapping from TLB0. */
  1440. *pte &= ~(PTE_UW | PTE_SW | PTE_MODIFIED);
  1441. tlb_miss_unlock();
  1442. mtx_unlock_spin(&tlbivax_mutex);
  1443. }
  1444. }
  1445. PMAP_UNLOCK(pv->pv_pmap);
  1446. }
  1447. vm_page_aflag_clear(m, PGA_WRITEABLE);
  1448. rw_wunlock(&pvh_global_lock);
  1449. }
  1450. /*
  1451. * Atomically extract and hold the physical page with the given
  1452. * pmap and virtual address pair if that mapping permits the given
  1453. * protection.
  1454. */
  1455. static vm_page_t
  1456. mmu_booke_extract_and_hold(pmap_t pmap, vm_offset_t va,
  1457. vm_prot_t prot)
  1458. {
  1459. pte_t *pte;
  1460. vm_page_t m;
  1461. uint32_t pte_wbit;
  1462. m = NULL;
  1463. PMAP_LOCK(pmap);
  1464. pte = pte_find(pmap, va);
  1465. if ((pte != NULL) && PTE_ISVALID(pte)) {
  1466. if (pmap == kernel_pmap)
  1467. pte_wbit = PTE_SW;
  1468. else
  1469. pte_wbit = PTE_UW;
  1470. if ((*pte & pte_wbit) != 0 || (prot & VM_PROT_WRITE) == 0) {
  1471. m = PHYS_TO_VM_PAGE(PTE_PA(pte));
  1472. if (!vm_page_wire_mapped(m))
  1473. m = NULL;
  1474. }
  1475. }
  1476. PMAP_UNLOCK(pmap);
  1477. return (m);
  1478. }
  1479. /*
  1480. * Initialize a vm_page's machine-dependent fields.
  1481. */
  1482. static void
  1483. mmu_booke_page_init(vm_page_t m)
  1484. {
  1485. m->md.pv_tracked = 0;
  1486. TAILQ_INIT(&m->md.pv_list);
  1487. }
  1488. /*
  1489. * Return whether or not the specified physical page was modified
  1490. * in any of physical maps.
  1491. */
  1492. static boolean_t
  1493. mmu_booke_is_modified(vm_page_t m)
  1494. {
  1495. pte_t *pte;
  1496. pv_entry_t pv;
  1497. boolean_t rv;
  1498. KASSERT((m->oflags & VPO_UNMANAGED) == 0,
  1499. ("mmu_booke_is_modified: page %p is not managed", m));
  1500. rv = FALSE;
  1501. /*
  1502. * If the page is not busied then this check is racy.
  1503. */
  1504. if (!pmap_page_is_write_mapped(m))
  1505. return (FALSE);
  1506. rw_wlock(&pvh_global_lock);
  1507. TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
  1508. PMAP_LOCK(pv->pv_pmap);
  1509. if ((pte = pte_find(pv->pv_pmap, pv->pv_va)) != NULL &&
  1510. PTE_ISVALID(pte)) {
  1511. if (PTE_ISMODIFIED(pte))
  1512. rv = TRUE;
  1513. }
  1514. PMAP_UNLOCK(pv->pv_pmap);
  1515. if (rv)
  1516. break;
  1517. }
  1518. rw_wunlock(&pvh_global_lock);
  1519. return (rv);
  1520. }
  1521. /*
  1522. * Return whether or not the specified virtual address is eligible
  1523. * for prefault.
  1524. */
  1525. static boolean_t
  1526. mmu_booke_is_prefaultable(pmap_t pmap, vm_offset_t addr)
  1527. {
  1528. return (FALSE);
  1529. }
  1530. /*
  1531. * Return whether or not the specified physical page was referenced
  1532. * in any physical maps.
  1533. */
  1534. static boolean_t
  1535. mmu_booke_is_referenced(vm_page_t m)
  1536. {
  1537. pte_t *pte;
  1538. pv_entry_t pv;
  1539. boolean_t rv;
  1540. KASSERT((m->oflags & VPO_UNMANAGED) == 0,
  1541. ("mmu_booke_is_referenced: page %p is not managed", m));
  1542. rv = FALSE;
  1543. rw_wlock(&pvh_global_lock);
  1544. TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
  1545. PMAP_LOCK(pv->pv_pmap);
  1546. if ((pte = pte_find(pv->pv_pmap, pv->pv_va)) != NULL &&
  1547. PTE_ISVALID(pte)) {
  1548. if (PTE_ISREFERENCED(pte))
  1549. rv = TRUE;
  1550. }
  1551. PMAP_UNLOCK(pv->pv_pmap);
  1552. if (rv)
  1553. break;
  1554. }
  1555. rw_wunlock(&pvh_global_lock);
  1556. return (rv);
  1557. }
  1558. /*
  1559. * Clear the modify bits on the specified physical page.
  1560. */
  1561. static void
  1562. mmu_booke_clear_modify(vm_page_t m)
  1563. {
  1564. pte_t *pte;
  1565. pv_entry_t pv;
  1566. KASSERT((m->oflags & VPO_UNMANAGED) == 0,
  1567. ("mmu_booke_clear_modify: page %p is not managed", m));
  1568. vm_page_assert_busied(m);
  1569. if (!pmap_page_is_write_mapped(m))
  1570. return;
  1571. rw_wlock(&pvh_global_lock);
  1572. TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
  1573. PMAP_LOCK(pv->pv_pmap);
  1574. if ((pte = pte_find(pv->pv_pmap, pv->pv_va)) != NULL &&
  1575. PTE_ISVALID(pte)) {
  1576. mtx_lock_spin(&tlbivax_mutex);
  1577. tlb_miss_lock();
  1578. if (*pte & (PTE_SW | PTE_UW | PTE_MODIFIED)) {
  1579. tlb0_flush_entry(pv->pv_va);
  1580. *pte &= ~(PTE_SW | PTE_UW | PTE_MODIFIED |
  1581. PTE_REFERENCED);
  1582. }
  1583. tlb_miss_unlock();
  1584. mtx_unlock_spin(&tlbivax_mutex);
  1585. }
  1586. PMAP_UNLOCK(pv->pv_pmap);
  1587. }
  1588. rw_wunlock(&pvh_global_lock);
  1589. }
  1590. /*
  1591. * Return a count of reference bits for a page, clearing those bits.
  1592. * It is not necessary for every reference bit to be cleared, but it
  1593. * is necessary that 0 only be returned when there are truly no
  1594. * reference bits set.
  1595. *
  1596. * As an optimization, update the page's dirty field if a modified bit is
  1597. * found while counting reference bits. This opportunistic update can be
  1598. * performed at low cost and can eliminate the need for some future calls
  1599. * to pmap_is_modified(). However, since this function stops after
  1600. * finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some
  1601. * dirty pages. Those dirty pages will only be detected by a future call
  1602. * to pmap_is_modified().
  1603. */
  1604. static int
  1605. mmu_booke_ts_referenced(vm_page_t m)
  1606. {
  1607. pte_t *pte;
  1608. pv_entry_t pv;
  1609. int count;
  1610. KASSERT((m->oflags & VPO_UNMANAGED) == 0,
  1611. ("mmu_booke_ts_referenced: page %p is not managed", m));
  1612. count = 0;
  1613. rw_wlock(&pvh_global_lock);
  1614. TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
  1615. PMAP_LOCK(pv->pv_pmap);
  1616. if ((pte = pte_find(pv->pv_pmap, pv->pv_va)) != NULL &&
  1617. PTE_ISVALID(pte)) {
  1618. if (PTE_ISMODIFIED(pte))
  1619. vm_page_dirty(m);
  1620. if (PTE_ISREFERENCED(pte)) {
  1621. mtx_lock_spin(&tlbivax_mutex);
  1622. tlb_miss_lock();
  1623. tlb0_flush_entry(pv->pv_va);
  1624. *pte &= ~PTE_REFERENCED;
  1625. tlb_miss_unlock();
  1626. mtx_unlock_spin(&tlbivax_mutex);
  1627. if (++count >= PMAP_TS_REFERENCED_MAX) {
  1628. PMAP_UNLOCK(pv->pv_pmap);
  1629. break;
  1630. }
  1631. }
  1632. }
  1633. PMAP_UNLOCK(pv->pv_pmap);
  1634. }
  1635. rw_wunlock(&pvh_global_lock);
  1636. return (count);
  1637. }
  1638. /*
  1639. * Clear the wired attribute from the mappings for the specified range of
  1640. * addresses in the given pmap. Every valid mapping within that range must
  1641. * have the wired attribute set. In contrast, invalid mappings cannot have
  1642. * the wired attribute set, so they are ignored.
  1643. *
  1644. * The wired attribute of the page table entry is not a hardware feature, so
  1645. * there is no need to invalidate any TLB entries.
  1646. */
  1647. static void
  1648. mmu_booke_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
  1649. {
  1650. vm_offset_t va;
  1651. pte_t *pte;
  1652. PMAP_LOCK(pmap);
  1653. for (va = sva; va < eva; va += PAGE_SIZE) {
  1654. if ((pte = pte_find(pmap, va)) != NULL &&
  1655. PTE_ISVALID(pte)) {
  1656. if (!PTE_ISWIRED(pte))
  1657. panic("mmu_booke_unwire: pte %p isn't wired",
  1658. pte);
  1659. *pte &= ~PTE_WIRED;
  1660. pmap->pm_stats.wired_count--;
  1661. }
  1662. }
  1663. PMAP_UNLOCK(pmap);
  1664. }
  1665. /*
  1666. * Return true if the pmap's pv is one of the first 16 pvs linked to from this
  1667. * page. This count may be changed upwards or downwards in the future; it is
  1668. * only necessary that true be returned for a small subset of pmaps for proper
  1669. * page aging.
  1670. */
  1671. static boolean_t
  1672. mmu_booke_page_exists_quick(pmap_t pmap, vm_page_t m)
  1673. {
  1674. pv_entry_t pv;
  1675. int loops;
  1676. boolean_t rv;
  1677. KASSERT((m->oflags & VPO_UNMANAGED) == 0,
  1678. ("mmu_booke_page_exists_quick: page %p is not managed", m));
  1679. loops = 0;
  1680. rv = FALSE;
  1681. rw_wlock(&pvh_global_lock);
  1682. TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
  1683. if (pv->pv_pmap == pmap) {
  1684. rv = TRUE;
  1685. break;
  1686. }
  1687. if (++loops >= 16)
  1688. break;
  1689. }
  1690. rw_wunlock(&pvh_global_lock);
  1691. return (rv);
  1692. }
  1693. /*
  1694. * Return the number of managed mappings to the given physical page that are
  1695. * wired.
  1696. */
  1697. static int
  1698. mmu_booke_page_wired_mappings(vm_page_t m)
  1699. {
  1700. pv_entry_t pv;
  1701. pte_t *pte;
  1702. int count = 0;
  1703. if ((m->oflags & VPO_UNMANAGED) != 0)
  1704. return (count);
  1705. rw_wlock(&pvh_global_lock);
  1706. TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
  1707. PMAP_LOCK(pv->pv_pmap);
  1708. if ((pte = pte_find(pv->pv_pmap, pv->pv_va)) != NULL)
  1709. if (PTE_ISVALID(pte) && PTE_ISWIRED(pte))
  1710. count++;
  1711. PMAP_UNLOCK(pv->pv_pmap);
  1712. }
  1713. rw_wunlock(&pvh_global_lock);
  1714. return (count);
  1715. }
  1716. static int
  1717. mmu_booke_dev_direct_mapped(vm_paddr_t pa, vm_size_t size)
  1718. {
  1719. int i;
  1720. vm_offset_t va;
  1721. /*
  1722. * This currently does not work for entries that
  1723. * overlap TLB1 entries.
  1724. */
  1725. for (i = 0; i < TLB1_ENTRIES; i ++) {
  1726. if (tlb1_iomapped(i, pa, size, &va) == 0)
  1727. return (0);
  1728. }
  1729. return (EFAULT);
  1730. }
  1731. void
  1732. mmu_booke_dumpsys_map(vm_paddr_t pa, size_t sz, void **va)
  1733. {
  1734. vm_paddr_t ppa;
  1735. vm_offset_t ofs;
  1736. vm_size_t gran;
  1737. /* Minidumps are based on virtual memory addresses. */
  1738. if (do_minidump) {
  1739. *va = (void *)(vm_offset_t)pa;
  1740. return;
  1741. }
  1742. /* Raw physical memory dumps don't have a virtual address. */
  1743. /* We always map a 256MB page at 256M. */
  1744. gran = 256 * 1024 * 1024;
  1745. ppa = rounddown2(pa, gran);
  1746. ofs = pa - ppa;
  1747. *va = (void *)gran;
1748. tlb1_set_entry((vm_offset_t)*va, ppa, gran, _TLB_ENTRY_IO);
  1749. if (sz > (gran - ofs))
1750. tlb1_set_entry((vm_offset_t)*va + gran, ppa + gran, gran,
  1751. _TLB_ENTRY_IO);
  1752. }
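/*
 * Worked example (editorial sketch of the arithmetic above): with
 * gran = 0x10000000 (256MB) and, say, pa = 0x12345678, we get
 * ppa = 0x10000000 and ofs = 0x02345678; a 256MB window starting at ppa
 * is wired at VA 0x10000000, and if sz extends past gran - ofs a second
 * adjacent 256MB window is wired immediately after it.
 */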
  1753. void
  1754. mmu_booke_dumpsys_unmap(vm_paddr_t pa, size_t sz, void *va)
  1755. {
  1756. vm_paddr_t ppa;
  1757. vm_offset_t ofs;
  1758. vm_size_t gran;
  1759. tlb_entry_t e;
  1760. int i;
  1761. /* Minidumps are based on virtual memory addresses. */
  1762. /* Nothing to do... */
  1763. if (do_minidump)
  1764. return;
  1765. for (i = 0; i < TLB1_ENTRIES; i++) {
  1766. tlb1_read_entry(&e, i);
  1767. if (!(e.mas1 & MAS1_VALID))
  1768. break;
  1769. }
  1770. /* Raw physical memory dumps don't have a virtual address. */
  1771. i--;
  1772. e.mas1 = 0;
  1773. e.mas2 = 0;
  1774. e.mas3 = 0;
  1775. tlb1_write_entry(&e, i);
  1776. gran = 256 * 1024 * 1024;
  1777. ppa = rounddown2(pa, gran);
  1778. ofs = pa - ppa;
  1779. if (sz > (gran - ofs)) {
  1780. i--;
  1781. e.mas1 = 0;
  1782. e.mas2 = 0;
  1783. e.mas3 = 0;
  1784. tlb1_write_entry(&e, i);
  1785. }
  1786. }
  1787. extern struct dump_pa dump_map[PHYS_AVAIL_SZ + 1];
  1788. void
  1789. mmu_booke_scan_init()
  1790. {
  1791. vm_offset_t va;
  1792. pte_t *pte;
  1793. int i;
  1794. if (!do_minidump) {
  1795. /* Initialize phys. segments for dumpsys(). */
  1796. memset(&dump_map, 0, sizeof(dump_map));
  1797. mem_regions(&physmem_regions, &physmem_regions_sz, &availmem_regions,
  1798. &availmem_regions_sz);
  1799. for (i = 0; i < physmem_regions_sz; i++) {
  1800. dump_map[i].pa_start = physmem_regions[i].mr_start;
  1801. dump_map[i].pa_size = physmem_regions[i].mr_size;
  1802. }
  1803. return;
  1804. }
  1805. /* Virtual segments for minidumps: */
  1806. memset(&dump_map, 0, sizeof(dump_map));
  1807. /* 1st: kernel .data and .bss. */
  1808. dump_map[0].pa_start = trunc_page((uintptr_t)_etext);
  1809. dump_map[0].pa_size =
  1810. round_page((uintptr_t)_end) - dump_map[0].pa_start;
  1811. /* 2nd: msgbuf and tables (see pmap_bootstrap()). */
  1812. dump_map[1].pa_start = data_start;
  1813. dump_map[1].pa_size = data_end - data_start;
  1814. /* 3rd: kernel VM. */
  1815. va = dump_map[1].pa_start + dump_map[1].pa_size;
  1816. /* Find start of next chunk (from va). */
  1817. while (va < virtual_end) {
  1818. /* Don't dump the buffer cache. */
  1819. if (va >= kmi.buffer_sva && va < kmi.buffer_eva) {
  1820. va = kmi.buffer_eva;
  1821. continue;
  1822. }
  1823. pte = pte_find(kernel_pmap, va);
  1824. if (pte != NULL && PTE_ISVALID(pte))
  1825. break;
  1826. va += PAGE_SIZE;
  1827. }
  1828. if (va < virtual_end) {
  1829. dump_map[2].pa_start = va;
  1830. va += PAGE_SIZE;
  1831. /* Find last page in chunk. */
  1832. while (va < virtual_end) {
  1833. /* Don't run into the buffer cache. */
  1834. if (va == kmi.buffer_sva)
  1835. break;
  1836. pte = pte_find(kernel_pmap, va);
  1837. if (pte == NULL || !PTE_ISVALID(pte))
  1838. break;
  1839. va += PAGE_SIZE;
  1840. }
  1841. dump_map[2].pa_size = va - dump_map[2].pa_start;
  1842. }
  1843. }
  1844. /*
  1845. * Map a set of physical memory pages into the kernel virtual address space.
  1846. * Return a pointer to where it is mapped. This routine is intended to be used
  1847. * for mapping device memory, NOT real memory.
  1848. */
  1849. static void *
  1850. mmu_booke_mapdev(vm_paddr_t pa, vm_size_t size)
  1851. {
  1852. return (mmu_booke_mapdev_attr(pa, size, VM_MEMATTR_DEFAULT));
  1853. }
  1854. static int
  1855. tlb1_find_pa(vm_paddr_t pa, tlb_entry_t *e)
  1856. {
  1857. int i;
  1858. for (i = 0; i < TLB1_ENTRIES; i++) {
  1859. tlb1_read_entry(e, i);
  1860. if ((e->mas1 & MAS1_VALID) == 0)
  1861. continue;
  1862. if (e->phys == pa)
  1863. return (i);
  1864. }
  1865. return (-1);
  1866. }
  1867. static void *
  1868. mmu_booke_mapdev_attr(vm_paddr_t pa, vm_size_t size, vm_memattr_t ma)
  1869. {
  1870. tlb_entry_t e;
  1871. vm_paddr_t tmppa;
  1872. #ifndef __powerpc64__
  1873. uintptr_t tmpva;
  1874. #endif
  1875. uintptr_t va, retva;
  1876. vm_size_t sz;
  1877. int i;
  1878. int wimge;
  1879. /*
  1880. * Check if this is premapped in TLB1.
  1881. */
  1882. sz = size;
  1883. tmppa = pa;
  1884. va = ~0;
  1885. wimge = tlb_calc_wimg(pa, ma);
  1886. for (i = 0; i < TLB1_ENTRIES; i++) {
  1887. tlb1_read_entry(&e, i);
  1888. if (!(e.mas1 & MAS1_VALID))
  1889. continue;
  1890. if (wimge != (e.mas2 & (MAS2_WIMGE_MASK & ~_TLB_ENTRY_SHARED)))
  1891. continue;
  1892. if (tmppa >= e.phys && tmppa < e.phys + e.size) {
  1893. va = e.virt + (pa - e.phys);
  1894. tmppa = e.phys + e.size;
  1895. sz -= MIN(sz, e.size - (pa - e.phys));
  1896. while (sz > 0 && (i = tlb1_find_pa(tmppa, &e)) != -1) {
  1897. if (wimge != (e.mas2 & (MAS2_WIMGE_MASK & ~_TLB_ENTRY_SHARED)))
  1898. break;
  1899. sz -= MIN(sz, e.size);
  1900. tmppa = e.phys + e.size;
  1901. }
  1902. if (sz != 0)
  1903. break;
  1904. return ((void *)va);
  1905. }
  1906. }
  1907. size = roundup(size, PAGE_SIZE);
  1908. #ifdef __powerpc64__
  1909. KASSERT(pa < VM_MAPDEV_PA_MAX,
  1910. ("Unsupported physical address! %lx", pa));
  1911. va = VM_MAPDEV_BASE + pa;
  1912. retva = va;
  1913. #ifdef POW2_MAPPINGS
  1914. /*
  1915. * Align the mapping to a power of 2 size, taking into account that we
  1916. * may need to increase the size multiple times to satisfy the size and
  1917. * alignment requirements.
  1918. *
  1919. * This works in the general case because it's very rare (near never?)
  1920. * to have different access properties (WIMG) within a single
  1921. * power-of-two region. If a design does call for that, POW2_MAPPINGS
  1922. * can be undefined, and exact mappings will be used instead.
  1923. */
  1924. sz = size;
  1925. size = roundup2(size, 1 << ilog2(size));
  1926. while (rounddown2(va, size) + size < va + sz)
  1927. size <<= 1;
  1928. va = rounddown2(va, size);
  1929. pa = rounddown2(pa, size);
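/*
 * Worked example (editorial sketch, numbers are hypothetical): a request
 * for sz = 0x5000 bytes is first rounded up to size = 0x8000; if the
 * 0x8000-aligned window containing 'va' ends before va + sz (i.e. va sits
 * near the end of that window), the loop doubles size to 0x10000, and
 * va/pa are then rounded down so that a single naturally-aligned
 * power-of-two mapping covers the whole request.
 */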
  1930. #endif
  1931. #else
  1932. /*
  1933. * The device mapping area is between VM_MAXUSER_ADDRESS and
  1934. * VM_MIN_KERNEL_ADDRESS. This gives 1GB of device addressing.
  1935. */
  1936. #ifdef SPARSE_MAPDEV
  1937. /*
  1938. * With a sparse mapdev, align to the largest starting region. This
  1939. * could feasibly be optimized for a 'best-fit' alignment, but that
  1940. * calculation could be very costly.
  1941. * Align to the smaller of:
  1942. * - first set bit in overlap of (pa & size mask)
  1943. * - largest size envelope
  1944. *
  1945. * It's possible the device mapping may start at a PA that's not larger
  1946. * than the size mask, so we need to offset in to maximize the TLB entry
  1947. * range and minimize the number of used TLB entries.
  1948. */
  1949. do {
  1950. tmpva = tlb1_map_base;
  1951. sz = ffsl((~((1 << flsl(size-1)) - 1)) & pa);
  1952. sz = sz ? min(roundup(sz + 3, 4), flsl(size) - 1) : flsl(size) - 1;
  1953. va = roundup(tlb1_map_base, 1 << sz) | (((1 << sz) - 1) & pa);
  1954. } while (!atomic_cmpset_int(&tlb1_map_base, tmpva, va + size));
  1955. #endif
  1956. va = atomic_fetchadd_int(&tlb1_map_base, size);
  1957. retva = va;
  1958. #endif
  1959. if (tlb1_mapin_region(va, pa, size, tlb_calc_wimg(pa, ma)) != size)
  1960. return (NULL);
  1961. return ((void *)retva);
  1962. }
  1963. /*
  1964. * 'Unmap' a range mapped by mmu_booke_mapdev().
  1965. */
  1966. static void
  1967. mmu_booke_unmapdev(vm_offset_t va, vm_size_t size)
  1968. {
  1969. #ifdef SUPPORTS_SHRINKING_TLB1
  1970. vm_offset_t base, offset;
  1971. /*
  1972. * Unmap only if this is inside kernel virtual space.
  1973. */
  1974. if ((va >= VM_MIN_KERNEL_ADDRESS) && (va <= VM_MAX_KERNEL_ADDRESS)) {
  1975. base = trunc_page(va);
  1976. offset = va & PAGE_MASK;
  1977. size = roundup(offset + size, PAGE_SIZE);
  1978. mmu_booke_qremove(base, atop(size));
  1979. kva_free(base, size);
  1980. }
  1981. #endif
  1982. }
  1983. /*
  1984. * mmu_booke_object_init_pt preloads the ptes for a given object into the
  1985. * specified pmap. This eliminates the blast of soft faults on process startup
  1986. * and immediately after an mmap.
  1987. */
  1988. static void
  1989. mmu_booke_object_init_pt(pmap_t pmap, vm_offset_t addr,
  1990. vm_object_t object, vm_pindex_t pindex, vm_size_t size)
  1991. {
  1992. VM_OBJECT_ASSERT_WLOCKED(object);
  1993. KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
  1994. ("mmu_booke_object_init_pt: non-device object"));
  1995. }
  1996. /*
  1997. * Perform the pmap work for mincore.
  1998. */
  1999. static int
  2000. mmu_booke_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *pap)
  2001. {
  2002. /* XXX: this should be implemented at some point */
  2003. return (0);
  2004. }
  2005. static int
  2006. mmu_booke_change_attr(vm_offset_t addr, vm_size_t sz, vm_memattr_t mode)
  2007. {
  2008. vm_offset_t va;
  2009. pte_t *pte;
  2010. int i, j;
  2011. tlb_entry_t e;
  2012. addr = trunc_page(addr);
  2013. /* Only allow changes to mapped kernel addresses. This includes:
  2014. * - KVA
  2015. * - DMAP (powerpc64)
  2016. * - Device mappings
  2017. */
  2018. if (addr <= VM_MAXUSER_ADDRESS ||
  2019. #ifdef __powerpc64__
  2020. (addr >= tlb1_map_base && addr < DMAP_BASE_ADDRESS) ||
  2021. (addr > DMAP_MAX_ADDRESS && addr < VM_MIN_KERNEL_ADDRESS) ||
  2022. #else
  2023. (addr >= tlb1_map_base && addr < VM_MIN_KERNEL_ADDRESS) ||
  2024. #endif
  2025. (addr > VM_MAX_KERNEL_ADDRESS))
  2026. return (EINVAL);
  2027. /* Check TLB1 mappings */
  2028. for (i = 0; i < TLB1_ENTRIES; i++) {
  2029. tlb1_read_entry(&e, i);
  2030. if (!(e.mas1 & MAS1_VALID))
  2031. continue;
  2032. if (addr >= e.virt && addr < e.virt + e.size)
  2033. break;
  2034. }
  2035. if (i < TLB1_ENTRIES) {
  2036. /* Only allow full mappings to be modified for now. */
  2037. /* Validate the range. */
  2038. for (j = i, va = addr; va < addr + sz; va += e.size, j++) {
  2039. tlb1_read_entry(&e, j);
  2040. if (va != e.virt || (sz - (va - addr) < e.size))
  2041. return (EINVAL);
  2042. }
  2043. for (va = addr; va < addr + sz; va += e.size, i++) {
  2044. tlb1_read_entry(&e, i);
  2045. e.mas2 &= ~MAS2_WIMGE_MASK;
  2046. e.mas2 |= tlb_calc_wimg(e.phys, mode);
  2047. /*
  2048. * Write it out to the TLB. Should really re-sync with other
  2049. * cores.
  2050. */
  2051. tlb1_write_entry(&e, i);
  2052. }
  2053. return (0);
  2054. }
  2055. /* Not in TLB1, try through pmap */
  2056. /* First validate the range. */
  2057. for (va = addr; va < addr + sz; va += PAGE_SIZE) {
  2058. pte = pte_find(kernel_pmap, va);
  2059. if (pte == NULL || !PTE_ISVALID(pte))
  2060. return (EINVAL);
  2061. }
  2062. mtx_lock_spin(&tlbivax_mutex);
  2063. tlb_miss_lock();
  2064. for (va = addr; va < addr + sz; va += PAGE_SIZE) {
  2065. pte = pte_find(kernel_pmap, va);
  2066. *pte &= ~(PTE_MAS2_MASK << PTE_MAS2_SHIFT);
  2067. *pte |= tlb_calc_wimg(PTE_PA(pte), mode) << PTE_MAS2_SHIFT;
  2068. tlb0_flush_entry(va);
  2069. }
  2070. tlb_miss_unlock();
  2071. mtx_unlock_spin(&tlbivax_mutex);
  2072. return (0);
  2073. }
  2074. static void
  2075. mmu_booke_page_array_startup(long pages)
  2076. {
  2077. vm_page_array_size = pages;
  2078. }
  2079. /**************************************************************************/
  2080. /* TID handling */
  2081. /**************************************************************************/
  2082. /*
  2083. * Allocate a TID. If necessary, steal one from someone else.
  2084. * The new TID is flushed from the TLB before returning.
  2085. */
  2086. static tlbtid_t
  2087. tid_alloc(pmap_t pmap)
  2088. {
  2089. tlbtid_t tid;
  2090. int thiscpu;
  2091. KASSERT((pmap != kernel_pmap), ("tid_alloc: kernel pmap"));
  2092. CTR2(KTR_PMAP, "%s: s (pmap = %p)", __func__, pmap);
  2093. thiscpu = PCPU_GET(cpuid);
  2094. tid = PCPU_GET(booke.tid_next);
  2095. if (tid > TID_MAX)
  2096. tid = TID_MIN;
  2097. PCPU_SET(booke.tid_next, tid + 1);
  2098. /* If we are stealing TID then clear the relevant pmap's field */
  2099. if (tidbusy[thiscpu][tid] != NULL) {
  2100. CTR2(KTR_PMAP, "%s: warning: stealing tid %d", __func__, tid);
  2101. tidbusy[thiscpu][tid]->pm_tid[thiscpu] = TID_NONE;
  2102. /* Flush all entries from TLB0 matching this TID. */
  2103. tid_flush(tid);
  2104. }
  2105. tidbusy[thiscpu][tid] = pmap;
  2106. pmap->pm_tid[thiscpu] = tid;
  2107. __asm __volatile("msync; isync");
  2108. CTR3(KTR_PMAP, "%s: e (%02d next = %02d)", __func__, tid,
  2109. PCPU_GET(booke.tid_next));
  2110. return (tid);
  2111. }
  2112. /**************************************************************************/
  2113. /* TLB0 handling */
  2114. /**************************************************************************/
  2115. /* Convert TLB0 va and way number to tlb0[] table index. */
  2116. static inline unsigned int
  2117. tlb0_tableidx(vm_offset_t va, unsigned int way)
  2118. {
  2119. unsigned int idx;
  2120. idx = (way * TLB0_ENTRIES_PER_WAY);
  2121. idx += (va & MAS2_TLB0_ENTRY_IDX_MASK) >> MAS2_TLB0_ENTRY_IDX_SHIFT;
  2122. return (idx);
  2123. }
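/*
 * Illustrative note (editorial): way 0 entries occupy table indices
 * [0, TLB0_ENTRIES_PER_WAY); for way 1 the index is TLB0_ENTRIES_PER_WAY
 * plus the set number selected by the EPN bits of 'va', and so on for
 * higher ways.
 */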
  2124. /*
  2125. * Invalidate TLB0 entry.
  2126. */
  2127. static inline void
  2128. tlb0_flush_entry(vm_offset_t va)
  2129. {
  2130. CTR2(KTR_PMAP, "%s: s va=0x%08x", __func__, va);
  2131. mtx_assert(&tlbivax_mutex, MA_OWNED);
  2132. __asm __volatile("tlbivax 0, %0" :: "r"(va & MAS2_EPN_MASK));
  2133. __asm __volatile("isync; msync");
  2134. __asm __volatile("tlbsync; msync");
  2135. CTR1(KTR_PMAP, "%s: e", __func__);
  2136. }
  2137. /**************************************************************************/
  2138. /* TLB1 handling */
  2139. /**************************************************************************/
  2140. /*
  2141. * TLB1 mapping notes:
  2142. *
  2143. * TLB1[0] Kernel text and data.
  2144. * TLB1[1-15] Additional kernel text and data mappings (if required), PCI
1145. * windows, and other device mappings.
  2146. */
  2147. /*
  2148. * Read an entry from given TLB1 slot.
  2149. */
  2150. void
  2151. tlb1_read_entry(tlb_entry_t *entry, unsigned int slot)
  2152. {
  2153. register_t msr;
  2154. uint32_t mas0;
  2155. KASSERT((entry != NULL), ("%s(): Entry is NULL!", __func__));
  2156. msr = mfmsr();
  2157. __asm __volatile("wrteei 0");
  2158. mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(slot);
  2159. mtspr(SPR_MAS0, mas0);
  2160. __asm __volatile("isync; tlbre");
  2161. entry->mas1 = mfspr(SPR_MAS1);
  2162. entry->mas2 = mfspr(SPR_MAS2);
  2163. entry->mas3 = mfspr(SPR_MAS3);
  2164. switch ((mfpvr() >> 16) & 0xFFFF) {
  2165. case FSL_E500v2:
  2166. case FSL_E500mc:
  2167. case FSL_E5500:
  2168. case FSL_E6500:
  2169. entry->mas7 = mfspr(SPR_MAS7);
  2170. break;
  2171. default:
  2172. entry->mas7 = 0;
  2173. break;
  2174. }
  2175. __asm __volatile("wrtee %0" :: "r"(msr));
  2176. entry->virt = entry->mas2 & MAS2_EPN_MASK;
  2177. entry->phys = ((vm_paddr_t)(entry->mas7 & MAS7_RPN) << 32) |
  2178. (entry->mas3 & MAS3_RPN);
  2179. entry->size =
  2180. tsize2size((entry->mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT);
  2181. }
  2182. struct tlbwrite_args {
  2183. tlb_entry_t *e;
  2184. unsigned int idx;
  2185. };
  2186. static uint32_t
  2187. tlb1_find_free(void)
  2188. {
  2189. tlb_entry_t e;
  2190. int i;
  2191. for (i = 0; i < TLB1_ENTRIES; i++) {
  2192. tlb1_read_entry(&e, i);
  2193. if ((e.mas1 & MAS1_VALID) == 0)
  2194. return (i);
  2195. }
  2196. return (-1);
  2197. }
  2198. static void
  2199. tlb1_purge_va_range(vm_offset_t va, vm_size_t size)
  2200. {
  2201. tlb_entry_t e;
  2202. int i;
  2203. for (i = 0; i < TLB1_ENTRIES; i++) {
  2204. tlb1_read_entry(&e, i);
  2205. if ((e.mas1 & MAS1_VALID) == 0)
  2206. continue;
  2207. if ((e.mas2 & MAS2_EPN_MASK) >= va &&
  2208. (e.mas2 & MAS2_EPN_MASK) < va + size) {
  2209. mtspr(SPR_MAS1, e.mas1 & ~MAS1_VALID);
  2210. __asm __volatile("isync; tlbwe; isync; msync");
  2211. }
  2212. }
  2213. }
  2214. static void
  2215. tlb1_write_entry_int(void *arg)
  2216. {
  2217. struct tlbwrite_args *args = arg;
  2218. uint32_t idx, mas0;
  2219. idx = args->idx;
  2220. if (idx == -1) {
  2221. tlb1_purge_va_range(args->e->virt, args->e->size);
  2222. idx = tlb1_find_free();
  2223. if (idx == -1)
  2224. panic("No free TLB1 entries!\n");
  2225. }
  2226. /* Select entry */
  2227. mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(idx);
  2228. mtspr(SPR_MAS0, mas0);
  2229. mtspr(SPR_MAS1, args->e->mas1);
  2230. mtspr(SPR_MAS2, args->e->mas2);
  2231. mtspr(SPR_MAS3, args->e->mas3);
  2232. switch ((mfpvr() >> 16) & 0xFFFF) {
  2233. case FSL_E500mc:
  2234. case FSL_E5500:
  2235. case FSL_E6500:
  2236. mtspr(SPR_MAS8, 0);
  2237. /* FALLTHROUGH */
  2238. case FSL_E500v2:
  2239. mtspr(SPR_MAS7, args->e->mas7);
  2240. break;
  2241. default:
  2242. break;
  2243. }
  2244. __asm __volatile("isync; tlbwe; isync; msync");
  2245. }
  2246. static void
  2247. tlb1_write_entry_sync(void *arg)
  2248. {
  2249. /* Empty synchronization point for smp_rendezvous(). */
  2250. }
  2251. /*
  2252. * Write given entry to TLB1 hardware.
  2253. */
  2254. static void
  2255. tlb1_write_entry(tlb_entry_t *e, unsigned int idx)
  2256. {
  2257. struct tlbwrite_args args;
  2258. args.e = e;
  2259. args.idx = idx;
  2260. #ifdef SMP
  2261. if ((e->mas2 & _TLB_ENTRY_SHARED) && smp_started) {
  2262. mb();
  2263. smp_rendezvous(tlb1_write_entry_sync,
  2264. tlb1_write_entry_int,
  2265. tlb1_write_entry_sync, &args);
  2266. } else
  2267. #endif
  2268. {
  2269. register_t msr;
  2270. msr = mfmsr();
  2271. __asm __volatile("wrteei 0");
  2272. tlb1_write_entry_int(&args);
  2273. __asm __volatile("wrtee %0" :: "r"(msr));
  2274. }
  2275. }
  2276. /*
  2277. * Convert TLB TSIZE value to mapped region size.
  2278. */
  2279. static vm_size_t
  2280. tsize2size(unsigned int tsize)
  2281. {
  2282. /*
  2283. * size = 4^tsize KB
2284. * size = 4^tsize * 2^10 = 2^(2 * tsize + 10) bytes
  2285. */
  2286. return ((1 << (2 * tsize)) * 1024);
  2287. }
  2288. /*
  2289. * Convert region size (must be power of 4) to TLB TSIZE value.
  2290. */
  2291. static unsigned int
  2292. size2tsize(vm_size_t size)
  2293. {
  2294. return (ilog2(size) / 2 - 5);
  2295. }
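/*
 * Worked examples for the two conversions above (editorial sketch):
 * tsize2size(1) = (1 << 2) * 1024 = 4KB, tsize2size(7) = 16MB and
 * tsize2size(9) = 256MB; conversely size2tsize(4KB) = 12/2 - 5 = 1,
 * size2tsize(16MB) = 24/2 - 5 = 7 and size2tsize(256MB) = 28/2 - 5 = 9.
 */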
  2296. /*
  2297. * Register permanent kernel mapping in TLB1.
  2298. *
2299. * Entries are written to the first free slot (any overlapping stale
2300. * entries are purged first) and are not supposed to be invalidated.
  2301. */
  2302. int
  2303. tlb1_set_entry(vm_offset_t va, vm_paddr_t pa, vm_size_t size,
  2304. uint32_t flags)
  2305. {
  2306. tlb_entry_t e;
  2307. uint32_t ts, tid;
  2308. int tsize, index;
  2309. /* First try to update an existing entry. */
  2310. for (index = 0; index < TLB1_ENTRIES; index++) {
  2311. tlb1_read_entry(&e, index);
  2312. /* Check if we're just updating the flags, and update them. */
  2313. if (e.phys == pa && e.virt == va && e.size == size) {
  2314. e.mas2 = (va & MAS2_EPN_MASK) | flags;
  2315. tlb1_write_entry(&e, index);
  2316. return (0);
  2317. }
  2318. }
  2319. /* Convert size to TSIZE */
  2320. tsize = size2tsize(size);
  2321. tid = (TID_KERNEL << MAS1_TID_SHIFT) & MAS1_TID_MASK;
2322. /* XXX TS is hard coded to 0 for now as we only use a single address space. */
  2323. ts = (0 << MAS1_TS_SHIFT) & MAS1_TS_MASK;
  2324. e.phys = pa;
  2325. e.virt = va;
  2326. e.size = size;
  2327. e.mas1 = MAS1_VALID | MAS1_IPROT | ts | tid;
  2328. e.mas1 |= ((tsize << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK);
  2329. e.mas2 = (va & MAS2_EPN_MASK) | flags;
  2330. /* Set supervisor RWX permission bits */
  2331. e.mas3 = (pa & MAS3_RPN) | MAS3_SR | MAS3_SW | MAS3_SX;
  2332. e.mas7 = (pa >> 32) & MAS7_RPN;
  2333. tlb1_write_entry(&e, -1);
  2334. return (0);
  2335. }
  2336. /*
  2337. * Map in contiguous RAM region into the TLB1.
  2338. */
  2339. static vm_size_t
  2340. tlb1_mapin_region(vm_offset_t va, vm_paddr_t pa, vm_size_t size, int wimge)
  2341. {
  2342. vm_offset_t base;
  2343. vm_size_t mapped, sz, ssize;
  2344. mapped = 0;
  2345. base = va;
  2346. ssize = size;
  2347. while (size > 0) {
  2348. sz = 1UL << (ilog2(size) & ~1);
  2349. /* Align size to PA */
  2350. if (pa % sz != 0) {
  2351. do {
  2352. sz >>= 2;
  2353. } while (pa % sz != 0);
  2354. }
  2355. /* Now align from there to VA */
  2356. if (va % sz != 0) {
  2357. do {
  2358. sz >>= 2;
  2359. } while (va % sz != 0);
  2360. }
  2361. #ifdef __powerpc64__
  2362. /*
  2363. * Clamp TLB1 entries to 4G.
  2364. *
  2365. * While the e6500 supports up to 1TB mappings, the e5500
  2366. * only supports up to 4G mappings. (0b1011)
  2367. *
  2368. * If any e6500 machines capable of supporting a very
  2369. * large amount of memory appear in the future, we can
  2370. * revisit this.
  2371. *
  2372. * For now, though, since we have plenty of space in TLB1,
  2373. * always avoid creating entries larger than 4GB.
  2374. */
  2375. sz = MIN(sz, 1UL << 32);
  2376. #endif
  2377. if (bootverbose)
  2378. printf("Wiring VA=%p to PA=%jx (size=%lx)\n",
  2379. (void *)va, (uintmax_t)pa, (long)sz);
  2380. if (tlb1_set_entry(va, pa, sz,
  2381. _TLB_ENTRY_SHARED | wimge) < 0)
  2382. return (mapped);
  2383. size -= sz;
  2384. pa += sz;
  2385. va += sz;
  2386. }
  2387. mapped = (va - base);
  2388. if (bootverbose)
  2389. printf("mapped size 0x%"PRIxPTR" (wasted space 0x%"PRIxPTR")\n",
  2390. mapped, mapped - ssize);
  2391. return (mapped);
  2392. }
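/*
 * Worked example (editorial sketch): wiring a 48MB (0x3000000) region whose
 * PA and VA are 16MB-aligned proceeds in power-of-4 chunks: ilog2(0x3000000)
 * is 25, so the first chunk is 1 << 24 = 16MB, and the remaining 32MB is
 * likewise covered by two more 16MB entries (32MB is not a valid power-of-4
 * size), for three TLB1 entries in total.  A 64MB region with 64MB-aligned
 * PA/VA would instead fit in a single 64MB entry.
 */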
  2393. /*
  2394. * TLB1 initialization routine, to be called after the very first
  2395. * assembler level setup done in locore.S.
  2396. */
  2397. void
  2398. tlb1_init()
  2399. {
  2400. vm_offset_t mas2;
  2401. uint32_t mas0, mas1, mas3, mas7;
  2402. uint32_t tsz;
  2403. tlb1_get_tlbconf();
  2404. mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(0);
  2405. mtspr(SPR_MAS0, mas0);
  2406. __asm __volatile("isync; tlbre");
  2407. mas1 = mfspr(SPR_MAS1);
  2408. mas2 = mfspr(SPR_MAS2);
  2409. mas3 = mfspr(SPR_MAS3);
  2410. mas7 = mfspr(SPR_MAS7);
  2411. kernload = ((vm_paddr_t)(mas7 & MAS7_RPN) << 32) |
  2412. (mas3 & MAS3_RPN);
  2413. tsz = (mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
  2414. kernsize += (tsz > 0) ? tsize2size(tsz) : 0;
  2415. kernstart = trunc_page(mas2);
  2416. /* Setup TLB miss defaults */
  2417. set_mas4_defaults();
  2418. }
  2419. /*
2420. * pmap_early_io_unmap() should be used shortly after the matching
  2421. * pmap_early_io_map(), as in the following snippet:
  2422. *
  2423. * x = pmap_early_io_map(...);
  2424. * <do something with x>
  2425. * pmap_early_io_unmap(x, size);
  2426. *
2427. * with no further allocations made in between.
  2428. */
  2429. void
  2430. pmap_early_io_unmap(vm_offset_t va, vm_size_t size)
  2431. {
  2432. int i;
  2433. tlb_entry_t e;
  2434. vm_size_t isize;
  2435. size = roundup(size, PAGE_SIZE);
  2436. isize = size;
  2437. for (i = 0; i < TLB1_ENTRIES && size > 0; i++) {
  2438. tlb1_read_entry(&e, i);
  2439. if (!(e.mas1 & MAS1_VALID))
  2440. continue;
  2441. if (va <= e.virt && (va + isize) >= (e.virt + e.size)) {
  2442. size -= e.size;
  2443. e.mas1 &= ~MAS1_VALID;
  2444. tlb1_write_entry(&e, i);
  2445. }
  2446. }
  2447. if (tlb1_map_base == va + isize)
  2448. tlb1_map_base -= isize;
  2449. }
  2450. vm_offset_t
  2451. pmap_early_io_map(vm_paddr_t pa, vm_size_t size)
  2452. {
  2453. vm_paddr_t pa_base;
  2454. vm_offset_t va, sz;
  2455. int i;
  2456. tlb_entry_t e;
  2457. KASSERT(!pmap_bootstrapped, ("Do not use after PMAP is up!"));
  2458. for (i = 0; i < TLB1_ENTRIES; i++) {
  2459. tlb1_read_entry(&e, i);
  2460. if (!(e.mas1 & MAS1_VALID))
  2461. continue;
  2462. if (pa >= e.phys && (pa + size) <=
  2463. (e.phys + e.size))
  2464. return (e.virt + (pa - e.phys));
  2465. }
  2466. pa_base = rounddown(pa, PAGE_SIZE);
  2467. size = roundup(size + (pa - pa_base), PAGE_SIZE);
  2468. tlb1_map_base = roundup2(tlb1_map_base, 1 << (ilog2(size) & ~1));
  2469. va = tlb1_map_base + (pa - pa_base);
  2470. do {
  2471. sz = 1 << (ilog2(size) & ~1);
  2472. tlb1_set_entry(tlb1_map_base, pa_base, sz,
  2473. _TLB_ENTRY_SHARED | _TLB_ENTRY_IO);
  2474. size -= sz;
  2475. pa_base += sz;
  2476. tlb1_map_base += sz;
  2477. } while (size > 0);
  2478. return (va);
  2479. }
  2480. void
  2481. pmap_track_page(pmap_t pmap, vm_offset_t va)
  2482. {
  2483. vm_paddr_t pa;
  2484. vm_page_t page;
  2485. struct pv_entry *pve;
  2486. va = trunc_page(va);
  2487. pa = pmap_kextract(va);
  2488. page = PHYS_TO_VM_PAGE(pa);
  2489. rw_wlock(&pvh_global_lock);
  2490. PMAP_LOCK(pmap);
  2491. TAILQ_FOREACH(pve, &page->md.pv_list, pv_link) {
  2492. if ((pmap == pve->pv_pmap) && (va == pve->pv_va)) {
  2493. goto out;
  2494. }
  2495. }
  2496. page->md.pv_tracked = true;
  2497. pv_insert(pmap, va, page);
  2498. out:
  2499. PMAP_UNLOCK(pmap);
  2500. rw_wunlock(&pvh_global_lock);
  2501. }
  2502. /*
  2503. * Setup MAS4 defaults.
  2504. * These values are loaded to MAS0-2 on a TLB miss.
  2505. */
  2506. static void
  2507. set_mas4_defaults(void)
  2508. {
  2509. uint32_t mas4;
  2510. /* Defaults: TLB0, PID0, TSIZED=4K */
  2511. mas4 = MAS4_TLBSELD0;
  2512. mas4 |= (TLB_SIZE_4K << MAS4_TSIZED_SHIFT) & MAS4_TSIZED_MASK;
  2513. #ifdef SMP
  2514. mas4 |= MAS4_MD;
  2515. #endif
  2516. mtspr(SPR_MAS4, mas4);
  2517. __asm __volatile("isync");
  2518. }
  2519. /*
  2520. * Return 0 if the physical IO range is encompassed by one of the
2521. * TLB1 entries, otherwise return the related error code.
  2522. */
  2523. static int
  2524. tlb1_iomapped(int i, vm_paddr_t pa, vm_size_t size, vm_offset_t *va)
  2525. {
  2526. uint32_t prot;
  2527. vm_paddr_t pa_start;
  2528. vm_paddr_t pa_end;
  2529. unsigned int entry_tsize;
  2530. vm_size_t entry_size;
  2531. tlb_entry_t e;
  2532. *va = (vm_offset_t)NULL;
  2533. tlb1_read_entry(&e, i);
  2534. /* Skip invalid entries */
  2535. if (!(e.mas1 & MAS1_VALID))
  2536. return (EINVAL);
  2537. /*
  2538. * The entry must be cache-inhibited, guarded, and r/w
  2539. * so it can function as an i/o page
  2540. */
  2541. prot = e.mas2 & (MAS2_I | MAS2_G);
  2542. if (prot != (MAS2_I | MAS2_G))
  2543. return (EPERM);
  2544. prot = e.mas3 & (MAS3_SR | MAS3_SW);
  2545. if (prot != (MAS3_SR | MAS3_SW))
  2546. return (EPERM);
  2547. /* The address should be within the entry range. */
  2548. entry_tsize = (e.mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
  2549. KASSERT((entry_tsize), ("tlb1_iomapped: invalid entry tsize"));
  2550. entry_size = tsize2size(entry_tsize);
  2551. pa_start = (((vm_paddr_t)e.mas7 & MAS7_RPN) << 32) |
  2552. (e.mas3 & MAS3_RPN);
  2553. pa_end = pa_start + entry_size;
  2554. if ((pa < pa_start) || ((pa + size) > pa_end))
  2555. return (ERANGE);
  2556. /* Return virtual address of this mapping. */
  2557. *va = (e.mas2 & MAS2_EPN_MASK) + (pa - pa_start);
  2558. return (0);
  2559. }
  2560. #ifdef DDB
  2561. /* Print out contents of the MAS registers for each TLB0 entry */
  2562. static void
  2563. #ifdef __powerpc64__
  2564. tlb_print_entry(int i, uint32_t mas1, uint64_t mas2, uint32_t mas3,
  2565. #else
  2566. tlb_print_entry(int i, uint32_t mas1, uint32_t mas2, uint32_t mas3,
  2567. #endif
  2568. uint32_t mas7)
  2569. {
  2570. int as;
  2571. char desc[3];
  2572. tlbtid_t tid;
  2573. vm_size_t size;
  2574. unsigned int tsize;
  2575. desc[2] = '\0';
  2576. if (mas1 & MAS1_VALID)
  2577. desc[0] = 'V';
  2578. else
  2579. desc[0] = ' ';
  2580. if (mas1 & MAS1_IPROT)
  2581. desc[1] = 'P';
  2582. else
  2583. desc[1] = ' ';
  2584. as = (mas1 & MAS1_TS_MASK) ? 1 : 0;
  2585. tid = MAS1_GETTID(mas1);
  2586. tsize = (mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
  2587. size = 0;
  2588. if (tsize)
  2589. size = tsize2size(tsize);
  2590. printf("%3d: (%s) [AS=%d] "
  2591. "sz = 0x%jx tsz = %d tid = %d mas1 = 0x%08x "
  2592. "mas2(va) = 0x%"PRI0ptrX" mas3(pa) = 0x%08x mas7 = 0x%08x\n",
  2593. i, desc, as, (uintmax_t)size, tsize, tid, mas1, mas2, mas3, mas7);
  2594. }
  2595. DB_SHOW_COMMAND(tlb0, tlb0_print_tlbentries)
  2596. {
  2597. uint32_t mas0, mas1, mas3, mas7;
  2598. #ifdef __powerpc64__
  2599. uint64_t mas2;
  2600. #else
  2601. uint32_t mas2;
  2602. #endif
  2603. int entryidx, way, idx;
  2604. printf("TLB0 entries:\n");
  2605. for (way = 0; way < TLB0_WAYS; way ++)
  2606. for (entryidx = 0; entryidx < TLB0_ENTRIES_PER_WAY; entryidx++) {
  2607. mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way);
  2608. mtspr(SPR_MAS0, mas0);
  2609. mas2 = entryidx << MAS2_TLB0_ENTRY_IDX_SHIFT;
  2610. mtspr(SPR_MAS2, mas2);
  2611. __asm __volatile("isync; tlbre");
  2612. mas1 = mfspr(SPR_MAS1);
  2613. mas2 = mfspr(SPR_MAS2);
  2614. mas3 = mfspr(SPR_MAS3);
  2615. mas7 = mfspr(SPR_MAS7);
  2616. idx = tlb0_tableidx(mas2, way);
  2617. tlb_print_entry(idx, mas1, mas2, mas3, mas7);
  2618. }
  2619. }
  2620. /*
  2621. * Print out contents of the MAS registers for each TLB1 entry
  2622. */
  2623. DB_SHOW_COMMAND(tlb1, tlb1_print_tlbentries)
  2624. {
  2625. uint32_t mas0, mas1, mas3, mas7;
  2626. #ifdef __powerpc64__
  2627. uint64_t mas2;
  2628. #else
  2629. uint32_t mas2;
  2630. #endif
  2631. int i;
  2632. printf("TLB1 entries:\n");
  2633. for (i = 0; i < TLB1_ENTRIES; i++) {
  2634. mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(i);
  2635. mtspr(SPR_MAS0, mas0);
  2636. __asm __volatile("isync; tlbre");
  2637. mas1 = mfspr(SPR_MAS1);
  2638. mas2 = mfspr(SPR_MAS2);
  2639. mas3 = mfspr(SPR_MAS3);
  2640. mas7 = mfspr(SPR_MAS7);
  2641. tlb_print_entry(i, mas1, mas2, mas3, mas7);
  2642. }
  2643. }
  2644. #endif