mmu.c

/*
 * Based on arch/arm/mm/mmu.c
 *
 * Copyright (C) 1995-2005 Russell King
 * Copyright (C) 2012 ARM Ltd.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/cache.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/kexec.h>
#include <linux/libfdt.h>
#include <linux/mman.h>
#include <linux/nodemask.h>
#include <linux/memblock.h>
#include <linux/fs.h>
#include <linux/io.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>

#include <asm/barrier.h>
#include <asm/cputype.h>
#include <asm/fixmap.h>
#include <asm/kasan.h>
#include <asm/kernel-pgtable.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/sizes.h>
#include <asm/tlb.h>
#include <asm/memblock.h>
#include <asm/mmu_context.h>
#include <asm/ptdump.h>
#include <asm/tlbflush.h>

#define NO_BLOCK_MAPPINGS	BIT(0)
#define NO_CONT_MAPPINGS	BIT(1)

u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
u64 idmap_ptrs_per_pgd = PTRS_PER_PGD;

u64 kimage_voffset __ro_after_init;
EXPORT_SYMBOL(kimage_voffset);

/*
 * Empty_zero_page is a special page that is used for zero-initialized data
 * and COW.
 */
unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
EXPORT_SYMBOL(empty_zero_page);

static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused;
static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused;

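/*
 * Select the pgprot used for userspace mappings of physical memory (e.g. via
 * /dev/mem): non-cacheable for addresses without a valid struct page,
 * write-combine when the file was opened with O_SYNC, and the caller's
 * protection otherwise.
 */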
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
                              unsigned long size, pgprot_t vma_prot)
{
        if (!pfn_valid(pfn))
                return pgprot_noncached(vma_prot);
        else if (file->f_flags & O_SYNC)
                return pgprot_writecombine(vma_prot);
        return vma_prot;
}
EXPORT_SYMBOL(phys_mem_access_prot);

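/*
 * Allocate one zeroed page from memblock for use as a page table at any
 * level. The page is cleared through the fixmap, since the linear map may
 * not cover it yet at this point in boot.
 */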
static phys_addr_t __init early_pgtable_alloc(void)
{
        phys_addr_t phys;
        void *ptr;

        phys = memblock_alloc(PAGE_SIZE, PAGE_SIZE);

        /*
         * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE
         * slot will be free, so we can (ab)use the FIX_PTE slot to initialise
         * any level of table.
         */
        ptr = pte_set_fixmap(phys);

        memset(ptr, 0, PAGE_SIZE);

        /*
         * Implicit barriers also ensure the zeroed page is visible to the page
         * table walker
         */
        pte_clear_fixmap();

        return phys;
}

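/*
 * Decide whether a live page-table entry may be rewritten in place with new
 * attributes, i.e. whether the change is limited to attributes that do not
 * require a break-before-make sequence.
 */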
static bool pgattr_change_is_safe(u64 old, u64 new)
{
        /*
         * The following mapping attributes may be updated in live
         * kernel mappings without the need for break-before-make.
         */
        static const pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG;

        /* creating or taking down mappings is always safe */
        if (old == 0 || new == 0)
                return true;

        /* live contiguous mappings may not be manipulated at all */
        if ((old | new) & PTE_CONT)
                return false;

        /* Transitioning from Non-Global to Global is unsafe */
        if (old & ~new & PTE_NG)
                return false;

        return ((old ^ new) & ~mask) == 0;
}

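/*
 * Populate the PTEs covering [addr, end) with mappings of the physical range
 * starting at phys, accessing the PTE table through the fixmap.
 */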
static void init_pte(pmd_t *pmdp, unsigned long addr, unsigned long end,
                     phys_addr_t phys, pgprot_t prot)
{
        pte_t *ptep;

        ptep = pte_set_fixmap_offset(pmdp, addr);
        do {
                pte_t old_pte = READ_ONCE(*ptep);

                set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot));

                /*
                 * After the PTE entry has been populated once, we
                 * only allow updates to the permission attributes.
                 */
                BUG_ON(!pgattr_change_is_safe(pte_val(old_pte),
                                              READ_ONCE(pte_val(*ptep))));

                phys += PAGE_SIZE;
        } while (ptep++, addr += PAGE_SIZE, addr != end);

        pte_clear_fixmap();
}

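/*
 * Fill in the PTE level below a PMD entry, allocating the PTE table first if
 * necessary and setting PTE_CONT on ranges that are suitably sized and
 * aligned for contiguous mappings.
 */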
static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
                                unsigned long end, phys_addr_t phys,
                                pgprot_t prot,
                                phys_addr_t (*pgtable_alloc)(void),
                                int flags)
{
        unsigned long next;
        pmd_t pmd = READ_ONCE(*pmdp);

        BUG_ON(pmd_sect(pmd));
        if (pmd_none(pmd)) {
                phys_addr_t pte_phys;
                BUG_ON(!pgtable_alloc);
                pte_phys = pgtable_alloc();
                __pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE);
                pmd = READ_ONCE(*pmdp);
        }
        BUG_ON(pmd_bad(pmd));

        do {
                pgprot_t __prot = prot;

                next = pte_cont_addr_end(addr, end);

                /* use a contiguous mapping if the range is suitably aligned */
                if ((((addr | next | phys) & ~CONT_PTE_MASK) == 0) &&
                    (flags & NO_CONT_MAPPINGS) == 0)
                        __prot = __pgprot(pgprot_val(prot) | PTE_CONT);

                init_pte(pmdp, addr, next, phys, __prot);

                phys += next - addr;
        } while (addr = next, addr != end);
}

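/*
 * Populate the PMD entries covering [addr, end), using section (block)
 * mappings where the range and flags allow and falling back to PTE tables
 * otherwise.
 */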
static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end,
                     phys_addr_t phys, pgprot_t prot,
                     phys_addr_t (*pgtable_alloc)(void), int flags)
{
        unsigned long next;
        pmd_t *pmdp;

        pmdp = pmd_set_fixmap_offset(pudp, addr);
        do {
                pmd_t old_pmd = READ_ONCE(*pmdp);

                next = pmd_addr_end(addr, end);

                /* try section mapping first */
                if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
                    (flags & NO_BLOCK_MAPPINGS) == 0) {
                        pmd_set_huge(pmdp, phys, prot);

                        /*
                         * After the PMD entry has been populated once, we
                         * only allow updates to the permission attributes.
                         */
                        BUG_ON(!pgattr_change_is_safe(pmd_val(old_pmd),
                                                      READ_ONCE(pmd_val(*pmdp))));
                } else {
                        alloc_init_cont_pte(pmdp, addr, next, phys, prot,
                                            pgtable_alloc, flags);

                        BUG_ON(pmd_val(old_pmd) != 0 &&
                               pmd_val(old_pmd) != READ_ONCE(pmd_val(*pmdp)));
                }
                phys += next - addr;
        } while (pmdp++, addr = next, addr != end);

        pmd_clear_fixmap();
}

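/*
 * Fill in the PMD level below a PUD entry, allocating the PMD table first if
 * necessary and setting PTE_CONT on ranges that are suitably sized and
 * aligned for contiguous PMD mappings.
 */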
static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
                                unsigned long end, phys_addr_t phys,
                                pgprot_t prot,
                                phys_addr_t (*pgtable_alloc)(void), int flags)
{
        unsigned long next;
        pud_t pud = READ_ONCE(*pudp);

        /*
         * Check for initial section mappings in the pgd/pud.
         */
        BUG_ON(pud_sect(pud));
        if (pud_none(pud)) {
                phys_addr_t pmd_phys;
                BUG_ON(!pgtable_alloc);
                pmd_phys = pgtable_alloc();
                __pud_populate(pudp, pmd_phys, PUD_TYPE_TABLE);
                pud = READ_ONCE(*pudp);
        }
        BUG_ON(pud_bad(pud));

        do {
                pgprot_t __prot = prot;

                next = pmd_cont_addr_end(addr, end);

                /* use a contiguous mapping if the range is suitably aligned */
                if ((((addr | next | phys) & ~CONT_PMD_MASK) == 0) &&
                    (flags & NO_CONT_MAPPINGS) == 0)
                        __prot = __pgprot(pgprot_val(prot) | PTE_CONT);

                init_pmd(pudp, addr, next, phys, __prot, pgtable_alloc, flags);

                phys += next - addr;
        } while (addr = next, addr != end);
}

static inline bool use_1G_block(unsigned long addr, unsigned long next,
                                unsigned long phys)
{
        if (PAGE_SHIFT != 12)
                return false;

        if (((addr | next | phys) & ~PUD_MASK) != 0)
                return false;

        return true;
}

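/*
 * Populate the PUD entries covering [addr, end), allocating the PUD table
 * below the PGD entry if necessary and using 1GB block mappings where the
 * range and flags allow.
 */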
static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
                           phys_addr_t phys, pgprot_t prot,
                           phys_addr_t (*pgtable_alloc)(void),
                           int flags)
{
        unsigned long next;
        pud_t *pudp;
        pgd_t pgd = READ_ONCE(*pgdp);

        if (pgd_none(pgd)) {
                phys_addr_t pud_phys;
                BUG_ON(!pgtable_alloc);
                pud_phys = pgtable_alloc();
                __pgd_populate(pgdp, pud_phys, PUD_TYPE_TABLE);
                pgd = READ_ONCE(*pgdp);
        }
        BUG_ON(pgd_bad(pgd));

        pudp = pud_set_fixmap_offset(pgdp, addr);
        do {
                pud_t old_pud = READ_ONCE(*pudp);

                next = pud_addr_end(addr, end);

                /*
                 * For 4K granule only, attempt to put down a 1GB block
                 */
                if (use_1G_block(addr, next, phys) &&
                    (flags & NO_BLOCK_MAPPINGS) == 0) {
                        pud_set_huge(pudp, phys, prot);

                        /*
                         * After the PUD entry has been populated once, we
                         * only allow updates to the permission attributes.
                         */
                        BUG_ON(!pgattr_change_is_safe(pud_val(old_pud),
                                                      READ_ONCE(pud_val(*pudp))));
                } else {
                        alloc_init_cont_pmd(pudp, addr, next, phys, prot,
                                            pgtable_alloc, flags);

                        BUG_ON(pud_val(old_pud) != 0 &&
                               pud_val(old_pud) != READ_ONCE(pud_val(*pudp)));
                }
                phys += next - addr;
        } while (pudp++, addr = next, addr != end);

        pud_clear_fixmap();
}

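/*
 * Map [virt, virt + size) to the physical range starting at phys in the
 * given page-table directory, walking down from the PGD level and creating
 * intermediate tables with pgtable_alloc() as needed.
 */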
static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
                                 unsigned long virt, phys_addr_t size,
                                 pgprot_t prot,
                                 phys_addr_t (*pgtable_alloc)(void),
                                 int flags)
{
        unsigned long addr, length, end, next;
        pgd_t *pgdp = pgd_offset_raw(pgdir, virt);

        /*
         * If the virtual and physical address don't have the same offset
         * within a page, we cannot map the region as the caller expects.
         */
        if (WARN_ON((phys ^ virt) & ~PAGE_MASK))
                return;

        phys &= PAGE_MASK;
        addr = virt & PAGE_MASK;
        length = PAGE_ALIGN(size + (virt & ~PAGE_MASK));

        end = addr + length;
        do {
                next = pgd_addr_end(addr, end);
                alloc_init_pud(pgdp, addr, next, phys, prot, pgtable_alloc,
                               flags);
                phys += next - addr;
        } while (pgdp++, addr = next, addr != end);
}

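/*
 * Page-table allocator used once the normal page allocator is available:
 * grab a zeroed page and run the pgtable constructor on it.
 */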
static phys_addr_t pgd_pgtable_alloc(void)
{
        void *ptr = (void *)__get_free_page(PGALLOC_GFP);
        if (!ptr || !pgtable_page_ctor(virt_to_page(ptr)))
                BUG();

        /* Ensure the zeroed page is visible to the page table walker */
        dsb(ishst);
        return __pa(ptr);
}

/*
 * This function can only be used to modify existing table entries,
 * without allocating new levels of table. Note that this permits the
 * creation of new section or page entries.
 */
static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
                                          phys_addr_t size, pgprot_t prot)
{
        if (virt < VMALLOC_START) {
                pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
                        &phys, virt);
                return;
        }
        __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL,
                             NO_CONT_MAPPINGS);
}

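/*
 * Create a mapping in a set of page tables other than init_mm's, allocating
 * intermediate tables with pgd_pgtable_alloc(). page_mappings_only forces
 * page-granular entries (no block or contiguous mappings).
 */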
void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
                               unsigned long virt, phys_addr_t size,
                               pgprot_t prot, bool page_mappings_only)
{
        int flags = 0;

        BUG_ON(mm == &init_mm);

        if (page_mappings_only)
                flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;

        __create_pgd_mapping(mm->pgd, phys, virt, size, prot,
                             pgd_pgtable_alloc, flags);
}

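/*
 * Change the protection attributes of an existing kernel mapping in place,
 * then flush the TLB for the affected range.
 */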
static void update_mapping_prot(phys_addr_t phys, unsigned long virt,
                                phys_addr_t size, pgprot_t prot)
{
        if (virt < VMALLOC_START) {
                pr_warn("BUG: not updating mapping for %pa at 0x%016lx - outside kernel range\n",
                        &phys, virt);
                return;
        }

        __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL,
                             NO_CONT_MAPPINGS);

        /* flush the TLBs after updating live kernel mappings */
        flush_tlb_kernel_range(virt, virt + size);
}

static void __init __map_memblock(pgd_t *pgdp, phys_addr_t start,
                                  phys_addr_t end, pgprot_t prot, int flags)
{
        __create_pgd_mapping(pgdp, start, __phys_to_virt(start), end - start,
                             prot, early_pgtable_alloc, flags);
}

void __init mark_linear_text_alias_ro(void)
{
        /*
         * Remove the write permissions from the linear alias of .text/.rodata
         */
        update_mapping_prot(__pa_symbol(_text), (unsigned long)lm_alias(_text),
                            (unsigned long)__init_begin - (unsigned long)_text,
                            PAGE_KERNEL_RO);
}

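/*
 * Create the linear mapping for all memblock memory regions. The linear
 * alias of the kernel image is mapped without contiguous entries and the
 * crash kernel region with page-level mappings, so that both can be remapped
 * later without break-before-make problems.
 */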
static void __init map_mem(pgd_t *pgdp)
{
        phys_addr_t kernel_start = __pa_symbol(_text);
        phys_addr_t kernel_end = __pa_symbol(__init_begin);
        struct memblock_region *reg;
        int flags = 0;

        if (debug_pagealloc_enabled())
                flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;

        /*
         * Take care not to create a writable alias for the
         * read-only text and rodata sections of the kernel image.
         * So temporarily mark them as NOMAP to skip mappings in
         * the following for-loop.
         */
        memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
#ifdef CONFIG_KEXEC_CORE
        if (crashk_res.end)
                memblock_mark_nomap(crashk_res.start,
                                    resource_size(&crashk_res));
#endif

        /* map all the memory banks */
        for_each_memblock(memory, reg) {
                phys_addr_t start = reg->base;
                phys_addr_t end = start + reg->size;

                if (start >= end)
                        break;
                if (memblock_is_nomap(reg))
                        continue;

                __map_memblock(pgdp, start, end, PAGE_KERNEL, flags);
        }

        /*
         * Map the linear alias of the [_text, __init_begin) interval
         * as non-executable now, and remove the write permission in
         * mark_linear_text_alias_ro() below (which will be called after
         * alternative patching has completed). This makes the contents
         * of the region accessible to subsystems such as hibernate,
         * but protects it from inadvertent modification or execution.
         * Note that contiguous mappings cannot be remapped in this way,
         * so we should avoid them here.
         */
        __map_memblock(pgdp, kernel_start, kernel_end,
                       PAGE_KERNEL, NO_CONT_MAPPINGS);
        memblock_clear_nomap(kernel_start, kernel_end - kernel_start);

#ifdef CONFIG_KEXEC_CORE
        /*
         * Use page-level mappings here so that we can shrink the region
         * in page granularity and put back unused memory to buddy system
         * through /sys/kernel/kexec_crash_size interface.
         */
        if (crashk_res.end) {
                __map_memblock(pgdp, crashk_res.start, crashk_res.end + 1,
                               PAGE_KERNEL,
                               NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
                memblock_clear_nomap(crashk_res.start,
                                     resource_size(&crashk_res));
        }
#endif
}

void mark_rodata_ro(void)
{
        unsigned long section_size;

        /*
         * mark .rodata as read only. Use __init_begin rather than __end_rodata
         * to cover NOTES and EXCEPTION_TABLE.
         */
        section_size = (unsigned long)__init_begin - (unsigned long)__start_rodata;
        update_mapping_prot(__pa_symbol(__start_rodata), (unsigned long)__start_rodata,
                            section_size, PAGE_KERNEL_RO);

        debug_checkwx();
}

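/*
 * Map one segment of the kernel image with the given protection and register
 * a vm_struct for it so the region is accounted for in the vmalloc area.
 */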
static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end,
                                      pgprot_t prot, struct vm_struct *vma,
                                      int flags, unsigned long vm_flags)
{
        phys_addr_t pa_start = __pa_symbol(va_start);
        unsigned long size = va_end - va_start;

        BUG_ON(!PAGE_ALIGNED(pa_start));
        BUG_ON(!PAGE_ALIGNED(size));

        __create_pgd_mapping(pgdp, pa_start, (unsigned long)va_start, size, prot,
                             early_pgtable_alloc, flags);

        if (!(vm_flags & VM_NO_GUARD))
                size += PAGE_SIZE;

        vma->addr       = va_start;
        vma->phys_addr  = pa_start;
        vma->size       = size;
        vma->flags      = VM_MAP | vm_flags;
        vma->caller     = __builtin_return_address(0);

        vm_area_add_early(vma);
}

static int __init parse_rodata(char *arg)
{
        return strtobool(arg, &rodata_enabled);
}
early_param("rodata", parse_rodata);

#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
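/*
 * Create the page tables for the exception entry trampoline used when the
 * kernel is unmapped at EL0 (KPTI): the trampoline text is mapped at
 * TRAMP_VALIAS in tramp_pg_dir and also exposed through fixmap slots in the
 * kernel page table.
 */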
static int __init map_entry_trampoline(void)
{
        pgprot_t prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC;
        phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start);

        /* The trampoline is always mapped and can therefore be global */
        pgprot_val(prot) &= ~PTE_NG;

        /* Map only the text into the trampoline page table */
        memset(tramp_pg_dir, 0, PGD_SIZE);
        __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, PAGE_SIZE,
                             prot, pgd_pgtable_alloc, 0);

        /* Map both the text and data into the kernel page table */
        __set_fixmap(FIX_ENTRY_TRAMP_TEXT, pa_start, prot);
        if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
                extern char __entry_tramp_data_start[];

                __set_fixmap(FIX_ENTRY_TRAMP_DATA,
                             __pa_symbol(__entry_tramp_data_start),
                             PAGE_KERNEL_RO);
        }

        return 0;
}
core_initcall(map_entry_trampoline);
#endif

/*
 * Create fine-grained mappings for the kernel.
 */
static void __init map_kernel(pgd_t *pgdp)
{
        static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_inittext,
                                vmlinux_initdata, vmlinux_data;

        /*
         * External debuggers may need to write directly to the text
         * mapping to install SW breakpoints. Allow this (only) when
         * explicitly requested with rodata=off.
         */
        pgprot_t text_prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC;

        /*
         * Only rodata will be remapped with different permissions later on,
         * all other segments are allowed to use contiguous mappings.
         */
        map_kernel_segment(pgdp, _text, _etext, text_prot, &vmlinux_text, 0,
                           VM_NO_GUARD);
        map_kernel_segment(pgdp, __start_rodata, __inittext_begin, PAGE_KERNEL,
                           &vmlinux_rodata, NO_CONT_MAPPINGS, VM_NO_GUARD);
        map_kernel_segment(pgdp, __inittext_begin, __inittext_end, text_prot,
                           &vmlinux_inittext, 0, VM_NO_GUARD);
        map_kernel_segment(pgdp, __initdata_begin, __initdata_end, PAGE_KERNEL,
                           &vmlinux_initdata, 0, VM_NO_GUARD);
        map_kernel_segment(pgdp, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0);

        if (!READ_ONCE(pgd_val(*pgd_offset_raw(pgdp, FIXADDR_START)))) {
                /*
                 * The fixmap falls in a separate pgd to the kernel, and doesn't
                 * live in the carveout for the swapper_pg_dir. We can simply
                 * re-use the existing dir for the fixmap.
                 */
                set_pgd(pgd_offset_raw(pgdp, FIXADDR_START),
                        READ_ONCE(*pgd_offset_k(FIXADDR_START)));
        } else if (CONFIG_PGTABLE_LEVELS > 3) {
                /*
                 * The fixmap shares its top level pgd entry with the kernel
                 * mapping. This can really only occur when we are running
                 * with 16k/4 levels, so we can simply reuse the pud level
                 * entry instead.
                 */
                BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
                pud_populate(&init_mm,
                             pud_set_fixmap_offset(pgdp, FIXADDR_START),
                             lm_alias(bm_pmd));
                pud_clear_fixmap();
        } else {
                BUG();
        }

        kasan_copy_shadow(pgdp);
}

/*
 * paging_init() sets up the page tables, initialises the zone memory
 * maps and sets up the zero page.
 */
void __init paging_init(void)
{
        phys_addr_t pgd_phys = early_pgtable_alloc();
        pgd_t *pgdp = pgd_set_fixmap(pgd_phys);

        map_kernel(pgdp);
        map_mem(pgdp);

        /*
         * We want to reuse the original swapper_pg_dir so we don't have to
         * communicate the new address to non-coherent secondaries in
         * secondary_entry, and so cpu_switch_mm can generate the address with
         * adrp+add rather than a load from some global variable.
         *
         * To do this we need to go via a temporary pgd.
         */
        cpu_replace_ttbr1(__va(pgd_phys));
        memcpy(swapper_pg_dir, pgdp, PGD_SIZE);
        cpu_replace_ttbr1(lm_alias(swapper_pg_dir));

        pgd_clear_fixmap();
        memblock_free(pgd_phys, PAGE_SIZE);

        /*
         * We only reuse the PGD from the swapper_pg_dir, not the pud + pmd
         * allocated with it.
         */
        memblock_free(__pa_symbol(swapper_pg_dir) + PAGE_SIZE,
                      __pa_symbol(swapper_pg_end) - __pa_symbol(swapper_pg_dir)
                      - PAGE_SIZE);
}

/*
 * Check whether a kernel address is valid (derived from arch/x86/).
 */
int kern_addr_valid(unsigned long addr)
{
        pgd_t *pgdp;
        pud_t *pudp, pud;
        pmd_t *pmdp, pmd;
        pte_t *ptep, pte;

        if ((((long)addr) >> VA_BITS) != -1UL)
                return 0;

        pgdp = pgd_offset_k(addr);
        if (pgd_none(READ_ONCE(*pgdp)))
                return 0;

        pudp = pud_offset(pgdp, addr);
        pud = READ_ONCE(*pudp);
        if (pud_none(pud))
                return 0;

        if (pud_sect(pud))
                return pfn_valid(pud_pfn(pud));

        pmdp = pmd_offset(pudp, addr);
        pmd = READ_ONCE(*pmdp);
        if (pmd_none(pmd))
                return 0;

        if (pmd_sect(pmd))
                return pfn_valid(pmd_pfn(pmd));

        ptep = pte_offset_kernel(pmdp, addr);
        pte = READ_ONCE(*ptep);
        if (pte_none(pte))
                return 0;

        return pfn_valid(pte_pfn(pte));
}

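/*
 * Populate the vmemmap region that backs the struct page array, either with
 * base pages or with PMD-sized sections, depending on whether the swapper
 * uses section mappings.
 */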
#ifdef CONFIG_SPARSEMEM_VMEMMAP
#if !ARM64_SWAPPER_USES_SECTION_MAPS
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
                               struct vmem_altmap *altmap)
{
        return vmemmap_populate_basepages(start, end, node);
}
#else   /* !ARM64_SWAPPER_USES_SECTION_MAPS */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
                               struct vmem_altmap *altmap)
{
        unsigned long addr = start;
        unsigned long next;
        pgd_t *pgdp;
        pud_t *pudp;
        pmd_t *pmdp;

        do {
                next = pmd_addr_end(addr, end);

                pgdp = vmemmap_pgd_populate(addr, node);
                if (!pgdp)
                        return -ENOMEM;

                pudp = vmemmap_pud_populate(pgdp, addr, node);
                if (!pudp)
                        return -ENOMEM;

                pmdp = pmd_offset(pudp, addr);
                if (pmd_none(READ_ONCE(*pmdp))) {
                        void *p = NULL;

                        p = vmemmap_alloc_block_buf(PMD_SIZE, node);
                        if (!p)
                                return -ENOMEM;

                        pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL));
                } else
                        vmemmap_verify((pte_t *)pmdp, node, addr, next);
        } while (addr = next, addr != end);

        return 0;
}
#endif  /* !ARM64_SWAPPER_USES_SECTION_MAPS */

void vmemmap_free(unsigned long start, unsigned long end,
                  struct vmem_altmap *altmap)
{
}
#endif  /* CONFIG_SPARSEMEM_VMEMMAP */

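/*
 * Helpers to locate the fixmap's own page-table entries. The bm_pud, bm_pmd
 * and bm_pte tables are statically allocated and wired up in
 * early_fixmap_init().
 */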
static inline pud_t * fixmap_pud(unsigned long addr)
{
        pgd_t *pgdp = pgd_offset_k(addr);
        pgd_t pgd = READ_ONCE(*pgdp);

        BUG_ON(pgd_none(pgd) || pgd_bad(pgd));

        return pud_offset_kimg(pgdp, addr);
}

static inline pmd_t * fixmap_pmd(unsigned long addr)
{
        pud_t *pudp = fixmap_pud(addr);
        pud_t pud = READ_ONCE(*pudp);

        BUG_ON(pud_none(pud) || pud_bad(pud));

        return pmd_offset_kimg(pudp, addr);
}

static inline pte_t * fixmap_pte(unsigned long addr)
{
        return &bm_pte[pte_index(addr)];
}

/*
 * The p*d_populate functions call virt_to_phys implicitly so they can't be used
 * directly on kernel symbols (bm_p*d). This function is called too early to use
 * lm_alias so __p*d_populate functions must be used to populate with the
 * physical address from __pa_symbol.
 */
void __init early_fixmap_init(void)
{
        pgd_t *pgdp, pgd;
        pud_t *pudp;
        pmd_t *pmdp;
        unsigned long addr = FIXADDR_START;

        pgdp = pgd_offset_k(addr);
        pgd = READ_ONCE(*pgdp);
        if (CONFIG_PGTABLE_LEVELS > 3 &&
            !(pgd_none(pgd) || pgd_page_paddr(pgd) == __pa_symbol(bm_pud))) {
                /*
                 * We only end up here if the kernel mapping and the fixmap
                 * share the top level pgd entry, which should only happen on
                 * 16k/4 levels configurations.
                 */
                BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
                pudp = pud_offset_kimg(pgdp, addr);
        } else {
                if (pgd_none(pgd))
                        __pgd_populate(pgdp, __pa_symbol(bm_pud), PUD_TYPE_TABLE);
                pudp = fixmap_pud(addr);
        }
        if (pud_none(READ_ONCE(*pudp)))
                __pud_populate(pudp, __pa_symbol(bm_pmd), PMD_TYPE_TABLE);
        pmdp = fixmap_pmd(addr);
        __pmd_populate(pmdp, __pa_symbol(bm_pte), PMD_TYPE_TABLE);

        /*
         * The boot-ioremap range spans multiple pmds, for which
         * we are not prepared:
         */
        BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
                     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));

        if ((pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)))
             || pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {
                WARN_ON(1);
                pr_warn("pmdp %p != %p, %p\n",
                        pmdp, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)),
                        fixmap_pmd(fix_to_virt(FIX_BTMAP_END)));
                pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
                        fix_to_virt(FIX_BTMAP_BEGIN));
                pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n",
                        fix_to_virt(FIX_BTMAP_END));
                pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END);
                pr_warn("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN);
        }
}

/*
 * Unusually, this is also called in IRQ context (ghes_iounmap_irq) so if we
 * ever need to use IPIs for TLB broadcasting, then we're in trouble here.
 */
void __set_fixmap(enum fixed_addresses idx,
                  phys_addr_t phys, pgprot_t flags)
{
        unsigned long addr = __fix_to_virt(idx);
        pte_t *ptep;

        BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);

        ptep = fixmap_pte(addr);

        if (pgprot_val(flags)) {
                set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, flags));
        } else {
                pte_clear(&init_mm, addr, ptep);
                flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
        }
}

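/*
 * Map the device tree blob through the FIX_FDT fixmap slot: map one block
 * first so the header can be read, then extend the mapping to cover the full
 * size reported by the header.
 */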
void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot)
{
        const u64 dt_virt_base = __fix_to_virt(FIX_FDT);
        int offset;
        void *dt_virt;

        /*
         * Check whether the physical FDT address is set and meets the minimum
         * alignment requirement. Since we are relying on MIN_FDT_ALIGN to be
         * at least 8 bytes so that we can always access the magic and size
         * fields of the FDT header after mapping the first chunk, double check
         * here if that is indeed the case.
         */
        BUILD_BUG_ON(MIN_FDT_ALIGN < 8);
        if (!dt_phys || dt_phys % MIN_FDT_ALIGN)
                return NULL;

        /*
         * Make sure that the FDT region can be mapped without the need to
         * allocate additional translation table pages, so that it is safe
         * to call create_mapping_noalloc() this early.
         *
         * On 64k pages, the FDT will be mapped using PTEs, so we need to
         * be in the same PMD as the rest of the fixmap.
         * On 4k pages, we'll use section mappings for the FDT so we only
         * have to be in the same PUD.
         */
        BUILD_BUG_ON(dt_virt_base % SZ_2M);

        BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> SWAPPER_TABLE_SHIFT !=
                     __fix_to_virt(FIX_BTMAP_BEGIN) >> SWAPPER_TABLE_SHIFT);

        offset = dt_phys % SWAPPER_BLOCK_SIZE;
        dt_virt = (void *)dt_virt_base + offset;

        /* map the first chunk so we can read the size from the header */
        create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE),
                               dt_virt_base, SWAPPER_BLOCK_SIZE, prot);

        if (fdt_magic(dt_virt) != FDT_MAGIC)
                return NULL;

        *size = fdt_totalsize(dt_virt);
        if (*size > MAX_FDT_SIZE)
                return NULL;

        if (offset + *size > SWAPPER_BLOCK_SIZE)
                create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
                                       round_up(offset + *size, SWAPPER_BLOCK_SIZE), prot);

        return dt_virt;
}

void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
{
        void *dt_virt;
        int size;

        dt_virt = __fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL_RO);
        if (!dt_virt)
                return NULL;

        memblock_reserve(dt_phys, size);
        return dt_virt;
}

int __init arch_ioremap_pud_supported(void)
{
        /*
         * Only 4k granule supports level 1 block mappings.
         * SW table walks can't handle removal of intermediate entries.
         */
        return IS_ENABLED(CONFIG_ARM64_4K_PAGES) &&
               !IS_ENABLED(CONFIG_ARM64_PTDUMP_DEBUGFS);
}

int __init arch_ioremap_pmd_supported(void)
{
        /* See arch_ioremap_pud_supported() */
        return !IS_ENABLED(CONFIG_ARM64_PTDUMP_DEBUGFS);
}

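/*
 * Install a section (block) mapping at the PUD or PMD level. The entry is
 * only written when doing so is safe for a live mapping, i.e. when it
 * amounts to a permission change; otherwise 0 is returned.
 */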
int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)
{
        pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT |
                                        pgprot_val(mk_sect_prot(prot)));
        pud_t new_pud = pfn_pud(__phys_to_pfn(phys), sect_prot);

        /* Only allow permission changes for now */
        if (!pgattr_change_is_safe(READ_ONCE(pud_val(*pudp)),
                                   pud_val(new_pud)))
                return 0;

        BUG_ON(phys & ~PUD_MASK);
        set_pud(pudp, new_pud);
        return 1;
}

int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
{
        pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT |
                                        pgprot_val(mk_sect_prot(prot)));
        pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), sect_prot);

        /* Only allow permission changes for now */
        if (!pgattr_change_is_safe(READ_ONCE(pmd_val(*pmdp)),
                                   pmd_val(new_pmd)))
                return 0;

        BUG_ON(phys & ~PMD_MASK);
        set_pmd(pmdp, new_pmd);
        return 1;
}

int pud_clear_huge(pud_t *pudp)
{
        if (!pud_sect(READ_ONCE(*pudp)))
                return 0;
        pud_clear(pudp);
        return 1;
}

int pmd_clear_huge(pmd_t *pmdp)
{
        if (!pmd_sect(READ_ONCE(*pmdp)))
                return 0;
        pmd_clear(pmdp);
        return 1;
}

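/*
 * Tear down the PTE table below a PMD entry: clear the PMD, flush the TLB
 * entry covering the table, then free the PTE page.
 */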
int pmd_free_pte_page(pmd_t *pmdp, unsigned long addr)
{
        pte_t *table;
        pmd_t pmd;

        pmd = READ_ONCE(*pmdp);

        if (!pmd_present(pmd))
                return 1;
        if (!pmd_table(pmd)) {
                VM_WARN_ON(!pmd_table(pmd));
                return 1;
        }

        table = pte_offset_kernel(pmdp, addr);
        pmd_clear(pmdp);
        __flush_tlb_kernel_pgtable(addr);
        pte_free_kernel(NULL, table);
        return 1;
}

int pud_free_pmd_page(pud_t *pudp, unsigned long addr)
{
        pmd_t *table;
        pmd_t *pmdp;
        pud_t pud;
        unsigned long next, end;

        pud = READ_ONCE(*pudp);

        if (!pud_present(pud))
                return 1;
        if (!pud_table(pud)) {
                VM_WARN_ON(!pud_table(pud));
                return 1;
        }

        table = pmd_offset(pudp, addr);
        pmdp = table;
        next = addr;
        end = addr + PUD_SIZE;
        do {
                pmd_free_pte_page(pmdp, next);
        } while (pmdp++, next += PMD_SIZE, next != end);

        pud_clear(pudp);
        __flush_tlb_kernel_pgtable(addr);
        pmd_free(NULL, table);
        return 1;
}