pgtable.h 31 KB
  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. #ifndef _ASM_GENERIC_PGTABLE_H
  3. #define _ASM_GENERIC_PGTABLE_H
  4. #include <linux/pfn.h>
  5. #ifndef __ASSEMBLY__
  6. #ifdef CONFIG_MMU
  7. #include <linux/mm_types.h>
  8. #include <linux/bug.h>
  9. #include <linux/errno.h>
  10. #if 5 - defined(__PAGETABLE_P4D_FOLDED) - defined(__PAGETABLE_PUD_FOLDED) - \
  11. defined(__PAGETABLE_PMD_FOLDED) != CONFIG_PGTABLE_LEVELS
  12. #error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{P4D,PUD,PMD}_FOLDED
  13. #endif
  14. /*
  15. * On almost all architectures and configurations, 0 can be used as the
  16. * upper ceiling to free_pgtables(): on many architectures it has the same
  17. * effect as using TASK_SIZE. However, there is one configuration which
  18. * must impose a more careful limit, to avoid freeing kernel pgtables.
  19. */
  20. #ifndef USER_PGTABLES_CEILING
  21. #define USER_PGTABLES_CEILING 0UL
  22. #endif
  23. #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
  24. extern int ptep_set_access_flags(struct vm_area_struct *vma,
  25. unsigned long address, pte_t *ptep,
  26. pte_t entry, int dirty);
  27. #endif
  28. #ifndef __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
  29. #ifdef CONFIG_TRANSPARENT_HUGEPAGE
  30. extern int pmdp_set_access_flags(struct vm_area_struct *vma,
  31. unsigned long address, pmd_t *pmdp,
  32. pmd_t entry, int dirty);
  33. extern int pudp_set_access_flags(struct vm_area_struct *vma,
  34. unsigned long address, pud_t *pudp,
  35. pud_t entry, int dirty);
  36. #else
  37. static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
  38. unsigned long address, pmd_t *pmdp,
  39. pmd_t entry, int dirty)
  40. {
  41. BUILD_BUG();
  42. return 0;
  43. }
  44. static inline int pudp_set_access_flags(struct vm_area_struct *vma,
  45. unsigned long address, pud_t *pudp,
  46. pud_t entry, int dirty)
  47. {
  48. BUILD_BUG();
  49. return 0;
  50. }
  51. #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
  52. #endif
  53. #ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
  54. static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
  55. unsigned long address,
  56. pte_t *ptep)
  57. {
  58. pte_t pte = *ptep;
  59. int r = 1;
  60. if (!pte_young(pte))
  61. r = 0;
  62. else
  63. set_pte_at(vma->vm_mm, address, ptep, pte_mkold(pte));
  64. return r;
  65. }
  66. #endif
  67. #ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
  68. #ifdef CONFIG_TRANSPARENT_HUGEPAGE
  69. static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
  70. unsigned long address,
  71. pmd_t *pmdp)
  72. {
  73. pmd_t pmd = *pmdp;
  74. int r = 1;
  75. if (!pmd_young(pmd))
  76. r = 0;
  77. else
  78. set_pmd_at(vma->vm_mm, address, pmdp, pmd_mkold(pmd));
  79. return r;
  80. }
  81. #else
  82. static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
  83. unsigned long address,
  84. pmd_t *pmdp)
  85. {
  86. BUILD_BUG();
  87. return 0;
  88. }
  89. #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
  90. #endif
  91. #ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
  92. int ptep_clear_flush_young(struct vm_area_struct *vma,
  93. unsigned long address, pte_t *ptep);
  94. #endif
  95. #ifndef __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
  96. #ifdef CONFIG_TRANSPARENT_HUGEPAGE
  97. extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
  98. unsigned long address, pmd_t *pmdp);
  99. #else
  100. /*
  101. * Despite relevant to THP only, this API is called from generic rmap code
  102. * under PageTransHuge(), hence needs a dummy implementation for !THP
  103. */
  104. static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
  105. unsigned long address, pmd_t *pmdp)
  106. {
  107. BUILD_BUG();
  108. return 0;
  109. }
  110. #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
  111. #endif
  112. #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
  113. static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
  114. unsigned long address,
  115. pte_t *ptep)
  116. {
  117. pte_t pte = *ptep;
  118. pte_clear(mm, address, ptep);
  119. return pte;
  120. }
  121. #endif
  122. #ifdef CONFIG_TRANSPARENT_HUGEPAGE
  123. #ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
  124. static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
  125. unsigned long address,
  126. pmd_t *pmdp)
  127. {
  128. pmd_t pmd = *pmdp;
  129. pmd_clear(pmdp);
  130. return pmd;
  131. }
  132. #endif /* __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR */
  133. #ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
  134. static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
  135. unsigned long address,
  136. pud_t *pudp)
  137. {
  138. pud_t pud = *pudp;
  139. pud_clear(pudp);
  140. return pud;
  141. }
  142. #endif /* __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR */
  143. #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
  144. #ifdef CONFIG_TRANSPARENT_HUGEPAGE
  145. #ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
  146. static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm,
  147. unsigned long address, pmd_t *pmdp,
  148. int full)
  149. {
  150. return pmdp_huge_get_and_clear(mm, address, pmdp);
  151. }
  152. #endif
  153. #ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR_FULL
  154. static inline pud_t pudp_huge_get_and_clear_full(struct mm_struct *mm,
  155. unsigned long address, pud_t *pudp,
  156. int full)
  157. {
  158. return pudp_huge_get_and_clear(mm, address, pudp);
  159. }
  160. #endif
  161. #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
  162. #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
  163. static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
  164. unsigned long address, pte_t *ptep,
  165. int full)
  166. {
  167. pte_t pte;
  168. pte = ptep_get_and_clear(mm, address, ptep);
  169. return pte;
  170. }
  171. #endif
  172. /*
  173. * Some architectures may be able to avoid expensive synchronization
  174. * primitives when modifications are made to PTE's which are already
  175. * not present, or in the process of an address space destruction.
  176. */
  177. #ifndef __HAVE_ARCH_PTE_CLEAR_NOT_PRESENT_FULL
  178. static inline void pte_clear_not_present_full(struct mm_struct *mm,
  179. unsigned long address,
  180. pte_t *ptep,
  181. int full)
  182. {
  183. pte_clear(mm, address, ptep);
  184. }
  185. #endif
  186. #ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH
  187. extern pte_t ptep_clear_flush(struct vm_area_struct *vma,
  188. unsigned long address,
  189. pte_t *ptep);
  190. #endif
  191. #ifndef __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH
  192. extern pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
  193. unsigned long address,
  194. pmd_t *pmdp);
  195. extern pud_t pudp_huge_clear_flush(struct vm_area_struct *vma,
  196. unsigned long address,
  197. pud_t *pudp);
  198. #endif
  199. #ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
  200. struct mm_struct;
  201. static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
  202. {
  203. pte_t old_pte = *ptep;
  204. set_pte_at(mm, address, ptep, pte_wrprotect(old_pte));
  205. }
  206. #endif
  207. #ifndef pte_savedwrite
  208. #define pte_savedwrite pte_write
  209. #endif
  210. #ifndef pte_mk_savedwrite
  211. #define pte_mk_savedwrite pte_mkwrite
  212. #endif
  213. #ifndef pte_clear_savedwrite
  214. #define pte_clear_savedwrite pte_wrprotect
  215. #endif
  216. #ifndef pmd_savedwrite
  217. #define pmd_savedwrite pmd_write
  218. #endif
  219. #ifndef pmd_mk_savedwrite
  220. #define pmd_mk_savedwrite pmd_mkwrite
  221. #endif
  222. #ifndef pmd_clear_savedwrite
  223. #define pmd_clear_savedwrite pmd_wrprotect
  224. #endif
  225. #ifndef __HAVE_ARCH_PMDP_SET_WRPROTECT
  226. #ifdef CONFIG_TRANSPARENT_HUGEPAGE
  227. static inline void pmdp_set_wrprotect(struct mm_struct *mm,
  228. unsigned long address, pmd_t *pmdp)
  229. {
  230. pmd_t old_pmd = *pmdp;
  231. set_pmd_at(mm, address, pmdp, pmd_wrprotect(old_pmd));
  232. }
  233. #else
  234. static inline void pmdp_set_wrprotect(struct mm_struct *mm,
  235. unsigned long address, pmd_t *pmdp)
  236. {
  237. BUILD_BUG();
  238. }
  239. #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
  240. #endif
  241. #ifndef __HAVE_ARCH_PUDP_SET_WRPROTECT
  242. #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
  243. static inline void pudp_set_wrprotect(struct mm_struct *mm,
  244. unsigned long address, pud_t *pudp)
  245. {
  246. pud_t old_pud = *pudp;
  247. set_pud_at(mm, address, pudp, pud_wrprotect(old_pud));
  248. }
  249. #else
  250. static inline void pudp_set_wrprotect(struct mm_struct *mm,
  251. unsigned long address, pud_t *pudp)
  252. {
  253. BUILD_BUG();
  254. }
  255. #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
  256. #endif
  257. #ifndef pmdp_collapse_flush
  258. #ifdef CONFIG_TRANSPARENT_HUGEPAGE
  259. extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
  260. unsigned long address, pmd_t *pmdp);
  261. #else
  262. static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
  263. unsigned long address,
  264. pmd_t *pmdp)
  265. {
  266. BUILD_BUG();
  267. return *pmdp;
  268. }
  269. #define pmdp_collapse_flush pmdp_collapse_flush
  270. #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
  271. #endif
  272. #ifndef __HAVE_ARCH_PGTABLE_DEPOSIT
  273. extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
  274. pgtable_t pgtable);
  275. #endif
  276. #ifndef __HAVE_ARCH_PGTABLE_WITHDRAW
  277. extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
  278. #endif
  279. #ifdef CONFIG_TRANSPARENT_HUGEPAGE
  280. /*
  281. * This is an implementation of pmdp_establish() that is only suitable for an
  282. * architecture that doesn't have hardware dirty/accessed bits. In this case we
  283. * can't race with CPU which sets these bits and non-atomic approach is fine.
  284. */
  285. static inline pmd_t generic_pmdp_establish(struct vm_area_struct *vma,
  286. unsigned long address, pmd_t *pmdp, pmd_t pmd)
  287. {
  288. pmd_t old_pmd = *pmdp;
  289. set_pmd_at(vma->vm_mm, address, pmdp, pmd);
  290. return old_pmd;
  291. }
  292. #endif
  293. #ifndef __HAVE_ARCH_PMDP_INVALIDATE
  294. extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
  295. pmd_t *pmdp);
  296. #endif
  297. #ifndef __HAVE_ARCH_PTE_SAME
  298. static inline int pte_same(pte_t pte_a, pte_t pte_b)
  299. {
  300. return pte_val(pte_a) == pte_val(pte_b);
  301. }
  302. #endif
  303. #ifndef __HAVE_ARCH_PTE_UNUSED
  304. /*
  305. * Some architectures provide facilities to virtualization guests
  306. * so that they can flag allocated pages as unused. This allows the
  307. * host to transparently reclaim unused pages. This function returns
  308. * whether the pte's page is unused.
  309. */
  310. static inline int pte_unused(pte_t pte)
  311. {
  312. return 0;
  313. }
  314. #endif
  315. #ifndef pte_access_permitted
  316. #define pte_access_permitted(pte, write) \
  317. (pte_present(pte) && (!(write) || pte_write(pte)))
  318. #endif
  319. #ifndef pmd_access_permitted
  320. #define pmd_access_permitted(pmd, write) \
  321. (pmd_present(pmd) && (!(write) || pmd_write(pmd)))
  322. #endif
  323. #ifndef pud_access_permitted
  324. #define pud_access_permitted(pud, write) \
  325. (pud_present(pud) && (!(write) || pud_write(pud)))
  326. #endif
  327. #ifndef p4d_access_permitted
  328. #define p4d_access_permitted(p4d, write) \
  329. (p4d_present(p4d) && (!(write) || p4d_write(p4d)))
  330. #endif
  331. #ifndef pgd_access_permitted
  332. #define pgd_access_permitted(pgd, write) \
  333. (pgd_present(pgd) && (!(write) || pgd_write(pgd)))
  334. #endif
  335. #ifndef __HAVE_ARCH_PMD_SAME
  336. #ifdef CONFIG_TRANSPARENT_HUGEPAGE
  337. static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
  338. {
  339. return pmd_val(pmd_a) == pmd_val(pmd_b);
  340. }
  341. static inline int pud_same(pud_t pud_a, pud_t pud_b)
  342. {
  343. return pud_val(pud_a) == pud_val(pud_b);
  344. }
  345. #else /* CONFIG_TRANSPARENT_HUGEPAGE */
  346. static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
  347. {
  348. BUILD_BUG();
  349. return 0;
  350. }
  351. static inline int pud_same(pud_t pud_a, pud_t pud_b)
  352. {
  353. BUILD_BUG();
  354. return 0;
  355. }
  356. #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
  357. #endif
  358. #ifndef __HAVE_ARCH_DO_SWAP_PAGE
  359. /*
  360. * Some architectures support metadata associated with a page. When a
  361. * page is being swapped out, this metadata must be saved so it can be
  362. * restored when the page is swapped back in. SPARC M7 and newer
  363. * processors support an ADI (Application Data Integrity) tag for the
  364. * page as metadata for the page. arch_do_swap_page() can restore this
  365. * metadata when a page is swapped back in.
  366. */
  367. static inline void arch_do_swap_page(struct mm_struct *mm,
  368. struct vm_area_struct *vma,
  369. unsigned long addr,
  370. pte_t pte, pte_t oldpte)
  371. {
  372. }
  373. #endif
  374. #ifndef __HAVE_ARCH_UNMAP_ONE
  375. /*
  376. * Some architectures support metadata associated with a page. When a
  377. * page is being swapped out, this metadata must be saved so it can be
  378. * restored when the page is swapped back in. SPARC M7 and newer
  379. * processors support an ADI (Application Data Integrity) tag for the
  380. * page as metadata for the page. arch_unmap_one() can save this
  381. * metadata on a swap-out of a page.
  382. */
  383. static inline int arch_unmap_one(struct mm_struct *mm,
  384. struct vm_area_struct *vma,
  385. unsigned long addr,
  386. pte_t orig_pte)
  387. {
  388. return 0;
  389. }
  390. #endif
  391. #ifndef __HAVE_ARCH_PGD_OFFSET_GATE
  392. #define pgd_offset_gate(mm, addr) pgd_offset(mm, addr)
  393. #endif
  394. #ifndef __HAVE_ARCH_MOVE_PTE
  395. #define move_pte(pte, prot, old_addr, new_addr) (pte)
  396. #endif
  397. #ifndef pte_accessible
  398. # define pte_accessible(mm, pte) ((void)(pte), 1)
  399. #endif
  400. #ifndef flush_tlb_fix_spurious_fault
  401. #define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address)
  402. #endif
  403. #ifndef pgprot_noncached
  404. #define pgprot_noncached(prot) (prot)
  405. #endif
  406. #ifndef pgprot_writecombine
  407. #define pgprot_writecombine pgprot_noncached
  408. #endif
  409. #ifndef pgprot_writethrough
  410. #define pgprot_writethrough pgprot_noncached
  411. #endif
  412. #ifndef pgprot_device
  413. #define pgprot_device pgprot_noncached
  414. #endif
  415. #ifndef pgprot_modify
  416. #define pgprot_modify pgprot_modify
  417. static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
  418. {
  419. if (pgprot_val(oldprot) == pgprot_val(pgprot_noncached(oldprot)))
  420. newprot = pgprot_noncached(newprot);
  421. if (pgprot_val(oldprot) == pgprot_val(pgprot_writecombine(oldprot)))
  422. newprot = pgprot_writecombine(newprot);
  423. if (pgprot_val(oldprot) == pgprot_val(pgprot_device(oldprot)))
  424. newprot = pgprot_device(newprot);
  425. return newprot;
  426. }
  427. #endif
  428. /*
  429. * When walking page tables, get the address of the next boundary,
  430. * or the end address of the range if that comes earlier. Although no
  431. * vma end wraps to 0, rounded up __boundary may wrap to 0 throughout.
  432. */
  433. #define pgd_addr_end(addr, end) \
  434. ({ unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \
  435. (__boundary - 1 < (end) - 1)? __boundary: (end); \
  436. })
  437. #ifndef p4d_addr_end
  438. #define p4d_addr_end(addr, end) \
  439. ({ unsigned long __boundary = ((addr) + P4D_SIZE) & P4D_MASK; \
  440. (__boundary - 1 < (end) - 1)? __boundary: (end); \
  441. })
  442. #endif
  443. #ifndef pud_addr_end
  444. #define pud_addr_end(addr, end) \
  445. ({ unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK; \
  446. (__boundary - 1 < (end) - 1)? __boundary: (end); \
  447. })
  448. #endif
  449. #ifndef pmd_addr_end
  450. #define pmd_addr_end(addr, end) \
  451. ({ unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK; \
  452. (__boundary - 1 < (end) - 1)? __boundary: (end); \
  453. })
  454. #endif
  455. /*
  456. * When walking page tables, we usually want to skip any p?d_none entries;
  457. * and any p?d_bad entries - reporting the error before resetting to none.
  458. * Do the tests inline, but report and clear the bad entry in mm/memory.c.
  459. */
  460. void pgd_clear_bad(pgd_t *);
  461. void p4d_clear_bad(p4d_t *);
  462. void pud_clear_bad(pud_t *);
  463. void pmd_clear_bad(pmd_t *);
  464. static inline int pgd_none_or_clear_bad(pgd_t *pgd)
  465. {
  466. if (pgd_none(*pgd))
  467. return 1;
  468. if (unlikely(pgd_bad(*pgd))) {
  469. pgd_clear_bad(pgd);
  470. return 1;
  471. }
  472. return 0;
  473. }
  474. static inline int p4d_none_or_clear_bad(p4d_t *p4d)
  475. {
  476. if (p4d_none(*p4d))
  477. return 1;
  478. if (unlikely(p4d_bad(*p4d))) {
  479. p4d_clear_bad(p4d);
  480. return 1;
  481. }
  482. return 0;
  483. }
  484. static inline int pud_none_or_clear_bad(pud_t *pud)
  485. {
  486. if (pud_none(*pud))
  487. return 1;
  488. if (unlikely(pud_bad(*pud))) {
  489. pud_clear_bad(pud);
  490. return 1;
  491. }
  492. return 0;
  493. }
  494. static inline int pmd_none_or_clear_bad(pmd_t *pmd)
  495. {
  496. if (pmd_none(*pmd))
  497. return 1;
  498. if (unlikely(pmd_bad(*pmd))) {
  499. pmd_clear_bad(pmd);
  500. return 1;
  501. }
  502. return 0;
  503. }
  504. static inline pte_t __ptep_modify_prot_start(struct mm_struct *mm,
  505. unsigned long addr,
  506. pte_t *ptep)
  507. {
  508. /*
  509. * Get the current pte state, but zero it out to make it
  510. * non-present, preventing the hardware from asynchronously
  511. * updating it.
  512. */
  513. return ptep_get_and_clear(mm, addr, ptep);
  514. }
  515. static inline void __ptep_modify_prot_commit(struct mm_struct *mm,
  516. unsigned long addr,
  517. pte_t *ptep, pte_t pte)
  518. {
  519. /*
  520. * The pte is non-present, so there's no hardware state to
  521. * preserve.
  522. */
  523. set_pte_at(mm, addr, ptep, pte);
  524. }
  525. #ifndef __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
  526. /*
  527. * Start a pte protection read-modify-write transaction, which
  528. * protects against asynchronous hardware modifications to the pte.
  529. * The intention is not to prevent the hardware from making pte
  530. * updates, but to prevent any updates it may make from being lost.
  531. *
  532. * This does not protect against other software modifications of the
  533. * pte; the appropriate pte lock must be held over the transaction.
  534. *
  535. * Note that this interface is intended to be batchable, meaning that
  536. * ptep_modify_prot_commit may not actually update the pte, but merely
  537. * queue the update to be done at some later time. The update must be
  538. * actually committed before the pte lock is released, however.
  539. */
  540. static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
  541. unsigned long addr,
  542. pte_t *ptep)
  543. {
  544. return __ptep_modify_prot_start(mm, addr, ptep);
  545. }
  546. /*
  547. * Commit an update to a pte, leaving any hardware-controlled bits in
  548. * the PTE unmodified.
  549. */
  550. static inline void ptep_modify_prot_commit(struct mm_struct *mm,
  551. unsigned long addr,
  552. pte_t *ptep, pte_t pte)
  553. {
  554. __ptep_modify_prot_commit(mm, addr, ptep, pte);
  555. }
  556. #endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */
  557. #endif /* CONFIG_MMU */
  558. /*
  559. * No-op macros that just return the current protection value. Defined here
  560. * because these macros can be used even if CONFIG_MMU is not defined.
  561. */
  562. #ifndef pgprot_encrypted
  563. #define pgprot_encrypted(prot) (prot)
  564. #endif
  565. #ifndef pgprot_decrypted
  566. #define pgprot_decrypted(prot) (prot)
  567. #endif
  568. /*
  569. * A facility to provide lazy MMU batching. This allows PTE updates and
  570. * page invalidations to be delayed until a call to leave lazy MMU mode
  571. * is issued. Some architectures may benefit from doing this, and it is
  572. * beneficial for both shadow and direct mode hypervisors, which may batch
  573. * the PTE updates which happen during this window. Note that using this
  574. * interface requires that read hazards be removed from the code. A read
  575. * hazard could result in the direct mode hypervisor case, since the actual
  576. * write to the page tables may not yet have taken place, so reads though
  577. * a raw PTE pointer after it has been modified are not guaranteed to be
  578. * up to date. This mode can only be entered and left under the protection of
  579. * the page table locks for all page tables which may be modified. In the UP
  580. * case, this is required so that preemption is disabled, and in the SMP case,
  581. * it must synchronize the delayed page table writes properly on other CPUs.
  582. */
  583. #ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE
  584. #define arch_enter_lazy_mmu_mode() do {} while (0)
  585. #define arch_leave_lazy_mmu_mode() do {} while (0)
  586. #define arch_flush_lazy_mmu_mode() do {} while (0)
  587. #endif
  588. /*
  589. * A facility to provide batching of the reload of page tables and
  590. * other process state with the actual context switch code for
  591. * paravirtualized guests. By convention, only one of the batched
  592. * update (lazy) modes (CPU, MMU) should be active at any given time,
  593. * entry should never be nested, and entry and exits should always be
  594. * paired. This is for sanity of maintaining and reasoning about the
  595. * kernel code. In this case, the exit (end of the context switch) is
  596. * in architecture-specific code, and so doesn't need a generic
  597. * definition.
  598. */
  599. #ifndef __HAVE_ARCH_START_CONTEXT_SWITCH
  600. #define arch_start_context_switch(prev) do {} while (0)
  601. #endif
  602. #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
  603. #ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION
  604. static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
  605. {
  606. return pmd;
  607. }
  608. static inline int pmd_swp_soft_dirty(pmd_t pmd)
  609. {
  610. return 0;
  611. }
  612. static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
  613. {
  614. return pmd;
  615. }
  616. #endif
  617. #else /* !CONFIG_HAVE_ARCH_SOFT_DIRTY */
  618. static inline int pte_soft_dirty(pte_t pte)
  619. {
  620. return 0;
  621. }
  622. static inline int pmd_soft_dirty(pmd_t pmd)
  623. {
  624. return 0;
  625. }
  626. static inline pte_t pte_mksoft_dirty(pte_t pte)
  627. {
  628. return pte;
  629. }
  630. static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
  631. {
  632. return pmd;
  633. }
  634. static inline pte_t pte_clear_soft_dirty(pte_t pte)
  635. {
  636. return pte;
  637. }
  638. static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
  639. {
  640. return pmd;
  641. }
  642. static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
  643. {
  644. return pte;
  645. }
  646. static inline int pte_swp_soft_dirty(pte_t pte)
  647. {
  648. return 0;
  649. }
  650. static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
  651. {
  652. return pte;
  653. }
  654. static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
  655. {
  656. return pmd;
  657. }
  658. static inline int pmd_swp_soft_dirty(pmd_t pmd)
  659. {
  660. return 0;
  661. }
  662. static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
  663. {
  664. return pmd;
  665. }
  666. #endif
  667. #ifndef __HAVE_PFNMAP_TRACKING
  668. /*
  669. * Interfaces that can be used by architecture code to keep track of
  670. * memory type of pfn mappings specified by the remap_pfn_range,
  671. * vm_insert_pfn.
  672. */
  673. /*
  674. * track_pfn_remap is called when a _new_ pfn mapping is being established
  675. * by remap_pfn_range() for physical range indicated by pfn and size.
  676. */
  677. static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
  678. unsigned long pfn, unsigned long addr,
  679. unsigned long size)
  680. {
  681. return 0;
  682. }
  683. /*
  684. * track_pfn_insert is called when a _new_ single pfn is established
  685. * by vm_insert_pfn().
  686. */
  687. static inline void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
  688. pfn_t pfn)
  689. {
  690. }
  691. /*
  692. * track_pfn_copy is called when vma that is covering the pfnmap gets
  693. * copied through copy_page_range().
  694. */
  695. static inline int track_pfn_copy(struct vm_area_struct *vma)
  696. {
  697. return 0;
  698. }
  699. /*
  700. * untrack_pfn is called while unmapping a pfnmap for a region.
  701. * untrack can be called for a specific region indicated by pfn and size or
  702. * can be for the entire vma (in which case pfn, size are zero).
  703. */
  704. static inline void untrack_pfn(struct vm_area_struct *vma,
  705. unsigned long pfn, unsigned long size)
  706. {
  707. }
  708. /*
  709. * untrack_pfn_moved is called while mremapping a pfnmap for a new region.
  710. */
  711. static inline void untrack_pfn_moved(struct vm_area_struct *vma)
  712. {
  713. }
  714. #else
  715. extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
  716. unsigned long pfn, unsigned long addr,
  717. unsigned long size);
  718. extern void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
  719. pfn_t pfn);
  720. extern int track_pfn_copy(struct vm_area_struct *vma);
  721. extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
  722. unsigned long size);
  723. extern void untrack_pfn_moved(struct vm_area_struct *vma);
  724. #endif
  725. #ifdef __HAVE_COLOR_ZERO_PAGE
  726. static inline int is_zero_pfn(unsigned long pfn)
  727. {
  728. extern unsigned long zero_pfn;
  729. unsigned long offset_from_zero_pfn = pfn - zero_pfn;
  730. return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
  731. }
  732. #define my_zero_pfn(addr) page_to_pfn(ZERO_PAGE(addr))
  733. #else
  734. static inline int is_zero_pfn(unsigned long pfn)
  735. {
  736. extern unsigned long zero_pfn;
  737. return pfn == zero_pfn;
  738. }
  739. static inline unsigned long my_zero_pfn(unsigned long addr)
  740. {
  741. extern unsigned long zero_pfn;
  742. return zero_pfn;
  743. }
  744. #endif
  745. #ifdef CONFIG_MMU
  746. #ifndef CONFIG_TRANSPARENT_HUGEPAGE
  747. static inline int pmd_trans_huge(pmd_t pmd)
  748. {
  749. return 0;
  750. }
  751. #ifndef pmd_write
  752. static inline int pmd_write(pmd_t pmd)
  753. {
  754. BUG();
  755. return 0;
  756. }
  757. #endif /* pmd_write */
  758. #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
  759. #ifndef pud_write
  760. static inline int pud_write(pud_t pud)
  761. {
  762. BUG();
  763. return 0;
  764. }
  765. #endif /* pud_write */
  766. #if !defined(CONFIG_TRANSPARENT_HUGEPAGE) || \
  767. (defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
  768. !defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD))
  769. static inline int pud_trans_huge(pud_t pud)
  770. {
  771. return 0;
  772. }
  773. #endif
  774. #ifndef pmd_read_atomic
  775. static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
  776. {
  777. /*
  778. * Depend on compiler for an atomic pmd read. NOTE: this is
  779. * only going to work, if the pmdval_t isn't larger than
  780. * an unsigned long.
  781. */
  782. return *pmdp;
  783. }
  784. #endif
  785. #ifndef arch_needs_pgtable_deposit
  786. #define arch_needs_pgtable_deposit() (false)
  787. #endif
  788. /*
  789. * This function is meant to be used by sites walking pagetables with
  790. * the mmap_sem held in read mode to protect against MADV_DONTNEED and
  791. * transhuge page faults. MADV_DONTNEED can convert a transhuge pmd
  792. * into a null pmd and the transhuge page fault can convert a null pmd
  793. * into an hugepmd or into a regular pmd (if the hugepage allocation
  794. * fails). While holding the mmap_sem in read mode the pmd becomes
  795. * stable and stops changing under us only if it's not null and not a
  796. * transhuge pmd. When those races occurs and this function makes a
  797. * difference vs the standard pmd_none_or_clear_bad, the result is
  798. * undefined so behaving like if the pmd was none is safe (because it
  799. * can return none anyway). The compiler level barrier() is critically
  800. * important to compute the two checks atomically on the same pmdval.
  801. *
  802. * For 32bit kernels with a 64bit large pmd_t this automatically takes
  803. * care of reading the pmd atomically to avoid SMP race conditions
  804. * against pmd_populate() when the mmap_sem is hold for reading by the
  805. * caller (a special atomic read not done by "gcc" as in the generic
  806. * version above, is also needed when THP is disabled because the page
  807. * fault can populate the pmd from under us).
  808. */
  809. static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
  810. {
  811. pmd_t pmdval = pmd_read_atomic(pmd);
  812. /*
  813. * The barrier will stabilize the pmdval in a register or on
  814. * the stack so that it will stop changing under the code.
  815. *
  816. * When CONFIG_TRANSPARENT_HUGEPAGE=y on x86 32bit PAE,
  817. * pmd_read_atomic is allowed to return a not atomic pmdval
  818. * (for example pointing to an hugepage that has never been
  819. * mapped in the pmd). The below checks will only care about
  820. * the low part of the pmd with 32bit PAE x86 anyway, with the
  821. * exception of pmd_none(). So the important thing is that if
  822. * the low part of the pmd is found null, the high part will
  823. * be also null or the pmd_none() check below would be
  824. * confused.
  825. */
  826. #ifdef CONFIG_TRANSPARENT_HUGEPAGE
  827. barrier();
  828. #endif
  829. /*
  830. * !pmd_present() checks for pmd migration entries
  831. *
  832. * The complete check uses is_pmd_migration_entry() in linux/swapops.h
  833. * But using that requires moving current function and pmd_trans_unstable()
  834. * to linux/swapops.h to resovle dependency, which is too much code move.
  835. *
  836. * !pmd_present() is equivalent to is_pmd_migration_entry() currently,
  837. * because !pmd_present() pages can only be under migration not swapped
  838. * out.
  839. *
  840. * pmd_none() is preseved for future condition checks on pmd migration
  841. * entries and not confusing with this function name, although it is
  842. * redundant with !pmd_present().
  843. */
  844. if (pmd_none(pmdval) || pmd_trans_huge(pmdval) ||
  845. (IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION) && !pmd_present(pmdval)))
  846. return 1;
  847. if (unlikely(pmd_bad(pmdval))) {
  848. pmd_clear_bad(pmd);
  849. return 1;
  850. }
  851. return 0;
  852. }
  853. /*
  854. * This is a noop if Transparent Hugepage Support is not built into
  855. * the kernel. Otherwise it is equivalent to
  856. * pmd_none_or_trans_huge_or_clear_bad(), and shall only be called in
  857. * places that already verified the pmd is not none and they want to
  858. * walk ptes while holding the mmap sem in read mode (write mode don't
  859. * need this). If THP is not enabled, the pmd can't go away under the
  860. * code even if MADV_DONTNEED runs, but if THP is enabled we need to
  861. * run a pmd_trans_unstable before walking the ptes after
  862. * split_huge_page_pmd returns (because it may have run when the pmd
  863. * become null, but then a page fault can map in a THP and not a
  864. * regular page).
  865. */
  866. static inline int pmd_trans_unstable(pmd_t *pmd)
  867. {
  868. #ifdef CONFIG_TRANSPARENT_HUGEPAGE
  869. return pmd_none_or_trans_huge_or_clear_bad(pmd);
  870. #else
  871. return 0;
  872. #endif
  873. }
  874. #ifndef CONFIG_NUMA_BALANCING
  875. /*
  876. * Technically a PTE can be PROTNONE even when not doing NUMA balancing but
  877. * the only case the kernel cares is for NUMA balancing and is only ever set
  878. * when the VMA is accessible. For PROT_NONE VMAs, the PTEs are not marked
  879. * _PAGE_PROTNONE so by by default, implement the helper as "always no". It
  880. * is the responsibility of the caller to distinguish between PROT_NONE
  881. * protections and NUMA hinting fault protections.
  882. */
  883. static inline int pte_protnone(pte_t pte)
  884. {
  885. return 0;
  886. }
  887. static inline int pmd_protnone(pmd_t pmd)
  888. {
  889. return 0;
  890. }
  891. #endif /* CONFIG_NUMA_BALANCING */
  892. #endif /* CONFIG_MMU */
  893. #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
  894. #ifndef __PAGETABLE_P4D_FOLDED
  895. int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot);
  896. int p4d_clear_huge(p4d_t *p4d);
  897. #else
  898. static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
  899. {
  900. return 0;
  901. }
  902. static inline int p4d_clear_huge(p4d_t *p4d)
  903. {
  904. return 0;
  905. }
  906. #endif /* !__PAGETABLE_P4D_FOLDED */
  907. int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot);
  908. int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot);
  909. int pud_clear_huge(pud_t *pud);
  910. int pmd_clear_huge(pmd_t *pmd);
  911. int pud_free_pmd_page(pud_t *pud, unsigned long addr);
  912. int pmd_free_pte_page(pmd_t *pmd, unsigned long addr);
  913. #else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */
  914. static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
  915. {
  916. return 0;
  917. }
  918. static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
  919. {
  920. return 0;
  921. }
  922. static inline int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
  923. {
  924. return 0;
  925. }
  926. static inline int p4d_clear_huge(p4d_t *p4d)
  927. {
  928. return 0;
  929. }
  930. static inline int pud_clear_huge(pud_t *pud)
  931. {
  932. return 0;
  933. }
  934. static inline int pmd_clear_huge(pmd_t *pmd)
  935. {
  936. return 0;
  937. }
  938. static inline int pud_free_pmd_page(pud_t *pud, unsigned long addr)
  939. {
  940. return 0;
  941. }
  942. static inline int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
  943. {
  944. return 0;
  945. }
  946. #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
  947. #ifndef __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
  948. #ifdef CONFIG_TRANSPARENT_HUGEPAGE
  949. /*
  950. * ARCHes with special requirements for evicting THP backing TLB entries can
  951. * implement this. Otherwise also, it can help optimize normal TLB flush in
  952. * THP regime. stock flush_tlb_range() typically has optimization to nuke the
  953. * entire TLB TLB if flush span is greater than a threshold, which will
  954. * likely be true for a single huge page. Thus a single thp flush will
  955. * invalidate the entire TLB which is not desitable.
  956. * e.g. see arch/arc: flush_pmd_tlb_range
  957. */
  958. #define flush_pmd_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end)
  959. #define flush_pud_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end)
  960. #else
  961. #define flush_pmd_tlb_range(vma, addr, end) BUILD_BUG()
  962. #define flush_pud_tlb_range(vma, addr, end) BUILD_BUG()
  963. #endif
  964. #endif
  965. struct file;
  966. int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
  967. unsigned long size, pgprot_t *vma_prot);
  968. #ifndef CONFIG_X86_ESPFIX64
  969. static inline void init_espfix_bsp(void) { }
  970. #endif
  971. #ifndef __HAVE_ARCH_PFN_MODIFY_ALLOWED
  972. static inline bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
  973. {
  974. return true;
  975. }
  976. static inline bool arch_has_pfn_modify_check(void)
  977. {
  978. return false;
  979. }
#endif /* !__HAVE_ARCH_PFN_MODIFY_ALLOWED */
  981. /*
  982. * Architecture PAGE_KERNEL_* fallbacks
  983. *
  984. * Some architectures don't define certain PAGE_KERNEL_* flags. This is either
  985. * because they really don't support them, or the port needs to be updated to
  986. * reflect the required functionality. Below are a set of relatively safe
  987. * fallbacks, as best effort, which we can count on in lieu of the architectures
  988. * not defining them on their own yet.
  989. */
  990. #ifndef PAGE_KERNEL_RO
  991. # define PAGE_KERNEL_RO PAGE_KERNEL
  992. #endif
  993. #ifndef PAGE_KERNEL_EXEC
  994. # define PAGE_KERNEL_EXEC PAGE_KERNEL
  995. #endif
  996. #endif /* !__ASSEMBLY__ */
  997. #ifndef io_remap_pfn_range
  998. #define io_remap_pfn_range remap_pfn_range
  999. #endif
  1000. #ifndef has_transparent_hugepage
  1001. #ifdef CONFIG_TRANSPARENT_HUGEPAGE
  1002. #define has_transparent_hugepage() 1
  1003. #else
  1004. #define has_transparent_hugepage() 0
  1005. #endif
  1006. #endif
  1007. /*
  1008. * On some architectures it depends on the mm if the p4d/pud or pmd
  1009. * layer of the page table hierarchy is folded or not.
  1010. */
  1011. #ifndef mm_p4d_folded
  1012. #define mm_p4d_folded(mm) __is_defined(__PAGETABLE_P4D_FOLDED)
  1013. #endif
  1014. #ifndef mm_pud_folded
  1015. #define mm_pud_folded(mm) __is_defined(__PAGETABLE_PUD_FOLDED)
  1016. #endif
  1017. #ifndef mm_pmd_folded
  1018. #define mm_pmd_folded(mm) __is_defined(__PAGETABLE_PMD_FOLDED)
  1019. #endif
  1020. #endif /* _ASM_GENERIC_PGTABLE_H */