/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_MEMREMAP_H_
#define _LINUX_MEMREMAP_H_
#include <linux/ioport.h>
#include <linux/percpu-refcount.h>

#include <asm/pgtable.h>

struct resource;
struct device;

/**
 * struct vmem_altmap - pre-allocated storage for vmemmap_populate
 * @base_pfn: base of the entire dev_pagemap mapping
 * @reserve: pages mapped, but reserved for driver use (relative to @base_pfn)
 * @free: free pages set aside in the mapping for memmap storage
 * @align: pages reserved to meet allocation alignments
 * @alloc: track pages consumed, private to vmemmap_populate()
 */
struct vmem_altmap {
	const unsigned long base_pfn;
	const unsigned long reserve;
	unsigned long free;
	unsigned long align;
	unsigned long alloc;
};
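
/*
 * Illustrative sketch (not part of this header): a driver that wants the
 * memmap for its device memory to be carved out of that same memory could
 * describe the reservation like this. DEV_BASE_PFN, DRIVER_PFNS and
 * MEMMAP_PFNS are hypothetical placeholders: the first pfn of the mapping,
 * pages held back for driver use, and pages set aside for memmap storage.
 *
 *	struct vmem_altmap altmap = {
 *		.base_pfn = DEV_BASE_PFN,
 *		.reserve = DRIVER_PFNS,
 *		.free = MEMMAP_PFNS,
 *	};
 *
 * vmemmap_populate() then consumes pages from @free (tracking them in
 * @alloc) instead of allocating the memmap from regular system RAM.
 */
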
/*
 * Specialize ZONE_DEVICE memory into multiple types, each having a different
 * usage.
 *
 * MEMORY_DEVICE_PRIVATE:
 * Device memory that is not directly addressable by the CPU: the CPU can
 * neither read nor write private memory. In this case, we do still have
 * struct pages backing the device memory. Doing so simplifies the
 * implementation, but it is important to remember that there are certain
 * points at which the struct page must be treated as an opaque object,
 * rather than a "normal" struct page.
 *
 * A more complete discussion of unaddressable memory may be found in
 * include/linux/hmm.h and Documentation/vm/hmm.rst.
 *
 * MEMORY_DEVICE_PUBLIC:
 * Device memory that is cache coherent from the device's and the CPU's point
 * of view. This is used on platforms that have an advanced system bus (like
 * CAPI or CCIX). A driver can hotplug the device memory using ZONE_DEVICE
 * and with that memory type. Any page of a process can be migrated to such
 * memory. However, no one should be allowed to pin such memory, so that it
 * can always be evicted.
 *
 * MEMORY_DEVICE_FS_DAX:
 * Host memory that has similar access semantics as System RAM, i.e. DMA
 * coherent and supports page pinning. In support of coordinating page
 * pinning vs other operations, MEMORY_DEVICE_FS_DAX arranges for a
 * wakeup event whenever a page is unpinned and becomes idle. This
 * wakeup is used to coordinate physical address space management (ex:
 * fs truncate/hole punch) vs pinned pages (ex: device dma).
 */
enum memory_type {
	MEMORY_DEVICE_PRIVATE = 1,
	MEMORY_DEVICE_PUBLIC,
	MEMORY_DEVICE_FS_DAX,
};
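
/*
 * Illustrative sketch: code that must treat these types differently can
 * dispatch on page->pgmap->type, in the style of the helpers in
 * include/linux/mm.h such as is_device_private_page():
 *
 *	static bool page_is_device_private(const struct page *page)
 *	{
 *		return is_zone_device_page(page) &&
 *			page->pgmap->type == MEMORY_DEVICE_PRIVATE;
 *	}
 */
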
/*
 * For MEMORY_DEVICE_PRIVATE we use ZONE_DEVICE and extend it with two
 * callbacks:
 *   page_fault()
 *   page_free()
 *
 * Additional notes about MEMORY_DEVICE_PRIVATE may be found in
 * include/linux/hmm.h and Documentation/vm/hmm.rst. There is also a brief
 * explanation in include/linux/memory_hotplug.h.
 *
 * The page_fault() callback must migrate the page back, from device memory
 * to system memory, so that the CPU can access it. This might fail for
 * various reasons (device issues, the device having been unplugged, ...).
 * When such an error condition happens, the page_fault() callback must
 * return VM_FAULT_SIGBUS and set the CPU page table entry to "poisoned".
 *
 * Note that because memory cgroup charges are transferred to the device
 * memory, this should never fail due to memory restrictions. However,
 * allocation of a regular system page might still fail because we are out
 * of memory. If that happens, the page_fault() callback must return
 * VM_FAULT_OOM.
 *
 * The page_fault() callback can also try to migrate back multiple pages in
 * one chunk, as an optimization. It must, however, prioritize the faulting
 * address over all the others.
 *
 * The page_free() callback is called once the page refcount reaches 1
 * (ZONE_DEVICE pages never reach a refcount of 0 unless there is a refcount
 * bug; this allows the device driver to implement its own memory
 * management).
 *
 * For MEMORY_DEVICE_PUBLIC only the page_free() callback matters.
 */
typedef int (*dev_page_fault_t)(struct vm_area_struct *vma,
				unsigned long addr,
				const struct page *page,
				unsigned int flags,
				pmd_t *pmdp);
typedef void (*dev_page_free_t)(struct page *page, void *data);
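
/*
 * Illustrative sketch of the contract described above. The helpers
 * my_migrate_to_ram() and my_free_device_page() are hypothetical driver
 * code, not kernel APIs: on success, my_migrate_to_ram() has replaced the
 * device page table entry with one pointing at system RAM; on failure,
 * the callback reports VM_FAULT_SIGBUS (or VM_FAULT_OOM when a system
 * page cannot be allocated).
 *
 *	static int my_page_fault(struct vm_area_struct *vma,
 *				 unsigned long addr,
 *				 const struct page *page,
 *				 unsigned int flags,
 *				 pmd_t *pmdp)
 *	{
 *		if (my_migrate_to_ram(vma, addr, page, pmdp))
 *			return VM_FAULT_SIGBUS;
 *		return 0;
 *	}
 *
 *	static void my_page_free(struct page *page, void *data)
 *	{
 *		my_free_device_page(page, data);
 *	}
 */
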
/**
 * struct dev_pagemap - metadata for ZONE_DEVICE mappings
 * @page_fault: callback when the CPU faults on an unaddressable device page
 * @page_free: free page callback when page refcount reaches 1
 * @altmap: pre-allocated/reserved memory for vmemmap allocations
 * @altmap_valid: set when @altmap describes a valid reservation
 * @res: physical address range covered by @ref
 * @ref: reference count that pins the devm_memremap_pages() mapping
 * @kill: callback to transition @ref to the dead state
 * @dev: host device of the mapping for debug
 * @data: private data pointer for page_free()
 * @type: memory type: see MEMORY_* in memory_hotplug.h
 */
struct dev_pagemap {
	dev_page_fault_t page_fault;
	dev_page_free_t page_free;
	struct vmem_altmap altmap;
	bool altmap_valid;
	struct resource res;
	struct percpu_ref *ref;
	void (*kill)(struct percpu_ref *ref);
	struct device *dev;
	void *data;
	enum memory_type type;
};
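
/*
 * Illustrative sketch: a minimal MEMORY_DEVICE_PRIVATE setup before calling
 * devm_memremap_pages(). All my_* names are hypothetical driver code; the
 * driver owns @ref and must provide a @kill callback that begins teardown
 * of that reference.
 *
 *	pgmap->page_fault = my_page_fault;
 *	pgmap->page_free = my_page_free;
 *	pgmap->altmap_valid = false;
 *	pgmap->res = *my_resource;
 *	pgmap->ref = &my_percpu_ref;
 *	pgmap->kill = my_kill_ref;
 *	pgmap->dev = dev;
 *	pgmap->data = my_private_data;
 *	pgmap->type = MEMORY_DEVICE_PRIVATE;
 */
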
#ifdef CONFIG_ZONE_DEVICE
void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap);
struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
		struct dev_pagemap *pgmap);

unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);
#else
static inline void *devm_memremap_pages(struct device *dev,
		struct dev_pagemap *pgmap)
{
	/*
	 * Fail attempts to call devm_memremap_pages() without
	 * ZONE_DEVICE support enabled; this requires callers to fall
	 * back to plain devm_memremap() based on config.
	 */
	WARN_ON_ONCE(1);
	return ERR_PTR(-ENXIO);
}

static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
		struct dev_pagemap *pgmap)
{
	return NULL;
}

static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
{
	return 0;
}

static inline void vmem_altmap_free(struct vmem_altmap *altmap,
		unsigned long nr_pfns)
{
}
#endif /* CONFIG_ZONE_DEVICE */
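
/*
 * Illustrative sketch of the fallback that the !CONFIG_ZONE_DEVICE stub
 * above asks callers to implement. IS_ENABLED() and devm_memremap() are
 * real kernel APIs; the surrounding driver logic is hypothetical.
 *
 *	if (IS_ENABLED(CONFIG_ZONE_DEVICE))
 *		addr = devm_memremap_pages(dev, pgmap);
 *	else
 *		addr = devm_memremap(dev, start, size, MEMREMAP_WB);
 *	if (IS_ERR(addr))
 *		return PTR_ERR(addr);
 */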

static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
{
	if (pgmap)
		percpu_ref_put(pgmap->ref);
}
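
/*
 * Illustrative sketch: get_dev_pagemap() takes a reference on @ref which
 * the caller must drop with put_dev_pagemap() once done with the mapping:
 *
 *	struct dev_pagemap *pgmap;
 *
 *	pgmap = get_dev_pagemap(pfn, NULL);
 *	if (pgmap) {
 *		... use pgmap ...
 *		put_dev_pagemap(pgmap);
 *	}
 */
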
#endif /* _LINUX_MEMREMAP_H_ */