page.c

/*
 * Copyright (c) 2010-2017 Richard Braun.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 *
 * This implementation uses the binary buddy system to manage its heap.
 * Descriptions of the buddy system can be found in the following works:
 * - "UNIX Internals: The New Frontiers", by Uresh Vahalia.
 * - "Dynamic Storage Allocation: A Survey and Critical Review",
 *   by Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles.
 *
 * In addition, this allocator uses per-CPU pools of pages for order 0
 * (i.e. single page) allocations. These pools act as caches (but are named
 * differently to avoid confusion with CPU caches) that reduce contention on
 * multiprocessor systems. When a pool is empty and cannot provide a page,
 * it is filled by transferring multiple pages from the backend buddy system.
 * The symmetric case is handled likewise.
 */
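
/*
 * Minimal usage sketch (illustrative only, using functions and constants
 * that appear in this file): allocate one page from the direct physical
 * mapping, access it through that mapping, and release it. The page type
 * passed to vm_page_alloc is whatever the caller manages; VM_PAGE_OBJECT
 * is used here purely as an example.
 *
 *   struct vm_page *page = vm_page_alloc (0, VM_PAGE_SEL_DIRECTMAP,
 *                                         VM_PAGE_OBJECT, VM_PAGE_SLEEP);
 *   if (page)
 *     {
 *       void *ptr = (void *)vm_page_direct_va (vm_page_to_pa (page));
 *       memset (ptr, 0, PAGE_SIZE);
 *       vm_page_free (page, 0, VM_PAGE_SLEEP);
 *     }
 */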
#include <assert.h>
#include <stdalign.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#include <kern/init.h>
#include <kern/list.h>
#include <kern/log.h>
#include <kern/macros.h>
#include <kern/mutex.h>
#include <kern/panic.h>
#include <kern/pqueue.h>
#include <kern/printf.h>
#include <kern/shell.h>
#include <kern/slist.h>
#include <kern/spinlock.h>
#include <kern/thread.h>

#include <machine/boot.h>
#include <machine/cpu.h>
#include <machine/page.h>
#include <machine/pmap.h>
#include <machine/pmem.h>
#include <machine/types.h>

#include <vm/map.h>
#include <vm/page.h>
#include <vm/rset.h>

// Number of free block lists per zone.
#define VM_PAGE_NR_FREE_LISTS   11

/*
 * The size of a CPU pool is computed by dividing the number of pages in its
 * containing zone by this value.
 */
#define VM_PAGE_CPU_POOL_RATIO   1024

// Maximum number of pages in a CPU pool.
#define VM_PAGE_CPU_POOL_MAX_SIZE   128

/*
 * The transfer size of a CPU pool is computed by dividing the pool size by
 * this value.
 */
#define VM_PAGE_CPU_POOL_TRANSFER_RATIO   2
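
/*
 * Worked example of the sizing above (an illustration, assuming 4 KiB
 * pages): a 512 MiB zone holds 131072 pages, so each CPU pool gets
 * 131072 / VM_PAGE_CPU_POOL_RATIO = 128 pages, which is exactly the
 * VM_PAGE_CPU_POOL_MAX_SIZE cap. The transfer size is then
 * 128 / VM_PAGE_CPU_POOL_TRANSFER_RATIO = 64 pages moved between a pool
 * and the buddy system on each fill or drain. Smaller zones get
 * proportionally smaller pools, with a minimum of one page.
 */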

// Per-processor cache of pages.
struct vm_page_cpu_pool
{
  __cacheline_aligned struct mutex lock;
  int size;
  int transfer_size;
  int nr_pages;
  struct list pages;
};

/*
 * Special order value for pages that aren't in a free list. Such pages are
 * either allocated, or part of a free block of pages but not the head page.
 */
#define VM_PAGE_ORDER_UNLISTED   (0xff)

// Doubly-linked list of free blocks.
struct vm_page_free_list
{
  size_t size;
  struct list blocks;
};

// Zone name buffer size.
#define VM_PAGE_NAME_SIZE   16

// Zone of contiguous memory.
struct vm_page_zone
{
  struct vm_page_cpu_pool cpu_pools[CONFIG_MAX_CPUS];
  phys_addr_t start;
  phys_addr_t end;
  struct vm_page *pages;
  struct vm_page *pages_end;
  struct vm_page_free_list free_lists[VM_PAGE_NR_FREE_LISTS];
  size_t nr_free_pages;
  __cacheline_aligned struct mutex lock;
};

// Bootstrap information about a zone.
struct vm_page_boot_zone
{
  phys_addr_t start;
  phys_addr_t end;
  bool heap_present;
  phys_addr_t avail_start;
  phys_addr_t avail_end;
};

// Threads waiting for free object pages.
struct vm_page_waiter
{
  struct thread *thread;
  struct pqueue_node node;
  uint32_t order;
  bool done;
};

struct vm_page_bucket
{
  struct spinlock lock;
  struct pqueue waiters;
};

static int vm_page_is_ready __read_mostly;

/*
 * Zone table.
 *
 * The system supports a maximum of 4 zones:
 * - DMA: suitable for DMA
 * - DMA32: suitable for DMA when devices support 32-bit addressing
 * - DIRECTMAP: direct physical mapping, allows direct access from
 *   the kernel with a simple offset translation
 * - HIGHMEM: must be mapped before it can be accessed
 *
 * Zones are ordered by priority, 0 being the lowest priority. Their
 * relative priorities are DMA < DMA32 < DIRECTMAP < HIGHMEM. Some zones
 * may actually be aliases for others, e.g. if DMA is always possible from
 * the direct physical mapping, DMA and DMA32 are aliases for DIRECTMAP,
 * in which case the zone table contains DIRECTMAP and HIGHMEM only.
 */
static struct vm_page_zone vm_page_zones[PMEM_MAX_ZONES];

// Bootstrap zone table.
static struct vm_page_boot_zone vm_page_boot_zones[PMEM_MAX_ZONES]
    __initdata;

// Number of loaded zones.
static uint32_t vm_page_zones_size __read_mostly;

// Registry of page waiters.
static struct vm_page_bucket vm_page_buckets[PMEM_MAX_ZONES];

static void __init
vm_page_init (struct vm_page *page, uint16_t zone_index, phys_addr_t pa)
{
  memset (page, 0, sizeof (*page));
  page->type = VM_PAGE_RESERVED;
  page->zone_index = zone_index;
  page->order = VM_PAGE_ORDER_UNLISTED;
  page->phys_addr = pa;
  page->nr_refs = 0;
  page->object = NULL;
}

void
vm_page_set_type (struct vm_page *page, uint32_t order, uint16_t type)
{
  for (uint32_t i = 0; i < (1u << order); i++)
    {
      page[i].type = type;
      spinlock_init (&page[i].rset_lock);
      if (type != VM_PAGE_OBJECT)
        list_init (&page[i].node);
      else
        slist_init (&page[i].rset);
    }
}

static void
vm_page_clear (struct vm_page *page, uint32_t order)
{
  for (uint32_t i = 0; i < (1u << order); ++i)
    {
      page[i].type = VM_PAGE_FREE;
      page[i].dirty = 0;
      page[i].priv = NULL;
    }
}

static void __init
vm_page_free_list_init (struct vm_page_free_list *free_list)
{
  free_list->size = 0;
  list_init (&free_list->blocks);
}

static inline void
vm_page_free_list_insert (struct vm_page_free_list *free_list,
                          struct vm_page *page)
{
  assert (page->order == VM_PAGE_ORDER_UNLISTED);
  ++free_list->size;
  list_insert_head (&free_list->blocks, &page->node);
}

static inline void
vm_page_free_list_remove (struct vm_page_free_list *free_list,
                          struct vm_page *page)
{
  assert (page->order != VM_PAGE_ORDER_UNLISTED);
  --free_list->size;
  list_remove (&page->node);
}

static struct vm_page*
vm_page_zone_alloc_from_buddy (struct vm_page_zone *zone, uint32_t order)
{
  struct vm_page_free_list *free_list = NULL;
  assert (order < VM_PAGE_NR_FREE_LISTS);

  // Find the first free list that can satisfy this order.
  uint32_t i;
  for (i = order; i < VM_PAGE_NR_FREE_LISTS; ++i)
    {
      free_list = &zone->free_lists[i];
      if (free_list->size != 0)
        break;
    }

  if (i == VM_PAGE_NR_FREE_LISTS)
    return (NULL);

  _Auto page = list_first_entry (&free_list->blocks, struct vm_page, node);
  vm_page_free_list_remove (free_list, page);
  page->order = VM_PAGE_ORDER_UNLISTED;

  // Split the block, returning the unused upper halves to the free lists.
  while (i > order)
    {
      i--;
      _Auto buddy = &page[1 << i];
      vm_page_free_list_insert (&zone->free_lists[i], buddy);
      buddy->order = i;
    }

  zone->nr_free_pages -= 1 << order;
  return (page);
}
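
/*
 * Illustration of the splitting loop above (hypothetical numbers): if the
 * smallest non-empty free list holds order-3 blocks (8 pages) and an
 * order-0 page is requested, the head block is removed and split three
 * times. The upper halves, of orders 2, 1 and 0 (4, 2 and 1 pages), are
 * inserted back into their respective free lists, and the remaining
 * order-0 page is returned to the caller.
 */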

static void
vm_page_zone_free_to_buddy (struct vm_page_zone *zone, struct vm_page *page,
                            uint32_t order)
{
  assert (page >= zone->pages);
  assert (page < zone->pages_end);
  assert (page->order == VM_PAGE_ORDER_UNLISTED);
  assert (order < VM_PAGE_NR_FREE_LISTS);

  uint32_t nr_pages = 1 << order;
  phys_addr_t pa = page->phys_addr;

  // Coalesce with free buddies for as long as possible.
  while (order < VM_PAGE_NR_FREE_LISTS - 1)
    {
      phys_addr_t buddy_pa = pa ^ vm_page_ptob (1 << order);
      if (buddy_pa < zone->start || buddy_pa >= zone->end)
        break;

      _Auto buddy = &zone->pages[vm_page_btop (buddy_pa - zone->start)];
      if (buddy->order != order)
        break;

      vm_page_free_list_remove (&zone->free_lists[order], buddy);
      buddy->order = VM_PAGE_ORDER_UNLISTED;
      ++order;
      pa &= -vm_page_ptob (1 << order);
      page = &zone->pages[vm_page_btop (pa - zone->start)];
    }

  vm_page_free_list_insert (&zone->free_lists[order], page);
  page->order = order;
  zone->nr_free_pages += nr_pages;
}
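
/*
 * The buddy of a block is found by flipping the physical address bit that
 * corresponds to the block size. Hypothetical example with 4 KiB pages and
 * a zone starting at physical address 0: the order-0 buddy of the page at
 * 0x3000 is at 0x3000 ^ 0x1000 = 0x2000; once merged, the resulting
 * order-1 block at 0x2000 has its buddy at 0x2000 ^ 0x2000 = 0x0000, and
 * so on until a buddy is out of the zone or not free at the same order.
 */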

static void __init
vm_page_cpu_pool_init (struct vm_page_cpu_pool *cpu_pool, int size)
{
  mutex_init (&cpu_pool->lock);
  cpu_pool->size = size;
  cpu_pool->transfer_size = (size + VM_PAGE_CPU_POOL_TRANSFER_RATIO - 1) /
                            VM_PAGE_CPU_POOL_TRANSFER_RATIO;
  cpu_pool->nr_pages = 0;
  list_init (&cpu_pool->pages);
}

static inline struct vm_page_cpu_pool*
vm_page_cpu_pool_get (struct vm_page_zone *zone)
{
  return (&zone->cpu_pools[cpu_id ()]);
}

static inline struct vm_page*
vm_page_cpu_pool_pop (struct vm_page_cpu_pool *cpu_pool)
{
  assert (cpu_pool->nr_pages != 0);
  --cpu_pool->nr_pages;
  return (list_pop (&cpu_pool->pages, struct vm_page, node));
}

static inline void
vm_page_cpu_pool_push (struct vm_page_cpu_pool *cpu_pool, struct vm_page *page)
{
  assert (cpu_pool->nr_pages < cpu_pool->size);
  cpu_pool->nr_pages++;
  list_insert_head (&cpu_pool->pages, &page->node);
}

static int
vm_page_cpu_pool_fill (struct vm_page_cpu_pool *cpu_pool,
                       struct vm_page_zone *zone)
{
  assert (cpu_pool->nr_pages == 0);
  MUTEX_GUARD (&zone->lock);

  int i;
  for (i = 0; i < cpu_pool->transfer_size; i++)
    {
      _Auto page = vm_page_zone_alloc_from_buddy (zone, 0);
      if (! page)
        break;

      vm_page_cpu_pool_push (cpu_pool, page);
    }

  return (i);
}

static void
vm_page_cpu_pool_drain (struct vm_page_cpu_pool *cpu_pool,
                        struct vm_page_zone *zone)
{
  assert (cpu_pool->nr_pages == cpu_pool->size);
  MUTEX_GUARD (&zone->lock);

  for (int i = cpu_pool->transfer_size; i > 0; --i)
    {
      _Auto page = vm_page_cpu_pool_pop (cpu_pool);
      vm_page_zone_free_to_buddy (zone, page, 0);
    }
}

static phys_addr_t __init
vm_page_zone_size (struct vm_page_zone *zone)
{
  return (zone->end - zone->start);
}

static int __init
vm_page_zone_compute_pool_size (struct vm_page_zone *zone)
{
  phys_addr_t size = vm_page_btop (vm_page_zone_size (zone)) /
                     VM_PAGE_CPU_POOL_RATIO;
  return (!size ? 1 : MIN (VM_PAGE_CPU_POOL_MAX_SIZE, size));
}

static void __init
vm_page_zone_init (struct vm_page_zone *zone, phys_addr_t start, phys_addr_t end,
                   struct vm_page *pages)
{
  zone->start = start;
  zone->end = end;

  int pool_size = vm_page_zone_compute_pool_size (zone);
  for (uint32_t i = 0; i < ARRAY_SIZE (zone->cpu_pools); ++i)
    vm_page_cpu_pool_init (&zone->cpu_pools[i], pool_size);

  zone->pages = pages;
  zone->pages_end = pages + vm_page_btop (vm_page_zone_size (zone));
  mutex_init (&zone->lock);

  for (uint32_t i = 0; i < ARRAY_SIZE (zone->free_lists); ++i)
    vm_page_free_list_init (&zone->free_lists[i]);

  zone->nr_free_pages = 0;
  uint32_t i = zone - vm_page_zones;

  for (phys_addr_t pa = zone->start; pa < zone->end; pa += PAGE_SIZE)
    vm_page_init (&pages[vm_page_btop (pa - zone->start)], i, pa);
}

static inline int
vm_page_zone_selector (const struct vm_page_zone *zone)
{
  return ((int)(zone - vm_page_zones));
}

static bool
vm_page_wait (struct vm_page_zone *zone, struct mutex *mtx,
              struct vm_page_waiter *waiter)
{
  if (! waiter)
    {
      mutex_unlock (mtx);
      return (false);
    }

  _Auto bucket = &vm_page_buckets[vm_page_zone_selector (zone)];

  // Grab the queue lock before dropping the page pool one.
  SPINLOCK_GUARD (&bucket->lock);
  mutex_unlock (mtx);

  // Test if there are enough free pages before going to sleep.
  if (zone->nr_free_pages >= (1u << waiter->order))
    return (true);
  else if (waiter->order == 0)
    {   // Allocating from a CPU pool.
      void *p = (char *)mtx - OFFSETOF (struct vm_page_cpu_pool, lock);
      if (((struct vm_page_cpu_pool *)p)->nr_pages)
        return (true);
    }

  pqueue_insert (&bucket->waiters, &waiter->node);
  thread_sleep (&bucket->lock, bucket, "vm-page");
  pqueue_remove (&bucket->waiters, &waiter->node);

  if (waiter->done)
    // Only bump the other waiters if we succeeded.
    pqueue_inc (&bucket->waiters, 1);

  return (waiter->done);
}
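
/*
 * Note on the sleep/wakeup handshake (descriptive summary of the function
 * above and vm_page_wakeup() below): a thread that cannot get pages
 * registers a waiter in the bucket matching the zone it is allocating from
 * and sleeps, releasing the bucket spinlock. When pages are later freed
 * with VM_PAGE_SLEEP, vm_page_zone_free() walks the buckets from the freed
 * zone's selector downward, and vm_page_wakeup() marks the first queued
 * waiter whose requested order is no larger than the freed order as done
 * and wakes it; that waiter then retries its allocation. A waiter that
 * succeeds bumps the remaining queued waiters (pqueue_inc) to limit
 * starvation.
 */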

static struct vm_page*
vm_page_zone_alloc (struct vm_page_zone *zone, uint32_t order,
                    uint32_t type, struct vm_page_waiter *waiter)
{
  assert (order < VM_PAGE_NR_FREE_LISTS);
  struct vm_page *page;

  if (! order)
    // Order-0 requests are served from the local CPU pool.
    while (1)
      {
        THREAD_PIN_GUARD ();
        _Auto cpu_pool = vm_page_cpu_pool_get (zone);
        mutex_lock (&cpu_pool->lock);

        if (cpu_pool->nr_pages || vm_page_cpu_pool_fill (cpu_pool, zone))
          {
            page = vm_page_cpu_pool_pop (cpu_pool);
            mutex_unlock (&cpu_pool->lock);
            break;
          }
        else if (!vm_page_wait (zone, &cpu_pool->lock, waiter))
          return (NULL);
      }
  else
    // Larger requests go straight to the buddy system.
    while (1)
      {
        mutex_lock (&zone->lock);
        page = vm_page_zone_alloc_from_buddy (zone, order);

        if (page)
          {
            mutex_unlock (&zone->lock);
            break;
          }
        else if (!vm_page_wait (zone, &zone->lock, waiter))
          return (NULL);
      }

  assert (page->type == VM_PAGE_FREE);
  vm_page_set_type (page, order, type);
  return (page);
}

static bool
vm_page_wakeup (struct vm_page_bucket *bucket, uint32_t order)
{
  SPINLOCK_GUARD (&bucket->lock);

  pqueue_for_each (&bucket->waiters, pnode)
    {
      _Auto waiter = pqueue_entry (pnode, struct vm_page_waiter, node);
      if (likely (waiter->order <= order))
        {
          waiter->done = true;
          thread_wakeup (waiter->thread);
          return (true);
        }
    }

  return (false);
}

static void
vm_page_zone_free (struct vm_page_zone *zone, struct vm_page *page,
                   uint32_t order, uint32_t flags)
{
  assert (page->type != VM_PAGE_FREE);
  assert (order < VM_PAGE_NR_FREE_LISTS);

  vm_page_clear (page, order);

  if (! order)
    {
      THREAD_PIN_GUARD ();
      _Auto cpu_pool = vm_page_cpu_pool_get (zone);
      MUTEX_GUARD (&cpu_pool->lock);

      if (cpu_pool->nr_pages == cpu_pool->size)
        vm_page_cpu_pool_drain (cpu_pool, zone);

      vm_page_cpu_pool_push (cpu_pool, page);
    }
  else
    {
      MUTEX_GUARD (&zone->lock);
      vm_page_zone_free_to_buddy (zone, page, order);
    }

  if (!(flags & VM_PAGE_SLEEP))
    return;

  // Wake up a waiter on this zone or a lower-priority one.
  int selector = vm_page_zone_selector (zone);
  for (; selector >= 0; --selector)
    if (vm_page_wakeup (&vm_page_buckets[selector], order))
      return;
}

void __init
vm_page_load (uint32_t zone_index, phys_addr_t start, phys_addr_t end)
{
  assert (zone_index < ARRAY_SIZE (vm_page_boot_zones));
  assert (vm_page_aligned (start));
  assert (vm_page_aligned (end));
  assert (start < end);
  assert (vm_page_zones_size < ARRAY_SIZE (vm_page_boot_zones));

  _Auto zone = &vm_page_boot_zones[zone_index];
  zone->start = start;
  zone->end = end;
  zone->heap_present = false;

  log_debug ("vm_page: load: %s: %llx:%llx",
             vm_page_zone_name (zone_index), (uint64_t)start, (uint64_t)end);
  ++vm_page_zones_size;
}

void
vm_page_load_heap (uint32_t zone_index, phys_addr_t start, phys_addr_t end)
{
  assert (zone_index < ARRAY_SIZE (vm_page_boot_zones));
  assert (vm_page_aligned (start));
  assert (vm_page_aligned (end));

  _Auto zone = &vm_page_boot_zones[zone_index];
  assert (zone->start <= start);
  assert (end <= zone->end);

  zone->avail_start = start;
  zone->avail_end = end;
  zone->heap_present = true;

  log_debug ("vm_page: heap: %s: %llx:%llx",
             vm_page_zone_name (zone_index), (uint64_t)start, (uint64_t)end);
}

int
vm_page_ready (void)
{
  return (vm_page_is_ready);
}

static uint32_t
vm_page_select_alloc_zone (uint32_t selector)
{
  uint32_t zone_index;

  switch (selector)
    {
      case VM_PAGE_SEL_DMA:
        zone_index = PMEM_ZONE_DMA;
        break;
      case VM_PAGE_SEL_DMA32:
        zone_index = PMEM_ZONE_DMA32;
        break;
      case VM_PAGE_SEL_DIRECTMAP:
        zone_index = PMEM_ZONE_DIRECTMAP;
        break;
      case VM_PAGE_SEL_HIGHMEM:
        zone_index = PMEM_ZONE_HIGHMEM;
        break;
      default:
        panic ("vm_page: invalid selector");
    }

  return (MIN (vm_page_zones_size - 1, zone_index));
}
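
/*
 * Illustrative clamp: if only two zones are loaded (vm_page_zones_size == 2)
 * and a selector maps to a zone constant larger than 1, the final
 * MIN (vm_page_zones_size - 1, zone_index) redirects the request to the
 * highest loaded zone (index 1) instead of indexing past the table. This
 * is how aliased zones (see the zone table comment above) are handled.
 */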

static int __init
vm_page_boot_zone_loaded (const struct vm_page_boot_zone *zone)
{
  return (zone->end != 0);
}

static void __init
vm_page_check_boot_zones (void)
{
  if (! vm_page_zones_size)
    panic ("vm_page: no physical memory loaded");

  for (size_t i = 0; i < ARRAY_SIZE (vm_page_boot_zones); i++)
    if (vm_page_boot_zone_loaded (&vm_page_boot_zones[i]) !=
        (i < vm_page_zones_size))
      panic ("vm_page: invalid boot zone table");
}

static phys_addr_t __init
vm_page_boot_zone_size (struct vm_page_boot_zone *zone)
{
  return (zone->end - zone->start);
}

static phys_addr_t __init
vm_page_boot_zone_avail_size (struct vm_page_boot_zone *zone)
{
  return (zone->avail_end - zone->avail_start);
}

static void* __init
vm_page_bootalloc (size_t size)
{
  for (size_t i = vm_page_select_alloc_zone (VM_PAGE_SEL_DIRECTMAP);
       i < vm_page_zones_size; --i)
    {
      _Auto zone = &vm_page_boot_zones[i];

      if (zone->heap_present &&
          size <= vm_page_boot_zone_avail_size (zone))
        {
          phys_addr_t pa = zone->avail_start;
          zone->avail_start += vm_page_round (size);
          return ((void *)vm_page_direct_va (pa));
        }
    }

  panic ("vm_page: no physical memory available");
}

#ifdef CONFIG_SHELL

static void
vm_page_shell_info (struct shell *shell, int c __unused, char **v __unused)
{
  vm_page_info (shell->stream);
}

static struct shell_cmd vm_page_shell_cmds[] =
{
  SHELL_CMD_INITIALIZER ("vm_page_info", vm_page_shell_info,
                         "vm_page_info",
                         "display information about physical memory"),
};

static int __init
vm_page_setup_shell (void)
{
  SHELL_REGISTER_CMDS (vm_page_shell_cmds, shell_get_main_cmd_set ());
  return (0);
}

INIT_OP_DEFINE (vm_page_setup_shell,
                INIT_OP_DEP (printf_setup, true),
                INIT_OP_DEP (shell_setup, true),
                INIT_OP_DEP (vm_page_setup, true));

#endif

static int __init
vm_page_setup (void)
{
  vm_page_check_boot_zones ();

  // Compute the page table size.
  size_t nr_pages = 0;
  for (uint32_t i = 0; i < vm_page_zones_size; ++i)
    nr_pages += vm_page_btop (vm_page_boot_zone_size (&vm_page_boot_zones[i]));

  size_t table_size = vm_page_round (nr_pages * sizeof (struct vm_page));
  log_info ("vm_page: page table size: %zu entries (%zuk)",
            nr_pages, table_size >> 10);

  struct vm_page *table = vm_page_bootalloc (table_size);
  uintptr_t va = (uintptr_t) table;

  /*
   * Initialize the zones, associating them to the page table. When
   * the zones are initialized, all their pages are set allocated.
   * Pages are then released, which populates the free lists.
   */
  for (uint32_t i = 0; i < vm_page_zones_size; ++i)
    {
      _Auto zone = &vm_page_zones[i];
      _Auto boot_zone = &vm_page_boot_zones[i];
      vm_page_zone_init (zone, boot_zone->start, boot_zone->end, table);

      _Auto page = zone->pages + vm_page_btop (boot_zone->avail_start -
                                               boot_zone->start);
      _Auto end = zone->pages + vm_page_btop (boot_zone->avail_end -
                                              boot_zone->start);

      for (; page < end; ++page)
        {
          page->type = VM_PAGE_FREE;
          vm_page_zone_free_to_buddy (zone, page, 0);
        }

      table += vm_page_btop (vm_page_zone_size (zone));
    }

  // Mark the pages backing the vm_page table itself as table pages.
  while (va < (uintptr_t) table)
    {
      phys_addr_t pa = vm_page_direct_pa (va);
      struct vm_page *page = vm_page_lookup (pa);
      assert (page && page->type == VM_PAGE_RESERVED);
      page->type = VM_PAGE_TABLE;
      va += PAGE_SIZE;
    }

  for (size_t i = 0; i < ARRAY_SIZE (vm_page_buckets); ++i)
    {
      pqueue_init (&vm_page_buckets[i].waiters);
      spinlock_init (&vm_page_buckets[i].lock);
    }

  vm_page_is_ready = 1;
  return (0);
}

INIT_OP_DEFINE (vm_page_setup,
                INIT_OP_DEP (boot_load_vm_page_zones, true),
                INIT_OP_DEP (log_setup, true),
                INIT_OP_DEP (printf_setup, true));

void __init
vm_page_handle (struct vm_page *page)
{
  assert (page->zone_index < ARRAY_SIZE (vm_page_zones));
  assert (page->type == VM_PAGE_RESERVED);

  vm_page_clear (page, 0);
  vm_page_zone_free_to_buddy (&vm_page_zones[page->zone_index], page, 0);
}

struct vm_page*
vm_page_lookup (phys_addr_t pa)
{
  for (uint32_t i = 0; i < vm_page_zones_size; i++)
    {
      _Auto zone = &vm_page_zones[i];
      if (pa >= zone->start && pa < zone->end)
        return (&zone->pages[vm_page_btop (pa - zone->start)]);
    }

  return (NULL);
}

static bool
vm_page_block_referenced (const struct vm_page *page, uint32_t order)
{
  for (uint32_t i = 0, nr_pages = 1 << order; i < nr_pages; i++)
    if (vm_page_referenced (&page[i]))
      return (true);

  return (false);
}

static void
vm_page_waiter_init (struct vm_page_waiter *wp, uint32_t order)
{
  wp->thread = thread_self ();
  wp->order = order;
  wp->done = false;
  pqueue_node_init (&wp->node, thread_real_global_priority (wp->thread));
}

struct vm_page*
vm_page_alloc (uint32_t order, uint32_t selector,
               uint32_t type, uint32_t flags)
{
  struct vm_page_waiter *waiter = NULL;

  if (flags & VM_PAGE_SLEEP)
    {
      waiter = alloca (sizeof (*waiter));
      vm_page_waiter_init (waiter, order);
    }

  for (uint32_t i = vm_page_select_alloc_zone (selector);
       i < vm_page_zones_size; --i)
    {
      _Auto page = vm_page_zone_alloc (&vm_page_zones[i], order, type, waiter);
      if (page)
        {
          assert (!vm_page_block_referenced (page, order));
          return (page);
        }
    }

  return (NULL);
}

void
vm_page_free (struct vm_page *page, uint32_t order, uint32_t flags)
{
  assert (page->zone_index < ARRAY_SIZE (vm_page_zones));
  assert (!vm_page_block_referenced (page, order));
  vm_page_zone_free (&vm_page_zones[page->zone_index], page, order, flags);
}

void
vm_page_list_free (struct list *pages)
{
  list_for_each_safe (pages, node, tmp)
    {
      _Auto page = list_entry (node, struct vm_page, node);
      vm_page_zone_free (&vm_page_zones[page->zone_index],
                         page, 0, VM_PAGE_SLEEP);
    }
}

const char*
vm_page_zone_name (uint32_t zone_index)
{
  // Don't use a switch statement since zones can be aliased.
  if (zone_index == PMEM_ZONE_HIGHMEM)
    return ("HIGHMEM");
  else if (zone_index == PMEM_ZONE_DIRECTMAP)
    return ("DIRECTMAP");
  else if (zone_index == PMEM_ZONE_DMA32)
    return ("DMA32");
  else if (zone_index == PMEM_ZONE_DMA)
    return ("DMA");
  else
    panic ("vm_page: invalid zone index");
}

void
vm_page_wash_begin (struct vm_page *page)
{
  // Atomically switch the state in the low byte of ->whole to LAUNDRY.
  while (1)
    {
      uint32_t tmp = atomic_load_rlx (&page->whole);
      if ((tmp & 0xff) == VM_PAGE_LAUNDRY ||
          atomic_cas_bool_acq_rel (&page->whole, tmp,
                                   (tmp & ~0xff) | VM_PAGE_LAUNDRY))
        return;

      atomic_spin_nop ();
    }
}

void
vm_page_wash_end (struct vm_page *page)
{
  uint32_t tmp = atomic_load_rlx (&page->whole);
  if ((tmp & 0xff) == VM_PAGE_LAUNDRY &&
      atomic_cas_bool_rlx (&page->whole, tmp, tmp & ~0xff) &&
      !vm_page_referenced (page))
    vm_page_detach (page);
}

void
vm_page_zero (struct vm_page *page)
{
  _Auto window = pmap_window_get (0);
  pmap_window_set (window, vm_page_to_pa (page));
  memset (pmap_window_va (window), 0, PAGE_SIZE);
  pmap_window_put (window);
}

void
vm_page_info (struct stream *stream)
{
  for (uint32_t i = 0; i < vm_page_zones_size; ++i)
    {
      _Auto zone = &vm_page_zones[i];
      size_t pages = (size_t) (zone->pages_end - zone->pages);
      fmt_xprintf (stream, "vm_page: %s: pages: %zu (%zuM), "
                   "free: %zu (%zuM)\n",
                   vm_page_zone_name (i), pages, pages >> (20 - PAGE_SHIFT),
                   zone->nr_free_pages,
                   zone->nr_free_pages >> (20 - PAGE_SHIFT));
    }
}