page.c 23 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869
  1. /*
  2. * Copyright (c) 2010-2017 Richard Braun.
  3. *
  4. * This program is free software: you can redistribute it and/or modify
  5. * it under the terms of the GNU General Public License as published by
  6. * the Free Software Foundation, either version 3 of the License, or
  7. * (at your option) any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. *
  17. *
  18. * This implementation uses the binary buddy system to manage its heap.
  19. * Descriptions of the buddy system can be found in the following works :
  20. * - "UNIX Internals: The New Frontiers", by Uresh Vahalia.
  21. * - "Dynamic Storage Allocation: A Survey and Critical Review",
  22. * by Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles.
  23. *
  24. * In addition, this allocator uses per-CPU pools of pages for order 0
  25. * (i.e. single page) allocations. These pools act as caches (but are named
  26. * differently to avoid confusion with CPU caches) that reduce contention on
  27. * multiprocessor systems. When a pool is empty and cannot provide a page,
  28. * it is filled by transferring multiple pages from the backend buddy system.
  29. * The symmetric case is handled likewise.
  30. */
  31. #include <assert.h>
  32. #include <stdalign.h>
  33. #include <stdbool.h>
  34. #include <stddef.h>
  35. #include <stdint.h>
  36. #include <stdio.h>
  37. #include <string.h>
  38. #include <kern/init.h>
  39. #include <kern/list.h>
  40. #include <kern/log.h>
  41. #include <kern/macros.h>
  42. #include <kern/mutex.h>
  43. #include <kern/panic.h>
  44. #include <kern/pqueue.h>
  45. #include <kern/printf.h>
  46. #include <kern/shell.h>
  47. #include <kern/spinlock.h>
  48. #include <kern/thread.h>
  49. #include <machine/boot.h>
  50. #include <machine/cpu.h>
  51. #include <machine/page.h>
  52. #include <machine/pmem.h>
  53. #include <machine/types.h>
  54. #include <vm/map.h>
  55. #include <vm/page.h>
// Number of free block lists per zone.
#define VM_PAGE_NR_FREE_LISTS 11

/*
 * The size of a CPU pool is computed by dividing the number of pages in its
 * containing zone by this value.
 */
#define VM_PAGE_CPU_POOL_RATIO 1024

// Maximum number of pages in a CPU pool.
#define VM_PAGE_CPU_POOL_MAX_SIZE 128

/*
 * The transfer size of a CPU pool is computed by dividing the pool size by
 * this value.
 */
#define VM_PAGE_CPU_POOL_TRANSFER_RATIO 2

/*
 * Per-processor cache of pages.
 *
 * Serves order-0 allocations without touching the zone lock. When the pool
 * is empty (resp. full), transfer_size pages are moved from (resp. to) the
 * zone's buddy system in a single batch.
 */
struct vm_page_cpu_pool
{
  __cacheline_aligned struct mutex lock;
  int size;             // Maximum number of pages the pool may hold.
  int transfer_size;    // Number of pages moved per fill/drain batch.
  int nr_pages;         // Current number of cached pages.
  struct list pages;    // LIFO list of cached pages.
};
/*
 * Special order value for pages that aren't in a free list. Such pages are
 * either allocated, or part of a free block of pages but not the head page.
 */
#define VM_PAGE_ORDER_UNLISTED ((unsigned short)-1)

/*
 * Doubly-linked list of free blocks.
 *
 * All blocks on a given list have the same power-of-two order; the list
 * links the head page of each block.
 */
struct vm_page_free_list
{
  size_t size;          // Number of blocks on the list.
  struct list blocks;   // Head pages of the free blocks.
};

// Zone name buffer size.
#define VM_PAGE_NAME_SIZE 16
/*
 * Zone of contiguous memory.
 *
 * Physical pages in [start, end) are described by the descriptor table at
 * pages. Free blocks live in per-order buddy free lists; order-0 traffic
 * is absorbed by the per-CPU pools to reduce contention on the zone lock.
 */
struct vm_page_zone
{
  struct vm_page_cpu_pool cpu_pools[CONFIG_MAX_CPUS];
  phys_addr_t start;        // Physical address of the first page.
  phys_addr_t end;          // Physical address past the last page.
  struct vm_page *pages;    // Page descriptor table.
  struct vm_page *pages_end; // End of the descriptor table.
  struct vm_page_free_list free_lists[VM_PAGE_NR_FREE_LISTS];
  size_t nr_free_pages;     // Total free pages; protected by lock.
  __cacheline_aligned struct mutex lock;
};
// Bootstrap information about a zone.
struct vm_page_boot_zone
{
  phys_addr_t start;        // Zone boundaries, page aligned.
  phys_addr_t end;
  bool heap_present;        // True once vm_page_load_heap () was called.
  phys_addr_t avail_start;  // Heap range still available to the boot allocator.
  phys_addr_t avail_end;
};
/*
 * Threads waiting for free object pages.
 *
 * Waiters are queued on a per-zone bucket by global thread priority and
 * woken when a block of at least the requested order is released.
 */
struct vm_page_waiter
{
  struct thread *thread;    // Waiting thread.
  struct pqueue_node node;  // Link in the bucket's priority queue.
  uint32_t order;           // Requested allocation order.
  bool done;                // Set by the waker on success.
};
// Per-zone queue of threads waiting for free pages.
struct vm_page_bucket
{
  struct spinlock lock;     // Protects the waiters queue.
  struct pqueue waiters;    // Priority queue of vm_page_waiter nodes.
};
// Nonzero once vm_page_setup () has completed.
static int vm_page_is_ready __read_mostly;

/*
 * Zone table.
 *
 * The system supports a maximum of 4 zones :
 * - DMA: suitable for DMA
 * - DMA32: suitable for DMA when devices support 32-bits addressing
 * - DIRECTMAP: direct physical mapping, allows direct access from
 * the kernel with a simple offset translation
 * - HIGHMEM: must be mapped before it can be accessed
 *
 * Zones are ordered by priority, 0 being the lowest priority. Their
 * relative priorities are DMA < DMA32 < DIRECTMAP < HIGHMEM. Some zones
 * may actually be aliases for others, e.g. if DMA is always possible from
 * the direct physical mapping, DMA and DMA32 are aliases for DIRECTMAP,
 * in which case the zone table contains DIRECTMAP and HIGHMEM only.
 */
static struct vm_page_zone vm_page_zones[PMEM_MAX_ZONES];

// Bootstrap zone table, only used until vm_page_setup () runs.
static struct vm_page_boot_zone vm_page_boot_zones[PMEM_MAX_ZONES]
__initdata;

// Number of loaded zones.
static uint32_t vm_page_zones_size __read_mostly;

// Registry of page waiters, one bucket per zone.
static struct vm_page_bucket vm_page_buckets[PMEM_MAX_ZONES];
  151. static void __init
  152. vm_page_init (struct vm_page *page, uint16_t zone_index, phys_addr_t pa)
  153. {
  154. memset (page, 0, sizeof (*page));
  155. page->type = VM_PAGE_RESERVED;
  156. page->zone_index = zone_index;
  157. page->order = VM_PAGE_ORDER_UNLISTED;
  158. page->phys_addr = pa;
  159. page->nr_refs = 0;
  160. page->object = NULL;
  161. }
  162. void
  163. vm_page_set_type (struct vm_page *page, uint32_t order, uint16_t type)
  164. {
  165. for (uint32_t i = 0; i < (1u << order); i++)
  166. {
  167. page[i].type = type;
  168. spinlock_init (&page[i].rset_lock);
  169. list_init (&page[i].node);
  170. }
  171. }
  172. static void
  173. vm_page_clear (struct vm_page *page, uint32_t order)
  174. {
  175. for (uint32_t i = 0; i < (1u << order); ++i)
  176. {
  177. page[i].type = VM_PAGE_FREE;
  178. page[i].priv = NULL;
  179. }
  180. }
  181. static void __init
  182. vm_page_free_list_init (struct vm_page_free_list *free_list)
  183. {
  184. free_list->size = 0;
  185. list_init (&free_list->blocks);
  186. }
  187. static inline void
  188. vm_page_free_list_insert (struct vm_page_free_list *free_list,
  189. struct vm_page *page)
  190. {
  191. assert (page->order == VM_PAGE_ORDER_UNLISTED);
  192. ++free_list->size;
  193. list_insert_head (&free_list->blocks, &page->node);
  194. }
  195. static inline void
  196. vm_page_free_list_remove (struct vm_page_free_list *free_list,
  197. struct vm_page *page)
  198. {
  199. assert (page->order != VM_PAGE_ORDER_UNLISTED);
  200. --free_list->size;
  201. list_remove (&page->node);
  202. }
  203. static struct vm_page*
  204. vm_page_zone_alloc_from_buddy (struct vm_page_zone *zone, uint32_t order)
  205. {
  206. struct vm_page_free_list *free_list = free_list;
  207. assert (order < VM_PAGE_NR_FREE_LISTS);
  208. uint32_t i;
  209. for (i = order; i < VM_PAGE_NR_FREE_LISTS; ++i)
  210. {
  211. free_list = &zone->free_lists[i];
  212. if (free_list->size != 0)
  213. break;
  214. }
  215. if (i == VM_PAGE_NR_FREE_LISTS)
  216. return (NULL);
  217. _Auto page = list_first_entry (&free_list->blocks, struct vm_page, node);
  218. vm_page_free_list_remove (free_list, page);
  219. page->order = VM_PAGE_ORDER_UNLISTED;
  220. while (i > order)
  221. {
  222. i--;
  223. _Auto buddy = &page[1 << i];
  224. vm_page_free_list_insert (&zone->free_lists[i], buddy);
  225. buddy->order = i;
  226. }
  227. zone->nr_free_pages -= 1 << order;
  228. return (page);
  229. }
/*
 * Free a block of 2^order pages into a zone's buddy system.
 *
 * The block is iteratively coalesced with its buddy as long as the buddy
 * is free and of matching order, up to the maximum supported order.
 * The caller must hold the zone lock.
 */
static void
vm_page_zone_free_to_buddy (struct vm_page_zone *zone, struct vm_page *page,
                            uint32_t order)
{
  assert (page >= zone->pages);
  assert (page < zone->pages_end);
  assert (page->order == VM_PAGE_ORDER_UNLISTED);
  assert (order < VM_PAGE_NR_FREE_LISTS);

  uint32_t nr_pages = 1 << order;
  phys_addr_t pa = page->phys_addr;

  while (order < VM_PAGE_NR_FREE_LISTS - 1)
    {
      // The buddy's address differs only in the bit of the current order.
      phys_addr_t buddy_pa = pa ^ vm_page_ptob (1 << order);

      if (buddy_pa < zone->start || buddy_pa >= zone->end)
        break;

      _Auto buddy = &zone->pages[vm_page_btop (buddy_pa - zone->start)];

      // A buddy with a different order is allocated or partially split.
      if (buddy->order != order)
        break;

      vm_page_free_list_remove (&zone->free_lists[order], buddy);
      buddy->order = VM_PAGE_ORDER_UNLISTED;
      ++order;
      // The merged block starts at the address aligned to the new order.
      pa &= -vm_page_ptob (1 << order);
      page = &zone->pages[vm_page_btop (pa - zone->start)];
    }

  vm_page_free_list_insert (&zone->free_lists[order], page);
  page->order = order;
  zone->nr_free_pages += nr_pages;
}
  258. static void __init
  259. vm_page_cpu_pool_init (struct vm_page_cpu_pool *cpu_pool, int size)
  260. {
  261. mutex_init (&cpu_pool->lock);
  262. cpu_pool->size = size;
  263. cpu_pool->transfer_size = (size + VM_PAGE_CPU_POOL_TRANSFER_RATIO - 1) /
  264. VM_PAGE_CPU_POOL_TRANSFER_RATIO;
  265. cpu_pool->nr_pages = 0;
  266. list_init (&cpu_pool->pages);
  267. }
  268. static inline struct vm_page_cpu_pool*
  269. vm_page_cpu_pool_get (struct vm_page_zone *zone)
  270. {
  271. return (&zone->cpu_pools[cpu_id ()]);
  272. }
  273. static inline struct vm_page*
  274. vm_page_cpu_pool_pop (struct vm_page_cpu_pool *cpu_pool)
  275. {
  276. assert (cpu_pool->nr_pages != 0);
  277. --cpu_pool->nr_pages;
  278. return (list_pop (&cpu_pool->pages, struct vm_page, node));
  279. }
  280. static inline void
  281. vm_page_cpu_pool_push (struct vm_page_cpu_pool *cpu_pool, struct vm_page *page)
  282. {
  283. assert (cpu_pool->nr_pages < cpu_pool->size);
  284. cpu_pool->nr_pages++;
  285. list_insert_head (&cpu_pool->pages, &page->node);
  286. }
  287. static int
  288. vm_page_cpu_pool_fill (struct vm_page_cpu_pool *cpu_pool,
  289. struct vm_page_zone *zone)
  290. {
  291. assert (cpu_pool->nr_pages == 0);
  292. MUTEX_GUARD (&zone->lock);
  293. int i;
  294. for (i = 0; i < cpu_pool->transfer_size; i++)
  295. {
  296. _Auto page = vm_page_zone_alloc_from_buddy (zone, 0);
  297. if (! page)
  298. break;
  299. vm_page_cpu_pool_push (cpu_pool, page);
  300. }
  301. return (i);
  302. }
  303. static void
  304. vm_page_cpu_pool_drain (struct vm_page_cpu_pool *cpu_pool,
  305. struct vm_page_zone *zone)
  306. {
  307. assert (cpu_pool->nr_pages == cpu_pool->size);
  308. MUTEX_GUARD (&zone->lock);
  309. for (int i = cpu_pool->transfer_size; i > 0; --i)
  310. {
  311. _Auto page = vm_page_cpu_pool_pop (cpu_pool);
  312. vm_page_zone_free_to_buddy (zone, page, 0);
  313. }
  314. }
  315. static phys_addr_t __init
  316. vm_page_zone_size (struct vm_page_zone *zone)
  317. {
  318. return (zone->end - zone->start);
  319. }
  320. static int __init
  321. vm_page_zone_compute_pool_size (struct vm_page_zone *zone)
  322. {
  323. phys_addr_t size = vm_page_btop (vm_page_zone_size (zone)) /
  324. VM_PAGE_CPU_POOL_RATIO;
  325. return (!size ? 1 : MIN (VM_PAGE_CPU_POOL_MAX_SIZE, size));
  326. }
  327. static void __init
  328. vm_page_zone_init (struct vm_page_zone *zone, phys_addr_t start, phys_addr_t end,
  329. struct vm_page *pages)
  330. {
  331. zone->start = start;
  332. zone->end = end;
  333. int pool_size = vm_page_zone_compute_pool_size (zone);
  334. for (uint32_t i = 0; i < ARRAY_SIZE (zone->cpu_pools); ++i)
  335. vm_page_cpu_pool_init (&zone->cpu_pools[i], pool_size);
  336. zone->pages = pages;
  337. zone->pages_end = pages + vm_page_btop (vm_page_zone_size (zone));
  338. mutex_init (&zone->lock);
  339. for (uint32_t i = 0; i < ARRAY_SIZE (zone->free_lists); ++i)
  340. vm_page_free_list_init (&zone->free_lists[i]);
  341. zone->nr_free_pages = 0;
  342. uint32_t i = zone - vm_page_zones;
  343. for (phys_addr_t pa = zone->start; pa < zone->end; pa += PAGE_SIZE)
  344. vm_page_init (&pages[vm_page_btop (pa - zone->start)], i, pa);
  345. }
  346. static inline int
  347. vm_page_zone_selector (const struct vm_page_zone *zone)
  348. {
  349. return ((int)(zone - vm_page_zones));
  350. }
/*
 * Block until pages may be available in the given zone.
 *
 * Called with mtx (either a CPU pool lock or the zone lock) held; the
 * mutex is released in all cases. Returns true when the waiter was
 * satisfied and the allocation should be retried, false when waiting
 * isn't possible because no waiter was provided (no VM_PAGE_SLEEP).
 */
static bool
vm_page_wait (struct vm_page_zone *zone, struct mutex *mtx,
              struct vm_page_waiter *waiter)
{
  if (! waiter)
    {
      mutex_unlock (mtx);
      return (false);
    }

  _Auto bucket = &vm_page_buckets[vm_page_zone_selector (zone)];
  // Grab the queue lock before dropping the page pool one.
  SPINLOCK_GUARD (&bucket->lock);
  mutex_unlock (mtx);
  // The waiter is queued and removed under the bucket lock, so the
  // stack-allocated node never outlives this function while linked.
  pqueue_insert (&bucket->waiters, &waiter->node);
  thread_sleep (&bucket->lock, bucket, "vm-page");
  pqueue_remove (&bucket->waiters, &waiter->node);

  if (waiter->done)
    // Only bump the other waiters if we succeeded.
    pqueue_inc (&bucket->waiters, 1);

  return (waiter->done);
}
/*
 * Allocate a block of 2^order pages from a zone.
 *
 * Order-0 requests are served from the local CPU pool when possible;
 * larger requests go straight to the buddy system. When the zone is
 * exhausted and a waiter was provided, the thread sleeps until pages are
 * freed and retries. Returns NULL only when waiting isn't allowed.
 */
static struct vm_page*
vm_page_zone_alloc (struct vm_page_zone *zone, uint32_t order,
                    uint16_t type, struct vm_page_waiter *waiter)
{
  assert (order < VM_PAGE_NR_FREE_LISTS);
  struct vm_page *page;

  if (! order)
    while (1)
      {
        // Pin the thread so the CPU pool cannot change under our feet.
        THREAD_PIN_GUARD ();
        _Auto cpu_pool = vm_page_cpu_pool_get (zone);
        mutex_lock (&cpu_pool->lock);

        if (cpu_pool->nr_pages || vm_page_cpu_pool_fill (cpu_pool, zone))
          {
            page = vm_page_cpu_pool_pop (cpu_pool);
            mutex_unlock (&cpu_pool->lock);
            break;
          }
        // vm_page_wait () releases the pool lock in all cases.
        else if (!vm_page_wait (zone, &cpu_pool->lock, waiter))
          return (NULL);
      }
  else
    while (1)
      {
        mutex_lock (&zone->lock);
        page = vm_page_zone_alloc_from_buddy (zone, order);

        if (page)
          {
            mutex_unlock (&zone->lock);
            break;
          }
        // vm_page_wait () releases the zone lock in all cases.
        else if (!vm_page_wait (zone, &zone->lock, waiter))
          return (NULL);
      }

  assert (page->type == VM_PAGE_FREE);
  vm_page_set_type (page, order, type);
  return (page);
}
  410. static bool
  411. vm_page_wakeup (struct vm_page_bucket *bucket, uint32_t order)
  412. {
  413. SPINLOCK_GUARD (&bucket->lock);
  414. pqueue_for_each (&bucket->waiters, pnode)
  415. {
  416. _Auto waiter = pqueue_entry (pnode, struct vm_page_waiter, node);
  417. if (likely (waiter->order <= order))
  418. {
  419. waiter->done = true;
  420. thread_wakeup (waiter->thread);
  421. return (true);
  422. }
  423. }
  424. return (false);
  425. }
/*
 * Release a block of 2^order pages back to a zone.
 *
 * Order-0 pages go to the local CPU pool (draining a batch to the buddy
 * system first if the pool is full); larger blocks go directly to the
 * buddy system. When VM_PAGE_SLEEP is set, a waiter whose order fits the
 * released block is woken, scanning buckets from this zone's selector
 * downwards since lower priority waiters can use higher priority pages.
 */
static void
vm_page_zone_free (struct vm_page_zone *zone, struct vm_page *page,
                   uint32_t order, uint32_t flags)
{
  assert (page->type != VM_PAGE_FREE);
  assert (order < VM_PAGE_NR_FREE_LISTS);
  vm_page_clear (page, order);

  if (! order)
    {
      THREAD_PIN_GUARD ();
      _Auto cpu_pool = vm_page_cpu_pool_get (zone);
      MUTEX_GUARD (&cpu_pool->lock);

      // Make room in the pool before pushing the page.
      if (cpu_pool->nr_pages == cpu_pool->size)
        vm_page_cpu_pool_drain (cpu_pool, zone);

      vm_page_cpu_pool_push (cpu_pool, page);
    }
  else
    {
      MUTEX_GUARD (&zone->lock);
      vm_page_zone_free_to_buddy (zone, page, order);
    }

  if (!(flags & VM_PAGE_SLEEP))
    return;

  int selector = vm_page_zone_selector (zone);
  for (; selector >= 0; --selector)
    if (vm_page_wakeup (&vm_page_buckets[selector], order))
      return;
}
  454. void __init
  455. vm_page_load (uint32_t zone_index, phys_addr_t start, phys_addr_t end)
  456. {
  457. assert (zone_index < ARRAY_SIZE (vm_page_boot_zones));
  458. assert (vm_page_aligned (start));
  459. assert (vm_page_aligned (end));
  460. assert (start < end);
  461. assert (vm_page_zones_size < ARRAY_SIZE (vm_page_boot_zones));
  462. _Auto zone = &vm_page_boot_zones[zone_index];
  463. zone->start = start;
  464. zone->end = end;
  465. zone->heap_present = false;
  466. log_debug ("vm_page: load: %s: %llx:%llx",
  467. vm_page_zone_name (zone_index), (uint64_t)start, (uint64_t)end);
  468. ++vm_page_zones_size;
  469. }
  470. void
  471. vm_page_load_heap (uint32_t zone_index, phys_addr_t start, phys_addr_t end)
  472. {
  473. assert (zone_index < ARRAY_SIZE (vm_page_boot_zones));
  474. assert (vm_page_aligned (start));
  475. assert (vm_page_aligned (end));
  476. _Auto zone = &vm_page_boot_zones[zone_index];
  477. assert (zone->start <= start);
  478. assert (end <= zone-> end);
  479. zone->avail_start = start;
  480. zone->avail_end = end;
  481. zone->heap_present = true;
  482. log_debug ("vm_page: heap: %s: %llx:%llx",
  483. vm_page_zone_name (zone_index), (uint64_t)start, (uint64_t)end);
  484. }
  485. int
  486. vm_page_ready (void)
  487. {
  488. return (vm_page_is_ready);
  489. }
  490. static uint32_t
  491. vm_page_select_alloc_zone (uint32_t selector)
  492. {
  493. uint32_t zone_index;
  494. switch (selector)
  495. {
  496. case VM_PAGE_SEL_DMA:
  497. zone_index = PMEM_ZONE_DMA;
  498. break;
  499. case VM_PAGE_SEL_DMA32:
  500. zone_index = PMEM_ZONE_DMA32;
  501. break;
  502. case VM_PAGE_SEL_DIRECTMAP:
  503. zone_index = PMEM_ZONE_DIRECTMAP;
  504. break;
  505. case VM_PAGE_SEL_HIGHMEM:
  506. zone_index = PMEM_ZONE_HIGHMEM;
  507. break;
  508. default:
  509. panic ("vm_page: invalid selector");
  510. }
  511. return (MIN (vm_page_zones_size - 1, zone_index));
  512. }
  513. static int __init
  514. vm_page_boot_zone_loaded (const struct vm_page_boot_zone *zone)
  515. {
  516. return (zone->end != 0);
  517. }
  518. static void __init
  519. vm_page_check_boot_zones (void)
  520. {
  521. if (! vm_page_zones_size)
  522. panic ("vm_page: no physical memory loaded");
  523. for (size_t i = 0; i < ARRAY_SIZE (vm_page_boot_zones); i++)
  524. if (vm_page_boot_zone_loaded (&vm_page_boot_zones[i]) !=
  525. (i < vm_page_zones_size))
  526. panic ("vm_page: invalid boot zone table");
  527. }
  528. static phys_addr_t __init
  529. vm_page_boot_zone_size (struct vm_page_boot_zone *zone)
  530. {
  531. return (zone->end - zone->start);
  532. }
  533. static phys_addr_t __init
  534. vm_page_boot_zone_avail_size (struct vm_page_boot_zone *zone)
  535. {
  536. return (zone->avail_end - zone->avail_start);
  537. }
/*
 * Early boot allocator.
 *
 * Carves size bytes (rounded up to a page multiple) out of the highest
 * priority direct-mapped zone with enough heap left, bumping that zone's
 * available range. Memory obtained this way is never freed. Panics when
 * no zone can satisfy the request.
 */
static void* __init
vm_page_bootalloc (size_t size)
{
  /*
   * Walk zones from DIRECTMAP downwards. The index is unsigned, so
   * decrementing past zero wraps to a huge value, which terminates the
   * loop after zone 0 has been tried.
   */
  for (size_t i = vm_page_select_alloc_zone (VM_PAGE_SEL_DIRECTMAP);
       i < vm_page_zones_size; --i)
    {
      _Auto zone = &vm_page_boot_zones[i];

      if (!zone->heap_present)
        continue;
      else if (size <= vm_page_boot_zone_avail_size (zone))
        {
          phys_addr_t pa = zone->avail_start;
          zone->avail_start += vm_page_round (size);
          // The zone is direct-mapped, so a simple translation suffices.
          return ((void *)vm_page_direct_va (pa));
        }
    }

  panic ("vm_page: no physical memory available");
}
#ifdef CONFIG_SHELL

// Shell command handler: dump physical memory statistics to the shell stream.
static void
vm_page_shell_info (struct shell *shell, int c __unused, char **v __unused)
{
  vm_page_info (shell->stream);
}

// Command table registered with the main shell command set.
static struct shell_cmd vm_page_shell_cmds[] =
{
  SHELL_CMD_INITIALIZER ("vm_page_info", vm_page_shell_info,
                         "vm_page_info",
                         "display information about physical memory"),
};

// Init operation: register the vm_page shell commands.
static int __init
vm_page_setup_shell (void)
{
  SHELL_REGISTER_CMDS (vm_page_shell_cmds, shell_get_main_cmd_set ());
  return (0);
}

INIT_OP_DEFINE (vm_page_setup_shell,
                INIT_OP_DEP (printf_setup, true),
                INIT_OP_DEP (shell_setup, true),
                INIT_OP_DEP (vm_page_setup, true));

#endif
/*
 * Initialize the vm_page module.
 *
 * Allocates the page descriptor table from boot memory, initializes every
 * zone, releases the available pages to the buddy free lists, and marks
 * the pages backing the table itself as VM_PAGE_TABLE.
 */
static int __init
vm_page_setup (void)
{
  vm_page_check_boot_zones ();

  // Compute the page table size.
  size_t nr_pages = 0;

  for (uint32_t i = 0; i < vm_page_zones_size; ++i)
    nr_pages += vm_page_btop (vm_page_boot_zone_size (&vm_page_boot_zones[i]));

  size_t table_size = vm_page_round (nr_pages * sizeof (struct vm_page));
  log_info ("vm_page: page table size: %zu entries (%zuk)",
            nr_pages, table_size >> 10);
  struct vm_page *table = vm_page_bootalloc (table_size);
  uintptr_t va = (uintptr_t) table;

  /*
   * Initialize the zones, associating them to the page table. When
   * the zones are initialized, all their pages are set allocated.
   * Pages are then released, which populates the free lists.
   */
  for (uint32_t i = 0; i < vm_page_zones_size; ++i)
    {
      _Auto zone = &vm_page_zones[i];
      _Auto boot_zone = &vm_page_boot_zones[i];
      vm_page_zone_init (zone, boot_zone->start, boot_zone->end, table);

      // Only the pages inside [avail_start, avail_end) become free.
      _Auto page = zone->pages + vm_page_btop (boot_zone->avail_start -
                                               boot_zone->start);
      _Auto end = zone->pages + vm_page_btop (boot_zone->avail_end -
                                              boot_zone->start);

      for (; page < end; ++page)
        {
          page->type = VM_PAGE_FREE;
          vm_page_zone_free_to_buddy (zone, page, 0);
        }

      table += vm_page_btop (vm_page_zone_size (zone));
    }

  // Type the pages backing the descriptor table itself.
  while (va < (uintptr_t) table)
    {
      phys_addr_t pa = vm_page_direct_pa (va);
      struct vm_page *page = vm_page_lookup (pa);
      assert (page && page->type == VM_PAGE_RESERVED);
      page->type = VM_PAGE_TABLE;
      va += PAGE_SIZE;
    }

  // Initialize the per-zone waiter buckets.
  for (size_t i = 0; i < ARRAY_SIZE (vm_page_buckets); ++i)
    {
      pqueue_init (&vm_page_buckets[i].waiters);
      spinlock_init (&vm_page_buckets[i].lock);
    }

  vm_page_is_ready = 1;
  return (0);
}

INIT_OP_DEFINE (vm_page_setup,
                INIT_OP_DEP (boot_load_vm_page_zones, true),
                INIT_OP_DEP (log_setup, true),
                INIT_OP_DEP (printf_setup, true));
  633. // TODO Rename to avoid confusion with "managed pages".
  634. void __init
  635. vm_page_manage (struct vm_page *page)
  636. {
  637. assert (page->zone_index < ARRAY_SIZE (vm_page_zones));
  638. assert (page->type == VM_PAGE_RESERVED);
  639. vm_page_clear (page, 0);
  640. vm_page_zone_free_to_buddy (&vm_page_zones[page->zone_index], page, 0);
  641. }
  642. struct vm_page*
  643. vm_page_lookup (phys_addr_t pa)
  644. {
  645. for (uint32_t i = 0; i < vm_page_zones_size; i++)
  646. {
  647. _Auto zone = &vm_page_zones[i];
  648. if (pa >= zone->start && pa < zone->end)
  649. return (&zone->pages[vm_page_btop (pa - zone->start)]);
  650. }
  651. return (NULL);
  652. }
  653. static bool
  654. vm_page_block_referenced (const struct vm_page *page, uint32_t order)
  655. {
  656. for (uint32_t i = 0, nr_pages = 1 << order; i < nr_pages; i++)
  657. if (vm_page_referenced (&page[i]))
  658. return (true);
  659. return (false);
  660. }
  661. static void
  662. vm_page_waiter_init (struct vm_page_waiter *wp, uint32_t order)
  663. {
  664. wp->thread = thread_self ();
  665. wp->order = order;
  666. wp->done = false;
  667. pqueue_node_init (&wp->node, thread_real_global_priority (wp->thread));
  668. }
/*
 * Allocate a block of 2^order physical pages.
 *
 * The selector bounds the highest zone the block may come from; zones are
 * tried from that one downwards. With VM_PAGE_SLEEP, the caller may block
 * until memory is available — the waiter lives on this stack frame, which
 * is safe since vm_page_wait () unlinks it before returning. Returns NULL
 * on failure.
 */
struct vm_page*
vm_page_alloc (uint32_t order, uint32_t selector,
               uint16_t type, uint32_t flags)
{
  struct vm_page_waiter *waiter = NULL;

  if (flags & VM_PAGE_SLEEP)
    {
      waiter = alloca (sizeof (*waiter));
      vm_page_waiter_init (waiter, order);
    }

  /*
   * Scan zones from the selected one downwards. The unsigned index wraps
   * past zero, terminating the loop after zone 0.
   */
  for (uint32_t i = vm_page_select_alloc_zone (selector);
       i < vm_page_zones_size; --i)
    {
      _Auto page = vm_page_zone_alloc (&vm_page_zones[i], order, type, waiter);

      if (page)
        {
          assert (!vm_page_block_referenced (page, order));
          return (page);
        }
    }

  return (NULL);
}
  691. void
  692. vm_page_free (struct vm_page *page, uint32_t order, uint32_t flags)
  693. {
  694. assert (page->zone_index < ARRAY_SIZE (vm_page_zones));
  695. assert (!vm_page_block_referenced (page, order));
  696. vm_page_zone_free (&vm_page_zones[page->zone_index], page, order, flags);
  697. }
  698. void
  699. vm_page_list_free (struct list *pages)
  700. {
  701. list_for_each_safe (pages, node, tmp)
  702. {
  703. _Auto page = list_entry (node, struct vm_page, node);
  704. vm_page_zone_free (&vm_page_zones[page->zone_index],
  705. page, 0, VM_PAGE_SLEEP);
  706. }
  707. }
/*
 * Return the human readable name of a zone.
 *
 * The checks run from the highest priority zone downwards, which matters
 * because aliased zones share the same PMEM_ZONE_* value.
 */
const char*
vm_page_zone_name (uint32_t zone_index)
{
  // Don't use a switch statement since zones can be aliased.
  if (zone_index == PMEM_ZONE_HIGHMEM)
    return ("HIGHMEM");
  else if (zone_index == PMEM_ZONE_DIRECTMAP)
    return ("DIRECTMAP");
  else if (zone_index == PMEM_ZONE_DMA32)
    return ("DMA32");
  else if (zone_index == PMEM_ZONE_DMA)
    return ("DMA");
  else
    panic ("vm_page: invalid zone index");
}
  723. void
  724. vm_page_info (struct stream *stream)
  725. {
  726. for (uint32_t i = 0; i < vm_page_zones_size; ++i)
  727. {
  728. _Auto zone = &vm_page_zones[i];
  729. size_t pages = (size_t) (zone->pages_end - zone->pages);
  730. fmt_xprintf (stream, "vm_page: %s: pages: %zu (%zuM), "
  731. "free: %zu (%zuM)\n",
  732. vm_page_zone_name (i), pages, pages >> (20 - PAGE_SHIFT),
  733. zone->nr_free_pages,
  734. zone->nr_free_pages >> (20 - PAGE_SHIFT));
  735. }
  736. }