map.c

/*
 * Copyright (c) 2011-2017 Richard Braun.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 */

#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#include <kern/capability.h>
#include <kern/init.h>
#include <kern/ipc.h>
#include <kern/kmem.h>
#include <kern/kmessage.h>
#include <kern/list.h>
#include <kern/log.h>
#include <kern/panic.h>
#include <kern/rbtree.h>
#include <kern/shell.h>
#include <kern/spinlock.h>
#include <kern/task.h>

#include <machine/page.h>
#include <machine/pmap.h>

#include <vm/defs.h>
#include <vm/map.h>
#include <vm/kmem.h>
#include <vm/object.h>
#include <vm/page.h>

struct vm_map_page_target
{
  uintptr_t front;
  uintptr_t back;
};

#define VM_MAP_PMAP_FLAGS   (PMAP_PEF_GLOBAL | PMAP_IGNORE_ERRORS)

enum
{
  VM_MAP_FREE_NONE,
  VM_MAP_FREE_OBJ,
  VM_MAP_FREE_ALL,
};

/*
 * Mapping request.
 *
 * Most members are input parameters from a call to e.g. vm_map_enter(). The
 * start member is also an output argument. The next member is used internally
 * by the mapping functions.
 */
struct vm_map_request
{
  uintptr_t start;
  size_t size;
  int flags;
  struct vm_object *object;
  uint64_t offset;
  struct vm_map_entry *next;
};
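
/*
 * Usage sketch (illustrative only; actual call sites may differ): a mapping
 * is established in two phases. vm_map_prepare() validates the parameters
 * and locates a suitable virtual range, filling a request, then
 * vm_map_insert() turns that request into a linked map entry. Assuming flags
 * and object were set up by the caller:
 *
 *   struct vm_map_request req;
 *   int error = vm_map_prepare (map, 0, PAGE_SIZE, flags, object, 0, &req);
 *   if (! error)
 *     error = vm_map_insert (map, NULL, &req);   // NULL: allocate the entry.
 */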

static int vm_map_prepare (struct vm_map *map, uintptr_t start, size_t size,
                           int flags, struct vm_object *object,
                           uint64_t offset, struct vm_map_request *request);

static int vm_map_insert (struct vm_map *map, struct vm_map_entry *entry,
                          const struct vm_map_request *request);

static void vm_map_entry_unmap (struct vm_map *map, struct vm_map_entry *ep);

static struct kmem_cache vm_map_entry_cache;
static struct kmem_cache vm_map_cache;

struct vm_map vm_map_kernel_map;

static struct vm_map_entry*
vm_map_entry_create (void)
{
  return (kmem_cache_alloc (&vm_map_entry_cache));
}

static int
vm_map_entry_alloc (struct list *list, uint32_t n)
{
  list_init (list);
  for (uint32_t i = 0; i < n; ++i)
    {
      _Auto entry = vm_map_entry_create ();
      if (! entry)
        {
          list_for_each_safe (list, nd, tmp)
            kmem_cache_free (&vm_map_entry_cache,
                             structof (nd, struct vm_map_entry, list_node));
          return (ENOMEM);
        }

      list_insert_tail (list, &entry->list_node);
    }

  return (0);
}

static struct vm_map_entry*
vm_map_entry_pop (struct list *list)
{
  assert (!list_empty (list));
  return (list_pop (list, struct vm_map_entry, list_node));
}

static void
vm_map_unref_object (struct vm_object *obj)
{
  if (obj->flags & VM_OBJECT_EXTERNAL)
    cap_channel_put_vmobj (obj->channel);
  else
    vm_object_unref (obj);
}

static void
vm_map_entry_free_pages (struct vm_map_entry *entry)
{
  if ((entry->flags & (VM_MAP_PHYS | VM_MAP_ANON)) !=
      (VM_MAP_PHYS | VM_MAP_ANON))
    return;

  size_t nr_pages = (entry->end - entry->start) / PAGE_SIZE;
  for (size_t i = 0; i < nr_pages; ++i)
    vm_page_unref (entry->pages + i);
}

static void
vm_map_entry_free_obj (struct vm_map *map, struct vm_map_entry *ep, int free)
{
  struct vm_object *obj = ep->object;
  switch (free)
    {
      case VM_MAP_FREE_ALL:
        vm_map_entry_unmap (map, ep);
        __fallthrough;
      case VM_MAP_FREE_OBJ:
        if (! obj)
          vm_map_entry_free_pages (ep);
        else
          vm_object_remove (obj, ep->offset, ep->offset + ep->end - ep->start);
        __fallthrough;
      case VM_MAP_FREE_NONE:
        break;
    }

  if (obj)
    vm_map_unref_object (obj);
}

static void
vm_map_entry_destroy (struct vm_map *map, struct vm_map_entry *entry, int free)
{
  vm_map_entry_free_obj (map, entry, free);
  kmem_cache_free (&vm_map_entry_cache, entry);
}

static void
vm_map_entry_list_destroy (struct vm_map *map, struct list *list, int free)
{
  list_for_each_safe (list, ex, tmp)
    vm_map_entry_destroy (map, list_entry (ex, struct vm_map_entry,
                                           list_node), free);
}

static inline int
vm_map_entry_cmp_lookup (uintptr_t addr, const struct rbtree_node *node)
{
  _Auto entry = rbtree_entry (node, struct vm_map_entry, tree_node);
  return (addr >= entry->end ? 1 : (addr >= entry->start ? 0 : -1));
}

static inline int
vm_map_entry_cmp_insert (const struct rbtree_node *a,
                         const struct rbtree_node *b)
{
  _Auto entry = rbtree_entry (a, struct vm_map_entry, tree_node);
  return (vm_map_entry_cmp_lookup (entry->start, b));
}

static bool
vm_map_request_valid (const struct vm_map_request *request)
{
  return ((request->object || !request->offset ||
           (request->flags & VM_MAP_PHYS)) &&
          vm_page_aligned (request->offset) &&
          vm_page_aligned (request->start) &&
          request->size > 0 && vm_page_aligned (request->size) &&
          request->start + request->size > request->start &&
          ((VM_MAP_PROT (request->flags) & VM_MAP_MAXPROT (request->flags)) ==
           VM_MAP_PROT (request->flags)));
}

/*
 * Look up an entry in a map.
 *
 * This function returns the entry which is closest to the given address
 * such that addr < entry->end (i.e. either containing or after the requested
 * address), or NULL if there is no such entry.
 */
static struct vm_map_entry*
vm_map_lookup_nearest (struct vm_map *map, uintptr_t addr)
{
  assert (vm_page_aligned (addr));

  _Auto entry = map->lookup_cache;
  if (entry && addr >= entry->start && addr < entry->end)
    return (entry);

  _Auto node = rbtree_lookup_nearest (&map->entry_tree, addr,
                                      vm_map_entry_cmp_lookup, RBTREE_RIGHT);
  if (node)
    {
      _Auto e2 = rbtree_entry (node, struct vm_map_entry, tree_node);
      assert (addr < e2->end);
      atomic_cas_rlx (&map->lookup_cache, entry, e2);
      return (e2);
    }

  return (NULL);
}
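
/*
 * Example of the lookup semantics above (illustrative): with entries covering
 * [0x1000, 0x3000) and [0x5000, 0x6000), looking up 0x2000 returns the first
 * entry (it contains the address), looking up 0x4000 returns the second one
 * (the nearest entry past the hole), and looking up 0x7000 returns NULL.
 */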

static int
vm_map_find_fixed (struct vm_map *map, struct vm_map_request *request)
{
  uintptr_t start = request->start;
  size_t size = request->size;

  if (start < map->start || start + size > map->end)
    return (ENOMEM);

  _Auto next = vm_map_lookup_nearest (map, start);
  if (next && (start >= next->start || next->start - start < size))
    return (ENOMEM);

  request->next = next;
  return (0);
}

static inline struct vm_map_entry*
vm_map_next (struct vm_map *map, struct vm_map_entry *entry)
{
  struct list *node = list_next (&entry->list_node);
  return (list_end (&map->entry_list, node) ?
          NULL : list_entry (node, struct vm_map_entry, list_node));
}

// Always try to map addresses above this threshold.
#define VM_MAP_FIRST_ADDR   ((1 << 20) * 4)

static int
vm_map_find_avail (struct vm_map *map, struct vm_map_request *request)
{
  // If there is a hint, try there.
  if (request->start &&
      vm_map_find_fixed (map, request) == 0)
    return (0);

  size_t size = request->size;
  uintptr_t start = MAX (map->start, VM_MAP_FIRST_ADDR);
  _Auto next = vm_map_lookup_nearest (map, start);

  while (1)
    {
      assert (start <= map->end);
      if (map->end - start < size)
        // The end of the map has been reached and no space could be found.
        return (ENOMEM);

      size_t space = !next ? map->end - start :
                     (start >= next->start ? 0 : next->start - start);

      if (space >= size)
        {
          request->start = start;
          request->next = next;
          return (0);
        }

      start = next->end;
      next = vm_map_next (map, next);
    }
}
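
/*
 * The scan above is a plain first fit: starting at the hint (or at
 * VM_MAP_FIRST_ADDR, i.e. 4 MiB), it walks the sorted entry list and returns
 * the first hole large enough for the request. Illustrative example: with
 * entries at [4M, 5M) and [5M + 8K, 6M), a 4K request is placed at 5M, while
 * an 8M request is placed at 6M, after the last entry, assuming the map end
 * leaves enough room.
 */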

static void
vm_map_link (struct vm_map *map, struct vm_map_entry *entry,
             struct vm_map_entry *next)
{
  assert (entry->start < entry->end);

  if (! next)
    list_insert_tail (&map->entry_list, &entry->list_node);
  else
    list_insert_before (&entry->list_node, &next->list_node);

  rbtree_insert (&map->entry_tree, &entry->tree_node, vm_map_entry_cmp_insert);
  ++map->nr_entries;
}

static void
vm_map_unlink (struct vm_map *map, struct vm_map_entry *entry)
{
  assert (entry->start < entry->end);

  // No need for atomics here as this is done under an exclusive lock.
  if (map->lookup_cache == entry)
    map->lookup_cache = NULL;

  list_remove (&entry->list_node);
  rbtree_remove (&map->entry_tree, &entry->tree_node);
  --map->nr_entries;
}

/*
 * Check mapping parameters, find a suitable area of virtual memory, and
 * prepare the mapping request for that region.
 */
static int
vm_map_prepare (struct vm_map *map, uintptr_t start,
                size_t size, int flags, struct vm_object *object,
                uint64_t offset, struct vm_map_request *request)
{
  request->start = start;
  request->size = size;
  request->flags = flags;
  request->object = object;
  request->offset = offset;
  assert (vm_map_request_valid (request));
  return ((flags & VM_MAP_FIXED) ?
          vm_map_find_fixed (map, request) :
          vm_map_find_avail (map, request));
}

/*
 * Merging functions.
 *
 * There is room for optimization (e.g. not reinserting entries when it is
 * known the tree doesn't need to be adjusted), but focus on correctness for
 * now.
 */

static inline int
vm_map_try_merge_compatible (const struct vm_map_request *request,
                             const struct vm_map_entry *entry)
{
  return (request->object == entry->object &&
          request->object != NULL &&
          ((request->flags & VM_MAP_ENTRY_MASK) ==
           (entry->flags & VM_MAP_ENTRY_MASK)));
}

static struct vm_map_entry*
vm_map_try_merge_prev (struct vm_map *map, const struct vm_map_request *request,
                       struct vm_map_entry *entry)
{
  assert (entry);
  if (!vm_map_try_merge_compatible (request, entry) ||
      entry->end != request->start ||
      (entry->object &&
       entry->offset + entry->end - entry->start != request->offset))
    return (NULL);

  _Auto next = vm_map_next (map, entry);
  vm_map_unlink (map, entry);
  entry->end += request->size;
  vm_map_link (map, entry, next);
  return (entry);
}

static struct vm_map_entry*
vm_map_try_merge_next (struct vm_map *map, const struct vm_map_request *req,
                       struct vm_map_entry *entry)
{
  assert (entry);
  if (!vm_map_try_merge_compatible (req, entry))
    return (NULL);

  uintptr_t end = req->start + req->size;
  if (end != entry->start ||
      (entry->object &&
       req->offset + req->size != entry->offset))
    return (NULL);

  _Auto next = vm_map_next (map, entry);
  vm_map_unlink (map, entry);
  entry->start = req->start;
  entry->offset = req->offset;
  vm_map_link (map, entry, next);
  return (entry);
}

static struct vm_map_entry*
vm_map_try_merge_near (struct vm_map *map, const struct vm_map_request *request,
                       struct vm_map_entry *first, struct vm_map_entry *second)
{
  assert (first);
  assert (second);

  if (first->end == request->start &&
      request->start + request->size == second->start &&
      vm_map_try_merge_compatible (request, first) &&
      vm_map_try_merge_compatible (request, second) &&
      first->offset + first->end - first->start == request->offset &&
      request->offset + request->size == second->offset)
    {
      _Auto next = vm_map_next (map, second);
      vm_map_unlink (map, first);
      vm_map_unlink (map, second);
      first->end = second->end;
      vm_map_entry_destroy (map, second, VM_MAP_FREE_NONE);
      vm_map_link (map, first, next);
      return (first);
    }

  _Auto entry = vm_map_try_merge_prev (map, request, first);
  return (entry ?: vm_map_try_merge_next (map, request, second));
}

static struct vm_map_entry*
vm_map_try_merge (struct vm_map *map, const struct vm_map_request *request)
{
  // Statically allocated map entries must not be merged.
  assert (!(request->flags & VM_MAP_NOMERGE));

  if (!request->next)
    {
      struct list *node = list_last (&map->entry_list);
      if (list_end (&map->entry_list, node))
        return (NULL);

      _Auto prev = list_entry (node, struct vm_map_entry, list_node);
      return (vm_map_try_merge_prev (map, request, prev));
    }

  struct list *node = list_prev (&request->next->list_node);
  if (list_end (&map->entry_list, node))
    return (vm_map_try_merge_next (map, request, request->next));

  _Auto prev = list_entry (node, struct vm_map_entry, list_node);
  return (vm_map_try_merge_near (map, request, prev, request->next));
}
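
/*
 * Illustrative summary of the merge cases handled above (assuming identical
 * flags, the same non-NULL object and contiguous offsets): a request that
 * exactly fills the gap between [A, B) and [B + size, C) collapses all three
 * regions into a single [A, C) entry; a request only adjacent to its
 * predecessor extends that entry forward; a request only adjacent to its
 * successor extends that entry backward and rewinds its offset to the
 * request's.
 */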

/*
 * Convert a prepared mapping request into an entry in the given map.
 *
 * If entry is NULL, a map entry is allocated for the mapping.
 */
static int
vm_map_insert (struct vm_map *map, struct vm_map_entry *entry,
               const struct vm_map_request *req)
{
  if (! entry)
    {
      entry = vm_map_try_merge (map, req);
      if (entry)
        goto out;

      entry = vm_map_entry_create ();
      if (! entry)
        return (ENOMEM);
    }

  entry->start = req->start;
  entry->end = req->start + req->size;
  entry->object = req->object;

  if (req->flags & VM_MAP_PHYS)
    entry->pages = vm_page_lookup (req->offset);
  else
    entry->offset = (req->flags & VM_MAP_ANON) ?
                    vm_page_anon_offset (entry->start) : req->offset;

  entry->flags = req->flags & VM_MAP_ENTRY_MASK;
  vm_map_link (map, entry, req->next);

  if (entry->object)
    vm_object_ref (entry->object);

out:
  map->size += req->size;
  return (0);
}

static inline int
vm_map_enter_locked (struct vm_map *map, uintptr_t *startp, size_t size,
                     int flags, struct vm_object *object, uint64_t offset)
{
  struct vm_map_request request;
  int error = vm_map_prepare (map, *startp, size, flags, object,
                              offset, &request);

  if (error != 0 ||
      (error = vm_map_insert (map, NULL, &request)) != 0)
    return (error);

  *startp = request.start;
  return (0);
}

static int
vm_map_umap_req (struct vm_object *obj, uint64_t offset, int flags)
{
  struct kmessage msg;
  msg.type = KMSG_TYPE_MMAP_REQ;
  msg.mmap_req.offset = offset;
  msg.mmap_req.prot = VM_MAP_PROT (flags);
  msg.mmap_req.tag = obj->channel->tag;

  struct cap_iters it;
  cap_iters_init_buf (&it, &msg, sizeof (msg));

  ssize_t rv = cap_send_iters (CAP (obj->channel), &it, &it,
                               0, IPC_MSG_KERNEL);
  return (rv < 0 ? (int)-rv : (int)rv);
}

int
vm_map_enter (struct vm_map *map, uintptr_t *startp, size_t size,
              int flags, struct vm_object *object, uint64_t offset)
{
  if (flags & VM_MAP_PHYS)
    {
      if (object)
        return (EINVAL);
      else if (flags & VM_MAP_ANON)
        {
          uint32_t order = vm_page_order (size);
          _Auto pages = vm_page_alloc (order, VM_PAGE_SEL_HIGHMEM,
                                       VM_PAGE_OBJECT, VM_PAGE_SLEEP);
          if (! pages)
            return (ENOMEM);

          for (uint32_t i = 0; i < (1u << order); ++i)
            {
              vm_page_init_refcount (pages + i);
              vm_page_zero (pages + i);
            }

          offset = vm_page_to_pa (pages);
        }
      else if (!vm_page_lookup (offset + size - PAGE_SIZE))
        return (EFAULT);
    }
  else if (object && object->flags & VM_OBJECT_EXTERNAL)
    {
      // Need to check if the mapping is permitted.
      int error = vm_map_umap_req (object, offset, flags);
      if (error)
        return (error);
    }

  sxlock_exlock (&map->lock);
  int error = vm_map_enter_locked (map, startp, size, flags, object, offset);
  sxlock_unlock (&map->lock);

  if (error && ((flags & (VM_MAP_PHYS | VM_MAP_ANON)) ==
                (VM_MAP_PHYS | VM_MAP_ANON)))
    {
      _Auto pages = vm_page_lookup (offset);
      for (uint32_t i = 0; i < (1u << vm_page_order (size)); ++i)
        vm_page_unref (pages + i);
    }

  return (error);
}
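
/*
 * Typical call to vm_map_enter() (illustrative sketch mirroring
 * vm_map_anon_alloc() below; the actual flags depend on the caller): map one
 * anonymous, read-write page wherever the kernel finds room, backed by the
 * map's private cache object:
 *
 *   uintptr_t va = 0;
 *   int flags = VM_MAP_FLAGS (VM_PROT_RDWR, VM_PROT_RDWR, VM_INHERIT_DEFAULT,
 *                             VM_ADV_DEFAULT, VM_MAP_ANON);
 *   int error = vm_map_enter (map, &va, PAGE_SIZE, flags,
 *                             map->priv_cache, 0);
 */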

static void
vm_map_entry_copy_impl (struct vm_map_entry *dst,
                        const struct vm_map_entry *src)
{
  dst->start = src->start;
  dst->end = src->end;
  dst->offset = src->offset;
  dst->flags = src->flags;
}

static void
vm_map_entry_copy (struct vm_map_entry *dst, const struct vm_map_entry *src)
{
  vm_map_entry_copy_impl (dst, src);
  dst->object = src->object;
  if (dst->object)
    vm_object_ref (dst->object);
}

static void
vm_map_split_entries (struct vm_map_entry *prev, struct vm_map_entry *next,
                      uintptr_t split_addr)
{
  uintptr_t delta = split_addr - prev->start;
  prev->end = split_addr;
  next->start = split_addr;

  if (next->object)
    next->offset += delta;
}

static void
vm_map_clip_start (struct vm_map *map, struct vm_map_entry *entry,
                   uintptr_t start, struct list *alloc)
{
  if (start <= entry->start || start >= entry->end)
    return;

  _Auto new_entry = vm_map_entry_pop (alloc);
  _Auto next = vm_map_next (map, entry);
  vm_map_unlink (map, entry);
  vm_map_entry_copy (new_entry, entry);
  vm_map_split_entries (new_entry, entry, start);
  vm_map_link (map, entry, next);
  vm_map_link (map, new_entry, entry);
}

static void
vm_map_clip_end (struct vm_map *map, struct vm_map_entry *entry,
                 uintptr_t end, struct list *alloc)
{
  if (end <= entry->start || end >= entry->end)
    return;

  _Auto new_entry = vm_map_entry_pop (alloc);
  _Auto next = vm_map_next (map, entry);
  vm_map_unlink (map, entry);
  vm_map_entry_copy (new_entry, entry);
  vm_map_split_entries (entry, new_entry, end);
  vm_map_link (map, entry, next);
  vm_map_link (map, new_entry, next);
}
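
/*
 * Worked example of the clipping helpers above (illustrative): given an
 * entry covering [0x1000, 0x5000) at object offset 0, vm_map_clip_start() at
 * 0x2000 splits off [0x1000, 0x2000) at offset 0 and leaves the original
 * entry as [0x2000, 0x5000) at offset 0x1000; a subsequent vm_map_clip_end()
 * at 0x4000 further splits off [0x4000, 0x5000) at offset 0x3000. Both
 * helpers take their spare entries from a preallocated list, so they cannot
 * fail once that list has been filled.
 */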

static void
vm_map_entry_unmap (struct vm_map *map, struct vm_map_entry *entry)
{
  uint32_t xflg = (entry->flags & VM_MAP_CLEAN) ? PMAP_CLEAN_PAGES : 0;
  pmap_remove_range (map->pmap, entry->start, entry->end,
                     VM_MAP_PMAP_FLAGS | xflg);
}

static int
vm_map_remove_impl (struct vm_map *map, uintptr_t start,
                    uintptr_t end, struct list *list)
{
  assert (start >= map->start);
  assert (end <= map->end);
  assert (start < end);

  SXLOCK_EXGUARD (&map->lock);
  _Auto entry = vm_map_lookup_nearest (map, start);

  if (! entry)
    return (0);

  _Auto last = vm_map_lookup_nearest (map, end) ?: entry;
  struct list alloc_entries;
  int error = vm_map_entry_alloc (&alloc_entries,
                                  (start > entry->start &&
                                   start < entry->end) ||
                                  (end > last->start &&
                                   end < last->end));
  if (error)
    return (error);

  vm_map_clip_start (map, entry, start, &alloc_entries);
  do
    {
      vm_map_clip_end (map, entry, end, &alloc_entries);
      map->size -= entry->end - entry->start;
      struct list *node = list_next (&entry->list_node);
      vm_map_unlink (map, entry);

      list_insert_tail (list, &entry->list_node);
      vm_map_entry_unmap (map, entry);

      if (list_end (&map->entry_list, node))
        break;

      entry = list_entry (node, struct vm_map_entry, list_node);
    }
  while (entry->start < end);

  assert (list_empty (&alloc_entries));

  // Don't prevent lookups and page faults from here on.
  sxlock_share (&map->lock);
  pmap_update (map->pmap);
  return (0);
}

int
vm_map_remove (struct vm_map *map, uintptr_t start, uintptr_t end)
{
  struct list entries;
  list_init (&entries);

  int error = vm_map_remove_impl (map, start, end, &entries);
  if (! error)
    vm_map_entry_list_destroy (map, &entries, VM_MAP_FREE_OBJ);

  return (error);
}

static void
vm_map_try_merge_entries (struct vm_map *map, struct vm_map_entry *prev,
                          struct vm_map_entry *next, struct list *dead)
{
  if ((prev->flags & VM_MAP_ENTRY_MASK) !=
        (next->flags & VM_MAP_ENTRY_MASK) ||
      prev->end != next->start ||
      prev->object != next->object ||
      prev->offset + prev->end - prev->start != next->offset)
    return;

  _Auto new_next = vm_map_next (map, next);
  next->start = prev->start;
  next->offset = prev->offset;
  vm_map_unlink (map, prev);
  vm_map_unlink (map, next);
  vm_map_link (map, next, new_next);
  list_insert_tail (dead, &prev->list_node);
}

static int
vm_map_protect_entry (struct vm_map *map, struct vm_map_entry *entry,
                      uintptr_t start, uintptr_t end,
                      int prot, struct list *dead)
{
  if ((VM_MAP_MAXPROT (entry->flags) & prot) != prot)
    return (EACCES);
  else if (VM_MAP_PROT (entry->flags) == prot)
    return (0);   // Nothing to do.

  int nr_entries = (start != entry->start) + (end != entry->end);
  if (nr_entries != 0)
    {
      struct list entries;
      int error = vm_map_entry_alloc (&entries, nr_entries);

      if (error)
        return (error);

      vm_map_clip_start (map, entry, start, &entries);
      vm_map_clip_end (map, entry, end, &entries);
      VM_MAP_SET_PROT (&entry->flags, prot);
      assert (list_empty (&entries));
    }
  else
    {
      VM_MAP_SET_PROT (&entry->flags, prot);
      if (&entry->list_node != list_first (&map->entry_list))
        vm_map_try_merge_entries (map, list_prev_entry (entry, list_node),
                                  entry, dead);
    }

  if (prot == VM_PROT_NONE)
    /*
     * Note that this removes the mappings, but the pages are still
     * available via the VM object.
     */
    pmap_remove_range (map->pmap, start, end, VM_MAP_PMAP_FLAGS);
  else
    pmap_protect_range (map->pmap, start, end, prot, VM_MAP_PMAP_FLAGS);

  return (0);
}

static int
vm_map_protect_impl (struct vm_map *map, uintptr_t start, uintptr_t end,
                     int prot, struct list *dead)
{
  SXLOCK_EXGUARD (&map->lock);
  _Auto entry = vm_map_lookup_nearest (map, start);

  if (! entry)
    return (ENOMEM);

  int error;
  struct vm_map_entry *next;

  while (1)
    {
      next = vm_map_next (map, entry);
      error = vm_map_protect_entry (map, entry, start, end, prot, dead);

      if (error || entry->end >= end)
        break;
      else if (!next || entry->end != next->start)
        {
          error = ENOMEM;
          break;
        }

      entry = next;
    }

  if (!error && next)
    vm_map_try_merge_entries (map, entry, next, dead);

  // Don't prevent lookups and page faults from here on.
  sxlock_share (&map->lock);
  pmap_update (map->pmap);
  return (error);
}

int
vm_map_protect (struct vm_map *map, uintptr_t start, uintptr_t end, int prot)
{
  if (!vm_page_aligned (start) || !vm_page_aligned (end) || end < start)
    return (EINVAL);

  struct list dead;
  list_init (&dead);

  int error = vm_map_protect_impl (map, start, end, prot, &dead);
  vm_map_entry_list_destroy (map, &dead, VM_MAP_FREE_NONE);
  return (error);
}
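
/*
 * Illustrative call to vm_map_protect() above: revoking all access to a
 * page-aligned range (the range must be fully mapped, otherwise ENOMEM is
 * returned; with VM_PROT_NONE the physical mappings are removed but the
 * pages remain available through the VM object):
 *
 *   int error = vm_map_protect (map, va, va + PAGE_SIZE, VM_PROT_NONE);
 */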

static void
vm_map_init (struct vm_map *map, struct pmap *pmap,
             uintptr_t start, uintptr_t end, struct vm_object *priv)
{
  assert (vm_page_aligned (start));
  assert (vm_page_aligned (end));
  assert (start < end);

  sxlock_init (&map->lock);
  list_init (&map->entry_list);
  rbtree_init (&map->entry_tree);
  map->nr_entries = 0;
  map->start = start;
  map->end = end;
  map->size = 0;
  map->lookup_cache = NULL;
  map->pmap = pmap;
  map->priv_cache = priv;
  map->soft_faults = 0;
  map->hard_faults = 0;
}

#ifdef CONFIG_SHELL

static void
vm_map_shell_info (struct shell *shell, int argc, char **argv)
{
  if (argc < 2)
    {
      stream_puts (shell->stream, "usage: vm_map_info task\n");
      return;
    }

  const _Auto task = task_lookup (argv[1]);
  if (! task)
    stream_puts (shell->stream, "vm_map_info: task not found\n");
  else
    {
      vm_map_info (task_get_vm_map (task), shell->stream);
      task_unref (task);
    }
}

static struct shell_cmd vm_map_shell_cmds[] =
{
  SHELL_CMD_INITIALIZER ("vm_map_info", vm_map_shell_info,
                         "vm_map_info <task_name>",
                         "display information about a VM map"),
};

static int __init
vm_map_setup_shell (void)
{
  SHELL_REGISTER_CMDS (vm_map_shell_cmds, shell_get_main_cmd_set ());
  return (0);
}

INIT_OP_DEFINE (vm_map_setup_shell,
                INIT_OP_DEP (printf_setup, true),
                INIT_OP_DEP (shell_setup, true),
                INIT_OP_DEP (task_setup, true),
                INIT_OP_DEP (vm_map_setup, true));

#endif

static int __init
vm_map_bootstrap (void)
{
  vm_map_init (vm_map_get_kernel_map (), pmap_get_kernel_pmap (),
               PMAP_START_KMEM_ADDRESS, PMAP_END_KMEM_ADDRESS, NULL);
  kmem_cache_init (&vm_map_entry_cache, "vm_map_entry",
                   sizeof (struct vm_map_entry), 0, NULL,
                   KMEM_CACHE_PAGE_ONLY);
  return (0);
}

INIT_OP_DEFINE (vm_map_bootstrap,
                INIT_OP_DEP (kmem_bootstrap, true),
                INIT_OP_DEP (thread_bootstrap, true));

// Paging cluster parameters (Expressed in bytes).
static const struct vm_map_page_target vm_map_page_targets[] =
{
  [VM_ADV_NORMAL]     = { .front = PAGE_SIZE, .back = 3 * PAGE_SIZE },
  [VM_ADV_RANDOM]     = { .front = 0,         .back = PAGE_SIZE     },
  [VM_ADV_SEQUENTIAL] = { .front = 0,         .back = 8 * PAGE_SIZE }
};
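
/*
 * Illustrative reading of the table above: on a fault well inside a
 * VM_ADV_NORMAL entry, vm_map_fault_get_params() pulls in one page before
 * the faulting address and up to three pages starting at it, a 4-page
 * cluster; VM_ADV_RANDOM fetches only the faulting page, and
 * VM_ADV_SEQUENTIAL fetches up to eight pages starting at the faulting
 * address. Near the entry boundaries the cluster is truncated accordingly.
 */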

static int __init
vm_map_setup (void)
{
  kmem_cache_init (&vm_map_cache, "vm_map", sizeof (struct vm_map),
                   0, NULL, KMEM_CACHE_PAGE_ONLY);
  return (0);
}

INIT_OP_DEFINE (vm_map_setup,
                INIT_OP_DEP (pmap_setup, true),
                INIT_OP_DEP (printf_setup, true),
                INIT_OP_DEP (vm_map_bootstrap, true));

struct vm_map_fault_pages
{
  struct vm_page *store[VM_MAP_MAX_FRAMES];
  int nr_pages;
};

static void
vm_map_fault_get_params (struct vm_map_entry *entry, uintptr_t addr,
                         uint64_t *offsetp, int *npagesp)
{
  uint32_t adv = VM_MAP_ADVICE (entry->flags);
  assert (adv < ARRAY_SIZE (vm_map_page_targets));

  const _Auto target = &vm_map_page_targets[adv];

  // Mind overflows when computing the offsets.
  uintptr_t start_off = MIN (addr - entry->start, target->front),
            last_off = MIN (entry->end - addr, target->back);

  *npagesp = (int)((last_off + start_off) >> PAGE_SHIFT);
  assert (*npagesp <= VM_MAP_MAX_FRAMES);
  *offsetp -= start_off;
}

static int
vm_map_fault_alloc_pages (struct vm_map_fault_pages *pages)
{
  for (int i = 0; i < pages->nr_pages; ++i)
    {
      _Auto page = vm_page_alloc (0, VM_PAGE_SEL_HIGHMEM,
                                  VM_PAGE_OBJECT, VM_PAGE_SLEEP);
      if (! page)
        {
          while (--i >= 0)
            vm_page_unref (pages->store[i]);
          return (-ENOMEM);
        }

      vm_page_init_refcount (page);
      pages->store[i] = page;
    }

  return (0);
}

static inline void
vm_map_cleanup_object (void *ptr)
{
  vm_map_unref_object (*(struct vm_object **)ptr);
}

static int
vm_map_fault_get_data (struct vm_object *obj, uint64_t off,
                       struct vm_map_fault_pages *pages, int prot)
{
  if (obj->flags & VM_OBJECT_EXTERNAL)
    return (cap_request_pages (obj->channel, off, pages->nr_pages,
                               pages->store));

  // Simple callback-based object.
  int ret = vm_map_fault_alloc_pages (pages);
  if (ret < 0)
    return (ret);

  _Auto window = pmap_window_get (0);
  void *va = pmap_window_va (window);

  for (ret = 0; ret < pages->nr_pages; ++ret, off += PAGE_SIZE)
    {
      pmap_window_set (window, vm_page_to_pa (pages->store[ret]));
      int tmp = obj->page_get (obj, off, PAGE_SIZE, prot, va);

      if (tmp < 0)
        {
          ret = tmp;
          break;
        }
    }

  pmap_window_put (window);
  return (ret);
}

static int
vm_map_fault_handle_cow (uintptr_t addr, struct vm_page **pgp,
                         struct vm_map *map)
{
  _Auto dst_w = pmap_window_get (0);
  cpu_intr_enable ();

  _Auto p2 = vm_page_alloc (0, VM_PAGE_SEL_HIGHMEM,
                            VM_PAGE_OBJECT, VM_PAGE_SLEEP);
  if (! p2)
    {
      pmap_window_put (dst_w);
      return (EINTR);
    }

  _Auto page = *pgp;

  /*
   * We need both windows to copy the page's contents because the virtual
   * address may not be mapped.
   */
  _Auto src_w = pmap_window_get (1);

  pmap_window_set (dst_w, vm_page_to_pa (p2));
  pmap_window_set (src_w, vm_page_to_pa (page));
  memcpy (pmap_window_va (dst_w), pmap_window_va (src_w), PAGE_SIZE);

  pmap_window_put (dst_w);
  cpu_intr_disable ();
  pmap_window_put (src_w);

  vm_page_init_refcount (p2);
  int ret = vm_object_swap (map->priv_cache, p2, page->offset, page);

  if (likely (ret == 0))
    {
      *pgp = p2;
      // Removing the physical mapping will unreference the source page.
      pmap_remove (map->pmap, addr, 0);
    }
  else
    {
      cpu_intr_enable ();
      vm_page_unref (p2);
    }

  return (ret);
}

static bool
vm_map_fault_soft (struct vm_map *map, struct vm_object *obj, uint64_t off,
                   uintptr_t addr, struct vm_map_entry *entry)
{
  int pflags = PMAP_IGNORE_ERRORS;
  struct vm_page *page;

  if (entry->flags & VM_MAP_PHYS)
    {
      page = entry->pages + ((addr - entry->start) / PAGE_SIZE);
      pflags |= PMAP_SKIP_RSET;
    }
  else
    {
      page = vm_object_lookup (obj, off);
      if (! page)
        return (false);
      else if (!(obj->flags & VM_OBJECT_FLUSHES))
        pflags |= PMAP_SKIP_RSET;
    }

  int prot = VM_MAP_PROT (entry->flags);
  if (((prot & VM_PROT_WRITE) == 0 || !vm_page_is_cow (page) ||
       vm_map_fault_handle_cow (addr, &page, map) == 0) &&
      pmap_enter (map->pmap, addr, vm_page_to_pa (page), prot, pflags) == 0)
    pmap_update (map->pmap);

  if (page->object)
    vm_page_unref (page);

  atomic_add_rlx (&map->soft_faults, 1);
  return (true);
}
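
/*
 * In the terminology used by this file, the path above is a "soft" fault:
 * the page is already resident (in the entry's physical pages or in the VM
 * object), so the fault is resolved by establishing the physical mapping,
 * possibly after breaking copy-on-write. A "hard" fault, handled by
 * vm_map_fault_impl() below, has to fetch the data from the backing object
 * first. The two cases are counted separately in map->soft_faults and
 * map->hard_faults.
 */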

static int
vm_map_unref_pages (struct vm_page **pages, uint32_t cnt, int rv)
{
  for (uint32_t i = 0; i < cnt; ++i)
    if (pages[i])
      vm_page_unref (pages[i]);

  return (-rv);
}

static void
vm_map_fault_free_pages (struct vm_map_fault_pages *p)
{
  cpu_intr_enable ();
  (void)vm_map_unref_pages (p->store, (uint32_t)p->nr_pages, 0);
  cpu_intr_disable ();
}

static int
vm_map_fault_impl (struct vm_map *map, uintptr_t addr, int prot)
{
  struct vm_map_entry *entry, tmp;
  struct vm_object *final_obj, *object;
  uint64_t final_off, offset;

retry:
  {
    SXLOCK_SHGUARD (&map->lock);
    entry = vm_map_lookup_nearest (map, addr);

    if (!entry || addr < entry->start)
      return (EFAULT);
    else if ((prot & VM_MAP_PROT (entry->flags)) != prot)
      return (EACCES);

    prot = VM_MAP_PROT (entry->flags);
    object = entry->object;
    assert (object || (entry->flags & VM_MAP_PHYS));
    offset = entry->offset + addr - entry->start;

    if ((entry->flags & (VM_MAP_ANON | VM_MAP_PHYS)) == VM_MAP_ANON)
      final_off = vm_page_anon_offset (addr), final_obj = map->priv_cache;
    else
      final_off = offset, final_obj = object;

    if (vm_map_fault_soft (map, final_obj, final_off, addr, entry))
      return (0);

    // Prevent the VM object from going away as we drop the lock.
    vm_map_entry_copy (&tmp, entry);
    entry = &tmp;
  }

  cpu_intr_enable ();
  CLEANUP (vm_map_cleanup_object) __unused _Auto objg = object;
  struct vm_map_fault_pages frames;
  uint64_t start_off = offset;

  vm_map_fault_get_params (entry, addr, &start_off, &frames.nr_pages);
  int n_pages = vm_map_fault_get_data (object, start_off, &frames, prot);

  if (n_pages < 0)
    return (-n_pages);
  else if (unlikely (start_off + n_pages * PAGE_SIZE < offset))
    /*
     * We didn't cover the faulting page. This is probably due to a truncated
     * object. Return an error that maps to SIGBUS.
     */
    return (EIO);

  cpu_intr_disable ();
  SXLOCK_SHGUARD (&map->lock);
  _Auto e2 = vm_map_lookup_nearest (map, addr);

  // Check that the entry is still valid and equal to the one we operated on.
  if (!(e2 && e2->object == entry->object &&
        addr >= e2->start &&
        (prot & VM_MAP_PROT (e2->flags)) == prot &&
        addr - (uintptr_t)(offset - start_off) +
          n_pages * PAGE_SIZE <= e2->end))
    {
      vm_map_fault_free_pages (&frames);
      goto retry;
    }

  prot = VM_MAP_PROT (e2->flags);
  addr -= offset - start_off;
  final_off -= offset - start_off;

  for (uint32_t i = 0; i < (uint32_t)n_pages;
       ++i, final_off += PAGE_SIZE, addr += PAGE_SIZE)
    {
      struct vm_page *page = frames.store[i];
      if (vm_object_insert (final_obj, page, final_off) == 0 &&
          pmap_enter (map->pmap, addr, vm_page_to_pa (page),
                      prot, PMAP_IGNORE_ERRORS) == 0)
        frames.store[i] = NULL;
    }

  pmap_update (map->pmap);
  atomic_add_rlx (&map->hard_faults, 1);
  vm_map_fault_free_pages (&frames);
  return (0);
}

int
vm_map_fault (struct vm_map *map, uintptr_t addr, int prot)
{
  assert (map != vm_map_get_kernel_map ());
  assert (!cpu_intr_enabled ());

  int ret = vm_map_fault_impl (map, vm_page_trunc (addr), prot);
  cpu_intr_disable ();
  return (ret);
}

int
vm_map_create (struct vm_map **mapp)
{
  struct vm_map *map = kmem_cache_alloc (&vm_map_cache);
  if (! map)
    return (ENOMEM);

  struct pmap *pmap;
  int error = pmap_create (&pmap);
  if (error)
    goto error_pmap;

  struct vm_object *priv;
  error = vm_object_anon_create (&priv);
  if (error)
    goto error_priv;

  vm_map_init (map, pmap, PMAP_START_ADDRESS, PMAP_END_ADDRESS, priv);
  *mapp = map;
  return (0);

error_priv:
  pmap_destroy (pmap);
error_pmap:
  kmem_cache_free (&vm_map_cache, map);
  return (error);
}

int
vm_map_lookup (struct vm_map *map, uintptr_t addr, struct vm_map_entry *entry)
{
  SXLOCK_SHGUARD (&map->lock);
  _Auto ep = vm_map_lookup_nearest (map, addr);

  if (! ep)
    return (ESRCH);

  vm_map_entry_copy (entry, ep);
  return (0);
}

int
vm_map_anon_alloc (void **outp, struct vm_map *map, size_t size)
{
  if (!map->priv_cache)
    return (EINVAL);

  uintptr_t va = 0;
  int flags = VM_MAP_FLAGS (VM_PROT_RDWR, VM_PROT_RDWR, VM_INHERIT_DEFAULT,
                            VM_ADV_DEFAULT, VM_MAP_ANON);
  int error = vm_map_enter (map, &va, vm_page_round (size), flags,
                            map->priv_cache, 0);
  if (! error)
    *outp = (void *)va;

  return (error);
}
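
/*
 * Illustrative use of the helper above: allocating a page-rounded anonymous
 * buffer in the current task's map (error handling elided):
 *
 *   void *buf;
 *   int error = vm_map_anon_alloc (&buf, vm_map_self (), 4096);
 */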

void
vm_map_destroy (struct vm_map *map)
{
  vm_map_entry_list_destroy (map, &map->entry_list, VM_MAP_FREE_ALL);
  pmap_update (map->pmap);
  pmap_destroy (map->pmap);
  vm_object_unref (map->priv_cache);
  kmem_cache_free (&vm_map_cache, map);
}

static int
vm_map_fork_copy_entries (struct vm_map *dst, struct vm_map *src)
{
  if (vm_map_entry_alloc (&dst->entry_list, src->nr_entries) != 0)
    return (ENOMEM);

  struct vm_map_entry *entry, *out;
  out = list_first_entry (&dst->entry_list, typeof (*out), list_node);

  list_for_each_entry (&src->entry_list, entry, list_node)
    {
      if (VM_MAP_INHERIT (entry->flags) == VM_INHERIT_NONE)
        continue;
      else if (entry->object != src->priv_cache ||
               VM_MAP_INHERIT (entry->flags) == VM_INHERIT_SHARE)
        vm_map_entry_copy (out, entry);
      else
        {
          vm_map_entry_copy_impl (out, entry);
          out->object = dst->priv_cache;
          vm_object_ref (out->object);
        }

      rbtree_insert (&dst->entry_tree, &out->tree_node,
                     vm_map_entry_cmp_insert);
      out = list_next_entry (out, list_node);
    }

  dst->nr_entries = src->nr_entries;
  return (0);
}

static int
vm_map_fork_update_obj (struct vm_map *dst, struct vm_map *src)
{
  struct rdxtree_iter it;
  struct vm_page *page;
  _Auto dst_priv = dst->priv_cache;

  rdxtree_for_each (&src->priv_cache->pages, &it, page)
    {
      _Auto entry = vm_map_lookup_nearest (src, vm_page_anon_va (page));
      if (!entry || VM_MAP_INHERIT (entry->flags) != VM_INHERIT_COPY)
        continue;

      int error = rdxtree_insert (&dst_priv->pages,
                                  vm_page_btop (page->offset), page);
      if (error)
        return (error);

      vm_page_ref (page);
      ++dst_priv->nr_pages;

      int prot = VM_MAP_PROT (entry->flags);
      if (prot & VM_PROT_WRITE)
        pmap_protect (src->pmap, vm_page_anon_va (page),
                      prot & ~VM_PROT_WRITE,
                      PMAP_PEF_GLOBAL | PMAP_IGNORE_ERRORS);

      vm_page_unref (page);
      vm_page_set_cow (page);
    }

  pmap_update (src->pmap);
  dst_priv->refcount += dst_priv->nr_pages;
  return (0);
}

int
vm_map_fork (struct vm_map **mapp, struct vm_map *src)
{
  int error = vm_map_create (mapp);
  if (error)
    return (error);

  struct vm_map *dst = *mapp;
  SXLOCK_EXGUARD (&src->lock);
  MUTEX_GUARD (&src->priv_cache->lock);

  error = vm_map_fork_copy_entries (dst, src);
  if (error || (error = vm_map_fork_update_obj (dst, src)))
    vm_map_destroy (dst);

  return (error);
}
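
/*
 * Summary of the fork semantics implemented above (descriptive): entries
 * marked VM_INHERIT_NONE are skipped, VM_INHERIT_SHARE entries keep
 * referencing the parent's object, and private anonymous entries marked
 * VM_INHERIT_COPY are rebound to the child's private cache. Their resident
 * pages are shared copy-on-write: each page is inserted into the child's
 * cache, write access is removed from the parent's pmap, and the page is
 * flagged with vm_page_set_cow() so that the first write fault in either map
 * goes through vm_map_fault_handle_cow().
 */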

static int
vm_map_iter_cleanup (struct vm_map *map, struct ipc_vme_iter *it,
                     uint32_t ix, int error)
{
  struct list entries;
  list_init (&entries);

  for (; it->cur != ix; --it->cur)
    {
      _Auto page = it->begin + it->cur - 1;
      vm_map_remove_impl (map, page->addr, page->addr + page->size, &entries);
    }

  pmap_update (map->pmap);
  vm_map_entry_list_destroy (map, &entries, VM_MAP_FREE_OBJ);
  return (-error);
}

static void
vm_map_iter_fini (struct sxlock **lockp)
{
  if (*lockp)
    sxlock_unlock (*lockp);
}

static int
vm_map_iter_copy_one (struct vm_map *in_map, struct ipc_vme_iter *in_it,
                      struct vm_map *out_map, struct ipc_vme_iter *out_it)
{
  _Auto page = in_it->begin[in_it->cur];
  uintptr_t end = page.addr + vm_page_round (page.size);

  do
    {
      _Auto entry = vm_map_lookup_nearest (in_map, page.addr);
      _Auto outp = &out_it->begin[out_it->cur];

      if (! entry)
        return (ESRCH);
      else if ((VM_MAP_MAXPROT (entry->flags) & page.max_prot) !=
                 page.max_prot || (page.max_prot & page.prot) != page.prot)
        return (EACCES);

      size_t size = MIN (end - page.addr, page.size);
      if (! size)
        return (EINVAL);

      uint64_t offset = entry->offset + (page.addr - entry->start);
      int flags = VM_MAP_FLAGS (page.max_prot, page.prot,
                                VM_MAP_INHERIT (entry->flags),
                                VM_MAP_ADVICE (entry->flags), 0),
          error = vm_map_enter_locked (out_map, &outp->addr, size,
                                       flags, entry->object, offset);
      if (error)
        return (error);

      outp->prot = page.prot;
      outp->max_prot = page.max_prot;
      outp->size = size;
      page.addr += size;
      ++out_it->cur;
    }
  while (page.addr < end && ipc_vme_iter_size (out_it));

  ++in_it->cur;
  return (0);
}

int
vm_map_iter_copy (struct vm_map *r_map, struct ipc_vme_iter *r_it,
                  struct ipc_vme_iter *l_it, uint32_t flags)
{
  struct vm_map *in_map, *out_map;
  struct ipc_vme_iter *in_it, *out_it;

  if (flags & IPC_COPY_FROM)
    {
      in_map = r_map, out_map = vm_map_self ();
      in_it = r_it, out_it = l_it;
    }
  else
    {
      in_map = vm_map_self (), out_map = r_map;
      in_it = l_it, out_it = r_it;
    }

  uint32_t prev = out_it->cur;
  int i = 0, nmax = (int)MIN (ipc_vme_iter_size (in_it),
                              ipc_vme_iter_size (out_it));

  struct sxlock *lock CLEANUP (vm_map_iter_fini) = &in_map->lock;
  SXLOCK_EXGUARD (&out_map->lock);

  if (likely (in_map != out_map))
    sxlock_shlock (lock);
  else
    lock = NULL;

  for (; i < nmax; ++i)
    {
      int tmp = vm_map_iter_copy_one (in_map, in_it, out_map, out_it);
      if (unlikely (tmp != 0))
        return (vm_map_iter_cleanup (out_map, out_it, prev, tmp));
    }

  return (i);
}

int
vm_map_reply_pagereq (const uintptr_t *src, uint32_t cnt, struct vm_page **out)
{
  struct vm_map_entry *entry = NULL;
  _Auto map = vm_map_self ();
  SXLOCK_SHGUARD (&map->lock);

  for (uint32_t i = 0; i < cnt; ++i)
    {
      uintptr_t va = src[i];
      if ((!entry || va < entry->start || va > entry->end) &&
          (!(entry = vm_map_lookup_nearest (map, va)) ||
           !(entry->flags & VM_MAP_PHYS)))
        return (vm_map_unref_pages (out, i, EFAULT));

      uint32_t off = (uint32_t)(va - entry->start) / PAGE_SIZE;
      _Auto page = entry->pages + off;
      vm_page_ref (page);
      out[i] = page;
    }

  return ((int)cnt);
}

void
vm_map_info (struct vm_map *map, struct stream *stream)
{
  if (! map)
    {
      fmt_xprintf (stream, "vm map is empty\n");
      return;
    }

  const char *name = map == vm_map_get_kernel_map () ? "kernel map" : "map";
  SXLOCK_SHGUARD (&map->lock);
  fmt_xprintf (stream, "vm_map: %s: %016lx-%016lx\n",
               name, map->start, map->end);
  fmt_xprintf (stream, "vm_map: start end "
               "size offset flags type\n");

  struct vm_map_entry *entry;
  list_for_each_entry (&map->entry_list, entry, list_node)
    {
      const char *type = entry->object ? "object" : "null";
      fmt_xprintf (stream, "vm_map: %016lx %016lx %8luk %08llx %08x %s\n",
                   entry->start, entry->end,
                   (entry->end - entry->start) >> 10,
                   entry->offset, entry->flags, type);
    }

  fmt_xprintf (stream, "vm_map: total: %zuk\n", map->size >> 10);
}