  1. /* $OpenBSD: uvm_addr.c,v 1.14 2015/07/17 21:56:14 kettenis Exp $ */
  2. /*
  3. * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl>
  4. *
  5. * Permission to use, copy, modify, and distribute this software for any
  6. * purpose with or without fee is hereby granted, provided that the above
  7. * copyright notice and this permission notice appear in all copies.
  8. *
  9. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  10. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  11. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  12. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  13. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  14. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  15. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  16. */
  17. /* #define DEBUG */
  18. #include <sys/param.h>
  19. #include <uvm/uvm.h>
  20. #include <uvm/uvm_addr.h>
  21. #include <sys/pool.h>
  22. /* Max gap between hint allocations. */
  23. #define UADDR_HINT_MAXGAP (4 * PAGE_SIZE)
  24. /* Number of pivots in pivot allocator. */
  25. #define NUM_PIVOTS 16
  26. /*
  27. * Max number (inclusive) of pages the pivot allocator
  28. * will place between allocations.
  29. *
  30. * The uaddr_pivot_random() function attempts to bias towards
  31. * small space between allocations, so putting a large number here is fine.
  32. */
  33. #define PIVOT_RND 8
  34. /*
  35. * Number of allocations that a pivot can supply before expiring.
  36. * When a pivot expires, a new pivot has to be found.
  37. *
  38. * Must be at least 1.
  39. */
  40. #define PIVOT_EXPIRE 1024
  41. /* Pool with uvm_addr_state structures. */
  42. struct pool uaddr_pool;
  43. struct pool uaddr_hint_pool;
  44. struct pool uaddr_bestfit_pool;
  45. struct pool uaddr_pivot_pool;
  46. struct pool uaddr_rnd_pool;
  47. /* uvm_addr state for hint based selector. */
  48. struct uaddr_hint_state {
  49. struct uvm_addr_state uaddr;
  50. vsize_t max_dist;
  51. };
  52. /* uvm_addr state for bestfit selector. */
  53. struct uaddr_bestfit_state {
  54. struct uvm_addr_state ubf_uaddr;
  55. struct uaddr_free_rbtree ubf_free;
  56. };
  57. /* uvm_addr state for rnd selector. */
  58. struct uaddr_rnd_state {
  59. struct uvm_addr_state ur_uaddr;
  60. #if 0
  61. TAILQ_HEAD(, vm_map_entry) ur_free;
  62. #endif
  63. };
  64. /* Definition of a pivot in pivot selector. */
  65. struct uaddr_pivot {
  66. vaddr_t addr; /* End of prev. allocation. */
  67. int expire; /* Best before date. */
  68. int dir; /* Direction. */
  69. struct vm_map_entry *entry; /* Will contain next alloc. */
  70. };
  71. /* uvm_addr state for pivot selector. */
  72. struct uaddr_pivot_state {
  73. struct uvm_addr_state up_uaddr;
  74. /* Free space tree, for fast pivot selection. */
  75. struct uaddr_free_rbtree up_free;
  76. /* List of pivots. The pointers point to after the last allocation. */
  77. struct uaddr_pivot up_pivots[NUM_PIVOTS];
  78. };
  79. /*
  80. * Free space comparison.
  81. * Compares smaller free-space before larger free-space.
  82. */
  83. static __inline int
  84. uvm_mapent_fspace_cmp(struct vm_map_entry *e1, struct vm_map_entry *e2)
  85. {
  86. if (e1->fspace != e2->fspace)
  87. return (e1->fspace < e2->fspace ? -1 : 1);
  88. return (e1->start < e2->start ? -1 : e1->start > e2->start);
  89. }
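/*
 * Illustrative example (not part of the original file): two free ranges
 * of three pages each, one starting at 0x2000 and one at 0x7000, compare
 * equal on fspace, so the tie is broken on the start address and the
 * range at 0x2000 sorts first.  The tie-break keeps the free tree a
 * total order, so equal-sized ranges never collide in the tree.
 */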
  90. /* Forward declaration (see below). */
  91. extern const struct uvm_addr_functions uaddr_kernel_functions;
  92. struct uvm_addr_state uaddr_kbootstrap;
  93. /* Support functions. */
  94. #ifndef SMALL_KERNEL
  95. struct vm_map_entry *uvm_addr_entrybyspace(struct uaddr_free_rbtree*,
  96. vsize_t);
  97. #endif /* !SMALL_KERNEL */
  98. void uaddr_kinsert(struct vm_map *,
  99. struct uvm_addr_state *, struct vm_map_entry *);
  100. void uaddr_kremove(struct vm_map *,
  101. struct uvm_addr_state *, struct vm_map_entry *);
  102. void uaddr_kbootstrapdestroy(struct uvm_addr_state *);
  103. void uaddr_destroy(struct uvm_addr_state *);
  104. void uaddr_hint_destroy(struct uvm_addr_state *);
  105. void uaddr_kbootstrap_destroy(struct uvm_addr_state *);
  106. void uaddr_rnd_destroy(struct uvm_addr_state *);
  107. void uaddr_bestfit_destroy(struct uvm_addr_state *);
  108. void uaddr_pivot_destroy(struct uvm_addr_state *);
  109. #if 0
  110. int uaddr_lin_select(struct vm_map *,
  111. struct uvm_addr_state *, struct vm_map_entry **,
  112. vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
  113. vaddr_t);
  114. #endif
  115. int uaddr_kbootstrap_select(struct vm_map *,
  116. struct uvm_addr_state *, struct vm_map_entry **,
  117. vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
  118. vaddr_t);
  119. int uaddr_rnd_select(struct vm_map *,
  120. struct uvm_addr_state *, struct vm_map_entry **,
  121. vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
  122. vaddr_t);
  123. int uaddr_hint_select(struct vm_map *,
  124. struct uvm_addr_state*, struct vm_map_entry **,
  125. vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
  126. vaddr_t);
  127. int uaddr_bestfit_select(struct vm_map *,
  128. struct uvm_addr_state*, struct vm_map_entry **,
  129. vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
  130. vaddr_t);
  131. #ifndef SMALL_KERNEL
  132. int uaddr_pivot_select(struct vm_map *,
  133. struct uvm_addr_state *, struct vm_map_entry **,
  134. vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
  135. vaddr_t);
  136. int uaddr_stack_brk_select(struct vm_map *,
  137. struct uvm_addr_state *, struct vm_map_entry **,
  138. vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
  139. vaddr_t);
  140. #endif /* !SMALL_KERNEL */
  141. void uaddr_rnd_insert(struct vm_map *,
  142. struct uvm_addr_state *, struct vm_map_entry *);
  143. void uaddr_rnd_remove(struct vm_map *,
  144. struct uvm_addr_state *, struct vm_map_entry *);
  145. void uaddr_bestfit_insert(struct vm_map *,
  146. struct uvm_addr_state *, struct vm_map_entry *);
  147. void uaddr_bestfit_remove(struct vm_map *,
  148. struct uvm_addr_state *, struct vm_map_entry *);
  149. void uaddr_pivot_insert(struct vm_map *,
  150. struct uvm_addr_state *, struct vm_map_entry *);
  151. void uaddr_pivot_remove(struct vm_map *,
  152. struct uvm_addr_state *, struct vm_map_entry *);
  153. #ifndef SMALL_KERNEL
  154. vsize_t uaddr_pivot_random(void);
  155. int uaddr_pivot_newpivot(struct vm_map *,
  156. struct uaddr_pivot_state *, struct uaddr_pivot *,
  157. struct vm_map_entry **, vaddr_t *,
  158. vsize_t, vaddr_t, vaddr_t, vsize_t, vsize_t);
  159. #endif /* !SMALL_KERNEL */
  160. #if defined(DEBUG) || defined(DDB)
  161. void uaddr_pivot_print(struct uvm_addr_state *, boolean_t,
  162. int (*)(const char *, ...));
  163. #if 0
  164. void uaddr_rnd_print(struct uvm_addr_state *, boolean_t,
  165. int (*)(const char *, ...));
  166. #endif
  167. #endif /* DEBUG || DDB */
  168. #ifndef SMALL_KERNEL
  169. /*
  170. * Find smallest entry in tree that will fit sz bytes.
  171. */
  172. struct vm_map_entry *
  173. uvm_addr_entrybyspace(struct uaddr_free_rbtree *free, vsize_t sz)
  174. {
  175. struct vm_map_entry *tmp, *res;
  176. tmp = RB_ROOT(free);
  177. res = NULL;
  178. while (tmp) {
  179. if (tmp->fspace >= sz) {
  180. res = tmp;
  181. tmp = RB_LEFT(tmp, dfree.rbtree);
  182. } else if (tmp->fspace < sz)
  183. tmp = RB_RIGHT(tmp, dfree.rbtree);
  184. }
  185. return res;
  186. }
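/*
 * Illustrative example (not in the original source): with free entries
 * of 2, 5 and 9 pages in the tree, a request for 4 pages descends right
 * past entries that are too small and left past entries that are large
 * enough, remembering the last entry that fit; the 5-page entry, being
 * the smallest one that can hold the request, is returned.
 */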
  187. #endif /* !SMALL_KERNEL */
  188. static __inline vaddr_t
  189. uvm_addr_align_forward(vaddr_t addr, vaddr_t align, vaddr_t offset)
  190. {
  191. vaddr_t adjusted;
  192. KASSERT(offset < align || (align == 0 && offset == 0));
  193. KASSERT((align & (align - 1)) == 0);
  194. KASSERT((offset & PAGE_MASK) == 0);
  195. align = MAX(align, PAGE_SIZE);
  196. adjusted = addr & ~(align - 1);
  197. adjusted += offset;
  198. return (adjusted < addr ? adjusted + align : adjusted);
  199. }
  200. static __inline vaddr_t
  201. uvm_addr_align_backward(vaddr_t addr, vaddr_t align, vaddr_t offset)
  202. {
  203. vaddr_t adjusted;
  204. KASSERT(offset < align || (align == 0 && offset == 0));
  205. KASSERT((align & (align - 1)) == 0);
  206. KASSERT((offset & PAGE_MASK) == 0);
  207. align = MAX(align, PAGE_SIZE);
  208. adjusted = addr & ~(align - 1);
  209. adjusted += offset;
  210. return (adjusted > addr ? adjusted - align : adjusted);
  211. }
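/*
 * Illustrative example (not part of the original file; assumes a 4 KB
 * PAGE_SIZE): with align = 0x10000 and offset = 0x2000, the address
 * 0x456000 is adjusted as follows:
 *
 *   uvm_addr_align_forward(0x456000, 0x10000, 0x2000)  == 0x462000
 *   uvm_addr_align_backward(0x456000, 0x10000, 0x2000) == 0x452000
 *
 * Both results are congruent to offset modulo align; the forward
 * variant never returns an address below its input, the backward
 * variant never returns one above it.
 */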
  212. /*
  213. * Try to fit the requested space into the entry.
  214. */
  215. int
  216. uvm_addr_fitspace(vaddr_t *min_result, vaddr_t *max_result,
  217. vaddr_t low_addr, vaddr_t high_addr, vsize_t sz,
  218. vaddr_t align, vaddr_t offset,
  219. vsize_t before_gap, vsize_t after_gap)
  220. {
  221. vaddr_t tmp;
  222. vsize_t fspace;
  223. if (low_addr > high_addr)
  224. return ENOMEM;
  225. fspace = high_addr - low_addr;
  226. if (fspace < sz + before_gap + after_gap)
  227. return ENOMEM;
  228. /* Calculate lowest address. */
  229. low_addr += before_gap;
  230. low_addr = uvm_addr_align_forward(tmp = low_addr, align, offset);
  231. if (low_addr < tmp) /* Overflow during alignment. */
  232. return ENOMEM;
  233. if (high_addr - after_gap - sz < low_addr)
  234. return ENOMEM;
  235. /* Calculate highest address. */
  236. high_addr -= after_gap + sz;
  237. high_addr = uvm_addr_align_backward(tmp = high_addr, align, offset);
  238. if (high_addr > tmp) /* Overflow during alignment. */
  239. return ENOMEM;
  240. if (low_addr > high_addr)
  241. return ENOMEM;
  242. *min_result = low_addr;
  243. *max_result = high_addr;
  244. return 0;
  245. }
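#if 0
/*
 * Illustrative sketch (not part of the original file): how a selector
 * typically uses uvm_addr_fitspace() to test whether a candidate free
 * range can hold an allocation.  All numbers are hypothetical and
 * assume a 4 KB PAGE_SIZE.
 */
void
uvm_addr_fitspace_example(void)
{
	vaddr_t lo, hi;

	/*
	 * Ask for 3 pages, page alignment, no offset and one guard page
	 * after the allocation, inside the free range [0x1000, 0x10000).
	 */
	if (uvm_addr_fitspace(&lo, &hi, 0x1000, 0x10000,
	    3 * PAGE_SIZE, PAGE_SIZE, 0, 0, PAGE_SIZE) == 0) {
		/*
		 * lo (0x1000) is the lowest and hi (0xc000) the highest
		 * start address that keeps the allocation plus its gaps
		 * inside the range; any address in [lo, hi] that matches
		 * the align/offset constraint may be used.
		 */
	}
}
#endif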
  246. /*
  247. * Initialize uvm_addr.
  248. */
  249. void
  250. uvm_addr_init(void)
  251. {
  252. pool_init(&uaddr_pool, sizeof(struct uvm_addr_state),
  253. 0, 0, PR_WAITOK, "uaddr", NULL);
  254. pool_setipl(&uaddr_pool, IPL_VM);
  255. pool_init(&uaddr_hint_pool, sizeof(struct uaddr_hint_state),
  256. 0, 0, PR_WAITOK, "uaddrhint", NULL);
  257. pool_setipl(&uaddr_hint_pool, IPL_VM);
  258. pool_init(&uaddr_bestfit_pool, sizeof(struct uaddr_bestfit_state),
  259. 0, 0, PR_WAITOK, "uaddrbest", NULL);
  260. pool_setipl(&uaddr_bestfit_pool, IPL_VM);
  261. pool_init(&uaddr_pivot_pool, sizeof(struct uaddr_pivot_state),
  262. 0, 0, PR_WAITOK, "uaddrpivot", NULL);
  263. pool_setipl(&uaddr_pivot_pool, IPL_VM);
  264. pool_init(&uaddr_rnd_pool, sizeof(struct uaddr_rnd_state),
  265. 0, 0, PR_WAITOK, "uaddrrnd", NULL);
  266. pool_setipl(&uaddr_rnd_pool, IPL_VM);
  267. uaddr_kbootstrap.uaddr_minaddr = PAGE_SIZE;
  268. uaddr_kbootstrap.uaddr_maxaddr = -(vaddr_t)PAGE_SIZE;
  269. uaddr_kbootstrap.uaddr_functions = &uaddr_kernel_functions;
  270. }
  271. /*
  272. * Invoke destructor function of uaddr.
  273. */
  274. void
  275. uvm_addr_destroy(struct uvm_addr_state *uaddr)
  276. {
  277. if (uaddr)
  278. (*uaddr->uaddr_functions->uaddr_destroy)(uaddr);
  279. }
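#if 0
/*
 * Illustrative sketch (not part of this file): the typical lifecycle of
 * an address selector as seen from map setup code.  The function name
 * is hypothetical; uaddr_rnd_create() and uvm_addr_destroy() are the
 * real entry points defined in this file.
 */
void
uvm_addr_lifecycle_example(vaddr_t minaddr, vaddr_t maxaddr)
{
	struct uvm_addr_state *uaddr;

	/* Pick an allocation policy for the range [minaddr, maxaddr). */
	uaddr = uaddr_rnd_create(minaddr, maxaddr);

	/*
	 * The selector is handed to the map: allocations go through
	 * uvm_addr_invoke(), free-space changes are reported through the
	 * uaddr_free_insert/uaddr_free_remove hooks.
	 */

	/* Tear down through the per-selector function table. */
	uvm_addr_destroy(uaddr);
}
#endif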
  280. /*
  281. * Move address forward to satisfy align, offset.
  282. */
  283. vaddr_t
  284. uvm_addr_align(vaddr_t addr, vaddr_t align, vaddr_t offset)
  285. {
  286. vaddr_t result = (addr & ~(align - 1)) + offset;
  287. if (result < addr)
  288. result += align;
  289. return result;
  290. }
  291. /*
  292. * Move address backwards to satisfy align, offset.
  293. */
  294. vaddr_t
  295. uvm_addr_align_back(vaddr_t addr, vaddr_t align, vaddr_t offset)
  296. {
  297. vaddr_t result = (addr & ~(align - 1)) + offset;
  298. if (result > addr)
  299. result -= align;
  300. return result;
  301. }
  302. /*
  303. * Directional first fit.
  304. *
  305. * Do a linear search for free space, starting at addr in entry.
  306. * direction == 1: search forward
  307. * direction == -1: search backward
  308. *
  309. * Output: low <= addr <= high and entry will contain addr.
  310. * 0 will be returned if no space is available.
  311. *
  312. * before_gap/after_gap: space that must remain free around the allocation.
  313. */
  314. int
  315. uvm_addr_linsearch(struct vm_map *map, struct uvm_addr_state *uaddr,
  316. struct vm_map_entry **entry_out, vaddr_t *addr_out,
  317. vaddr_t hint, vsize_t sz, vaddr_t align, vaddr_t offset,
  318. int direction, vaddr_t low, vaddr_t high,
  319. vsize_t before_gap, vsize_t after_gap)
  320. {
  321. struct vm_map_entry *entry;
  322. vaddr_t low_addr, high_addr;
  323. KASSERT(entry_out != NULL && addr_out != NULL);
  324. KASSERT(direction == -1 || direction == 1);
  325. KASSERT((hint & PAGE_MASK) == 0 && (high & PAGE_MASK) == 0 &&
  326. (low & PAGE_MASK) == 0 &&
  327. (before_gap & PAGE_MASK) == 0 && (after_gap & PAGE_MASK) == 0);
  328. KASSERT(high + sz > high); /* Check for overflow. */
  329. /* Hint magic. */
  330. if (hint == 0)
  331. hint = (direction == 1 ? low : high);
  332. else if (hint > high) {
  333. if (direction != -1)
  334. return ENOMEM;
  335. hint = high;
  336. } else if (hint < low) {
  337. if (direction != 1)
  338. return ENOMEM;
  339. hint = low;
  340. }
  341. for (entry = uvm_map_entrybyaddr(&map->addr,
  342. hint - (direction == -1 ? 1 : 0)); entry != NULL;
  343. entry = (direction == 1 ?
  344. RB_NEXT(uvm_map_addr, &map->addr, entry) :
  345. RB_PREV(uvm_map_addr, &map->addr, entry))) {
  346. if (VMMAP_FREE_START(entry) > high ||
  347. VMMAP_FREE_END(entry) < low) {
  348. break;
  349. }
  350. if (uvm_addr_fitspace(&low_addr, &high_addr,
  351. MAX(low, VMMAP_FREE_START(entry)),
  352. MIN(high, VMMAP_FREE_END(entry)),
  353. sz, align, offset, before_gap, after_gap) == 0) {
  354. *entry_out = entry;
  355. if (hint >= low_addr && hint <= high_addr) {
  356. *addr_out = hint;
  357. } else {
  358. *addr_out = (direction == 1 ?
  359. low_addr : high_addr);
  360. }
  361. return 0;
  362. }
  363. }
  364. return ENOMEM;
  365. }
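#if 0
/*
 * Illustrative sketch (not from this file): a backward first-fit search
 * within [low, high], keeping one page free on either side of the
 * allocation.  This mirrors how the hint selector below drives
 * uvm_addr_linsearch(); low and high are assumed to be page aligned.
 */
int
uvm_addr_linsearch_example(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vaddr_t low, vaddr_t high, vsize_t sz)
{
	return uvm_addr_linsearch(map, uaddr, entry_out, addr_out,
	    0 /* no hint */, sz, 0, 0, -1 /* search backward */,
	    low, high, PAGE_SIZE, PAGE_SIZE);
}
#endif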
  366. /*
  367. * Invoke address selector of uaddr.
  368. * uaddr may be NULL, in which case the algorithm will fail with ENOMEM.
  369. *
  370. * Will invoke uvm_addr_isavail to fill in last_out.
  371. */
  372. int
  373. uvm_addr_invoke(struct vm_map *map, struct uvm_addr_state *uaddr,
  374. struct vm_map_entry **entry_out, struct vm_map_entry **last_out,
  375. vaddr_t *addr_out,
  376. vsize_t sz, vaddr_t align, vaddr_t offset, vm_prot_t prot, vaddr_t hint)
  377. {
  378. int error;
  379. if (uaddr == NULL)
  380. return ENOMEM;
  381. hint &= ~((vaddr_t)PAGE_MASK);
  382. if (hint != 0 &&
  383. !(hint >= uaddr->uaddr_minaddr && hint < uaddr->uaddr_maxaddr))
  384. return ENOMEM;
  385. error = (*uaddr->uaddr_functions->uaddr_select)(map, uaddr,
  386. entry_out, addr_out, sz, align, offset, prot, hint);
  387. if (error == 0) {
  388. KASSERT(*entry_out != NULL);
  389. *last_out = NULL;
  390. if (!uvm_map_isavail(map, uaddr, entry_out, last_out,
  391. *addr_out, sz)) {
  392. panic("uvm_addr_invoke: address selector %p "
  393. "(%s 0x%lx-0x%lx) "
  394. "returned unavailable address 0x%lx",
  395. uaddr, uaddr->uaddr_functions->uaddr_name,
  396. uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr,
  397. *addr_out);
  398. }
  399. }
  400. return error;
  401. }
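#if 0
/*
 * Illustrative sketch (not from this file): how map code typically asks
 * a selector for an address.  The wrapper name is hypothetical;
 * uvm_addr_invoke() fills in entry_out/last_out and panics if the
 * selector returns an address that uvm_map_isavail() rejects.
 */
int
uvm_addr_invoke_example(struct vm_map *map, struct uvm_addr_state *uaddr,
    vsize_t sz, vm_prot_t prot, vaddr_t *addr_out)
{
	struct vm_map_entry *entry, *last;

	/* No extra alignment, no offset, no hint. */
	return uvm_addr_invoke(map, uaddr, &entry, &last, addr_out,
	    sz, 0, 0, prot, 0);
}
#endif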
  402. #if defined(DEBUG) || defined(DDB)
  403. void
  404. uvm_addr_print(struct uvm_addr_state *uaddr, const char *slot, boolean_t full,
  405. int (*pr)(const char *, ...))
  406. {
  407. if (uaddr == NULL) {
  408. (*pr)("- uvm_addr %s: NULL\n", slot);
  409. return;
  410. }
  411. (*pr)("- uvm_addr %s: %p (%s 0x%lx-0x%lx)\n", slot, uaddr,
  412. uaddr->uaddr_functions->uaddr_name,
  413. uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr);
  414. if (uaddr->uaddr_functions->uaddr_print == NULL)
  415. return;
  416. (*uaddr->uaddr_functions->uaddr_print)(uaddr, full, pr);
  417. }
  418. #endif /* DEBUG || DDB */
  419. /*
  420. * Destroy a uvm_addr_state structure.
  421. * The uaddr must have been previously allocated from uaddr_state_pool.
  422. */
  423. void
  424. uaddr_destroy(struct uvm_addr_state *uaddr)
  425. {
  426. pool_put(&uaddr_pool, uaddr);
  427. }
  428. #if 0
  429. /*
  430. * Linear allocator.
  431. * This allocator uses a first-fit algorithm.
  432. *
  433. * If hint is set, search will start at the hint position.
  434. * Only searches forward.
  435. */
  436. const struct uvm_addr_functions uaddr_lin_functions = {
  437. .uaddr_select = &uaddr_lin_select,
  438. .uaddr_destroy = &uaddr_destroy,
  439. .uaddr_name = "uaddr_lin"
  440. };
  441. struct uvm_addr_state *
  442. uaddr_lin_create(vaddr_t minaddr, vaddr_t maxaddr)
  443. {
  444. struct uvm_addr_state *uaddr;
  445. uaddr = pool_get(&uaddr_pool, PR_WAITOK);
  446. uaddr->uaddr_minaddr = minaddr;
  447. uaddr->uaddr_maxaddr = maxaddr;
  448. uaddr->uaddr_functions = &uaddr_lin_functions;
  449. return uaddr;
  450. }
  451. int
  452. uaddr_lin_select(struct vm_map *map, struct uvm_addr_state *uaddr,
  453. struct vm_map_entry **entry_out, vaddr_t *addr_out,
  454. vsize_t sz, vaddr_t align, vaddr_t offset,
  455. vm_prot_t prot, vaddr_t hint)
  456. {
  457. vaddr_t guard_sz;
  458. /* Deal with guardpages: search for space with one extra page. */
  459. guard_sz = ((map->flags & VM_MAP_GUARDPAGES) == 0 ? 0 : PAGE_SIZE);
  460. if (uaddr->uaddr_maxaddr - uaddr->uaddr_minaddr < sz + guard_sz)
  461. return ENOMEM;
  462. return uvm_addr_linsearch(map, uaddr, entry_out, addr_out, 0, sz,
  463. align, offset, 1, uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr - sz,
  464. 0, guard_sz);
  465. }
  466. #endif
  467. /*
  468. * Randomized allocator.
  469. * This allocator uses uvm_map_hint to acquire a random address and searches
  470. * from there.
  471. */
  472. const struct uvm_addr_functions uaddr_rnd_functions = {
  473. .uaddr_select = &uaddr_rnd_select,
  474. .uaddr_free_insert = &uaddr_rnd_insert,
  475. .uaddr_free_remove = &uaddr_rnd_remove,
  476. .uaddr_destroy = &uaddr_rnd_destroy,
  477. #if defined(DEBUG) || defined(DDB)
  478. #if 0
  479. .uaddr_print = &uaddr_rnd_print,
  480. #endif
  481. #endif /* DEBUG || DDB */
  482. .uaddr_name = "uaddr_rnd"
  483. };
  484. struct uvm_addr_state *
  485. uaddr_rnd_create(vaddr_t minaddr, vaddr_t maxaddr)
  486. {
  487. struct uaddr_rnd_state *uaddr;
  488. uaddr = pool_get(&uaddr_rnd_pool, PR_WAITOK);
  489. uaddr->ur_uaddr.uaddr_minaddr = minaddr;
  490. uaddr->ur_uaddr.uaddr_maxaddr = maxaddr;
  491. uaddr->ur_uaddr.uaddr_functions = &uaddr_rnd_functions;
  492. #if 0
  493. TAILQ_INIT(&uaddr->ur_free);
  494. #endif
  495. return &uaddr->ur_uaddr;
  496. }
  497. int
  498. uaddr_rnd_select(struct vm_map *map, struct uvm_addr_state *uaddr,
  499. struct vm_map_entry **entry_out, vaddr_t *addr_out,
  500. vsize_t sz, vaddr_t align, vaddr_t offset,
  501. vm_prot_t prot, vaddr_t hint)
  502. {
  503. struct vmspace *vm;
  504. vaddr_t minaddr, maxaddr;
  505. vaddr_t guard_sz;
  506. vaddr_t low_addr, high_addr;
  507. struct vm_map_entry *entry, *next;
  508. vsize_t before_gap, after_gap;
  509. vaddr_t tmp;
  510. KASSERT((map->flags & VM_MAP_ISVMSPACE) != 0);
  511. vm = (struct vmspace *)map;
  512. /* Deal with guardpages: search for space with one extra page. */
  513. guard_sz = ((map->flags & VM_MAP_GUARDPAGES) == 0 ? 0 : PAGE_SIZE);
  514. minaddr = uvm_addr_align_forward(uaddr->uaddr_minaddr, align, offset);
  515. maxaddr = uvm_addr_align_backward(uaddr->uaddr_maxaddr - sz - guard_sz,
  516. align, offset);
  517. /* Quick fail if the allocation won't fit. */
  518. if (minaddr >= maxaddr)
  519. return ENOMEM;
  520. /* Select a hint. */
  521. if (hint == 0)
  522. hint = uvm_map_hint(vm, prot, minaddr, maxaddr);
  523. /* Clamp hint to uaddr range. */
  524. hint = MIN(MAX(hint, minaddr), maxaddr);
  525. /* Align hint to align,offset parameters. */
  526. tmp = hint;
  527. hint = uvm_addr_align_forward(tmp, align, offset);
  528. /* Check for overflow during alignment. */
  529. if (hint < tmp || hint > maxaddr)
  530. return ENOMEM; /* Compatibility mode: never look backwards. */
  531. before_gap = 0;
  532. after_gap = guard_sz;
  533. hint -= MIN(hint, before_gap);
  534. /*
  535. * Use the augmented address tree to look up the first entry
  536. * at or after hint with sufficient space.
  537. *
  538. * This is the original optimized code, but it can fail when the
  539. * subtree it looks at does have sufficient space that nevertheless
  540. * cannot satisfy the align constraint.
  541. *
  542. * Guard: subtree is not exhausted and max(fspace) >= required.
  543. */
  544. entry = uvm_map_entrybyaddr(&map->addr, hint);
  545. /* Walk up the tree, until there is at least sufficient space. */
  546. while (entry != NULL &&
  547. entry->fspace_augment < before_gap + after_gap + sz)
  548. entry = RB_PARENT(entry, daddrs.addr_entry);
  549. while (entry != NULL) {
  550. /* Test if this fits. */
  551. if (VMMAP_FREE_END(entry) > hint &&
  552. uvm_map_uaddr_e(map, entry) == uaddr &&
  553. uvm_addr_fitspace(&low_addr, &high_addr,
  554. MAX(uaddr->uaddr_minaddr, VMMAP_FREE_START(entry)),
  555. MIN(uaddr->uaddr_maxaddr, VMMAP_FREE_END(entry)),
  556. sz, align, offset, before_gap, after_gap) == 0) {
  557. *entry_out = entry;
  558. if (hint >= low_addr && hint <= high_addr)
  559. *addr_out = hint;
  560. else
  561. *addr_out = low_addr;
  562. return 0;
  563. }
  564. /* RB_NEXT, but skip subtrees that cannot possibly fit. */
  565. next = RB_RIGHT(entry, daddrs.addr_entry);
  566. if (next != NULL &&
  567. next->fspace_augment >= before_gap + after_gap + sz) {
  568. entry = next;
  569. while ((next = RB_LEFT(entry, daddrs.addr_entry)) !=
  570. NULL)
  571. entry = next;
  572. } else {
  573. do_parent:
  574. next = RB_PARENT(entry, daddrs.addr_entry);
  575. if (next == NULL)
  576. entry = NULL;
  577. else if (RB_LEFT(next, daddrs.addr_entry) == entry)
  578. entry = next;
  579. else {
  580. entry = next;
  581. goto do_parent;
  582. }
  583. }
  584. }
  585. /* Lookup failed. */
  586. return ENOMEM;
  587. }
  588. /*
  589. * Destroy a uaddr_rnd_state structure.
  590. */
  591. void
  592. uaddr_rnd_destroy(struct uvm_addr_state *uaddr)
  593. {
  594. pool_put(&uaddr_rnd_pool, uaddr);
  595. }
  596. /*
  597. * Add entry to tailq.
  598. */
  599. void
  600. uaddr_rnd_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
  601. struct vm_map_entry *entry)
  602. {
  603. return;
  604. }
  605. /*
  606. * Remove entry from tailq.
  607. */
  608. void
  609. uaddr_rnd_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
  610. struct vm_map_entry *entry)
  611. {
  612. return;
  613. }
  614. #if 0
  615. #if defined(DEBUG) || defined(DDB)
  616. void
  617. uaddr_rnd_print(struct uvm_addr_state *uaddr_p, boolean_t full,
  618. int (*pr)(const char*, ...))
  619. {
  620. struct vm_map_entry *entry;
  621. struct uaddr_rnd_state *uaddr;
  622. vaddr_t addr;
  623. size_t count;
  624. vsize_t space;
  625. uaddr = (struct uaddr_rnd_state *)uaddr_p;
  626. addr = 0;
  627. count = 0;
  628. space = 0;
  629. TAILQ_FOREACH(entry, &uaddr->ur_free, dfree.tailq) {
  630. count++;
  631. space += entry->fspace;
  632. if (full) {
  633. (*pr)("\tentry %p: 0x%lx-0x%lx G=0x%lx F=0x%lx\n",
  634. entry, entry->start, entry->end,
  635. entry->guard, entry->fspace);
  636. (*pr)("\t\tfree: 0x%lx-0x%lx\n",
  637. VMMAP_FREE_START(entry), VMMAP_FREE_END(entry));
  638. }
  639. if (entry->start < addr) {
  640. if (!full)
  641. (*pr)("\tentry %p: 0x%lx-0x%lx "
  642. "G=0x%lx F=0x%lx\n",
  643. entry, entry->start, entry->end,
  644. entry->guard, entry->fspace);
  645. (*pr)("\t\tstart=0x%lx, expected at least 0x%lx\n",
  646. entry->start, addr);
  647. }
  648. addr = VMMAP_FREE_END(entry);
  649. }
  650. (*pr)("\t%lu entries, 0x%lx free bytes\n", count, space);
  651. }
  652. #endif /* DEBUG || DDB */
  653. #endif
  654. /*
  655. * An allocator that selects an address within distance of the hint.
  656. *
  657. * If no hint is given, the allocator refuses to allocate.
  658. */
  659. const struct uvm_addr_functions uaddr_hint_functions = {
  660. .uaddr_select = &uaddr_hint_select,
  661. .uaddr_destroy = &uaddr_hint_destroy,
  662. .uaddr_name = "uaddr_hint"
  663. };
  664. /*
  665. * Create uaddr_hint state.
  666. */
  667. struct uvm_addr_state *
  668. uaddr_hint_create(vaddr_t minaddr, vaddr_t maxaddr, vsize_t max_dist)
  669. {
  670. struct uaddr_hint_state *ua_hint;
  671. KASSERT(uaddr_hint_pool.pr_size == sizeof(*ua_hint));
  672. ua_hint = pool_get(&uaddr_hint_pool, PR_WAITOK);
  673. ua_hint->uaddr.uaddr_minaddr = minaddr;
  674. ua_hint->uaddr.uaddr_maxaddr = maxaddr;
  675. ua_hint->uaddr.uaddr_functions = &uaddr_hint_functions;
  676. ua_hint->max_dist = max_dist;
  677. return &ua_hint->uaddr;
  678. }
  679. /*
  680. * Destroy uaddr_hint state.
  681. */
  682. void
  683. uaddr_hint_destroy(struct uvm_addr_state *uaddr)
  684. {
  685. pool_put(&uaddr_hint_pool, uaddr);
  686. }
  687. /*
  688. * Hint selector.
  689. *
  690. * Attempts to find an address that is within max_dist of the hint.
  691. */
  692. int
  693. uaddr_hint_select(struct vm_map *map, struct uvm_addr_state *uaddr_param,
  694. struct vm_map_entry **entry_out, vaddr_t *addr_out,
  695. vsize_t sz, vaddr_t align, vaddr_t offset,
  696. vm_prot_t prot, vaddr_t hint)
  697. {
  698. struct uaddr_hint_state *uaddr =
  699. (struct uaddr_hint_state *)uaddr_param;
  700. vsize_t before_gap, after_gap;
  701. vaddr_t low, high;
  702. int dir;
  703. if (hint == 0)
  704. return ENOMEM;
  705. /* Calculate upper and lower bound for selected address. */
  706. high = hint + uaddr->max_dist;
  707. if (high < hint) /* overflow */
  708. high = map->max_offset;
  709. high = MIN(high, uaddr->uaddr.uaddr_maxaddr);
  710. if (high < sz)
  711. return ENOMEM; /* Protect against underflow. */
  712. high -= sz;
  713. /* Calculate lower bound for selected address. */
  714. low = hint - uaddr->max_dist;
  715. if (low > hint) /* underflow */
  716. low = map->min_offset;
  717. low = MAX(low, uaddr->uaddr.uaddr_minaddr);
  718. /* Search strategy setup. */
  719. before_gap = PAGE_SIZE +
  720. (arc4random_uniform(UADDR_HINT_MAXGAP) & ~(vaddr_t)PAGE_MASK);
  721. after_gap = PAGE_SIZE +
  722. (arc4random_uniform(UADDR_HINT_MAXGAP) & ~(vaddr_t)PAGE_MASK);
  723. dir = (arc4random() & 0x01) ? 1 : -1;
  724. /*
  725. * Try to search:
  726. * - forward, with gap
  727. * - backward, with gap
  728. * - forward, without gap
  729. * - backward, without gap
  730. * (Where forward is in the direction specified by dir and
  731. * backward is in the direction specified by -dir).
  732. */
  733. if (uvm_addr_linsearch(map, uaddr_param,
  734. entry_out, addr_out, hint, sz, align, offset,
  735. dir, low, high, before_gap, after_gap) == 0)
  736. return 0;
  737. if (uvm_addr_linsearch(map, uaddr_param,
  738. entry_out, addr_out, hint, sz, align, offset,
  739. -dir, low, high, before_gap, after_gap) == 0)
  740. return 0;
  741. if (uvm_addr_linsearch(map, uaddr_param,
  742. entry_out, addr_out, hint, sz, align, offset,
  743. dir, low, high, 0, 0) == 0)
  744. return 0;
  745. if (uvm_addr_linsearch(map, uaddr_param,
  746. entry_out, addr_out, hint, sz, align, offset,
  747. -dir, low, high, 0, 0) == 0)
  748. return 0;
  749. return ENOMEM;
  750. }
  751. /*
  752. * Kernel allocation bootstrap logic.
  753. */
  754. const struct uvm_addr_functions uaddr_kernel_functions = {
  755. .uaddr_select = &uaddr_kbootstrap_select,
  756. .uaddr_destroy = &uaddr_kbootstrap_destroy,
  757. .uaddr_name = "uaddr_kbootstrap"
  758. };
  759. /*
  760. * Select an address from the map.
  761. *
  762. * This function ignores the uaddr spec and instead uses the map directly.
  763. * Because of that property, the uaddr algorithm can be shared across all
  764. * kernel maps.
  765. */
  766. int
  767. uaddr_kbootstrap_select(struct vm_map *map, struct uvm_addr_state *uaddr,
  768. struct vm_map_entry **entry_out, vaddr_t *addr_out,
  769. vsize_t sz, vaddr_t align, vaddr_t offset, vm_prot_t prot, vaddr_t hint)
  770. {
  771. vaddr_t tmp;
  772. RB_FOREACH(*entry_out, uvm_map_addr, &map->addr) {
  773. if (VMMAP_FREE_END(*entry_out) <= uvm_maxkaddr &&
  774. uvm_addr_fitspace(addr_out, &tmp,
  775. VMMAP_FREE_START(*entry_out), VMMAP_FREE_END(*entry_out),
  776. sz, align, offset, 0, 0) == 0)
  777. return 0;
  778. }
  779. return ENOMEM;
  780. }
  781. /*
  782. * Don't destroy the kernel bootstrap allocator.
  783. */
  784. void
  785. uaddr_kbootstrap_destroy(struct uvm_addr_state *uaddr)
  786. {
  787. KASSERT(uaddr == (struct uvm_addr_state *)&uaddr_kbootstrap);
  788. }
  789. #ifndef SMALL_KERNEL
  790. /*
  791. * Best fit algorithm.
  792. */
  793. const struct uvm_addr_functions uaddr_bestfit_functions = {
  794. .uaddr_select = &uaddr_bestfit_select,
  795. .uaddr_free_insert = &uaddr_bestfit_insert,
  796. .uaddr_free_remove = &uaddr_bestfit_remove,
  797. .uaddr_destroy = &uaddr_bestfit_destroy,
  798. .uaddr_name = "uaddr_bestfit"
  799. };
  800. struct uvm_addr_state *
  801. uaddr_bestfit_create(vaddr_t minaddr, vaddr_t maxaddr)
  802. {
  803. struct uaddr_bestfit_state *uaddr;
  804. uaddr = pool_get(&uaddr_bestfit_pool, PR_WAITOK);
  805. uaddr->ubf_uaddr.uaddr_minaddr = minaddr;
  806. uaddr->ubf_uaddr.uaddr_maxaddr = maxaddr;
  807. uaddr->ubf_uaddr.uaddr_functions = &uaddr_bestfit_functions;
  808. RB_INIT(&uaddr->ubf_free);
  809. return &uaddr->ubf_uaddr;
  810. }
  811. void
  812. uaddr_bestfit_destroy(struct uvm_addr_state *uaddr)
  813. {
  814. pool_put(&uaddr_bestfit_pool, uaddr);
  815. }
  816. void
  817. uaddr_bestfit_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
  818. struct vm_map_entry *entry)
  819. {
  820. struct uaddr_bestfit_state *uaddr;
  821. struct vm_map_entry *rb_rv;
  822. uaddr = (struct uaddr_bestfit_state *)uaddr_p;
  823. if ((rb_rv = RB_INSERT(uaddr_free_rbtree, &uaddr->ubf_free, entry)) !=
  824. NULL) {
  825. panic("%s: duplicate insertion: state %p "
  826. "inserting %p, colliding with %p", __func__,
  827. uaddr, entry, rb_rv);
  828. }
  829. }
  830. void
  831. uaddr_bestfit_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
  832. struct vm_map_entry *entry)
  833. {
  834. struct uaddr_bestfit_state *uaddr;
  835. uaddr = (struct uaddr_bestfit_state *)uaddr_p;
  836. if (RB_REMOVE(uaddr_free_rbtree, &uaddr->ubf_free, entry) != entry)
  837. panic("%s: entry was not in tree", __func__);
  838. }
  839. int
  840. uaddr_bestfit_select(struct vm_map *map, struct uvm_addr_state *uaddr_p,
  841. struct vm_map_entry **entry_out, vaddr_t *addr_out,
  842. vsize_t sz, vaddr_t align, vaddr_t offset,
  843. vm_prot_t prot, vaddr_t hint)
  844. {
  845. vaddr_t min, max;
  846. struct uaddr_bestfit_state *uaddr;
  847. struct vm_map_entry *entry;
  848. vsize_t guardsz;
  849. uaddr = (struct uaddr_bestfit_state *)uaddr_p;
  850. guardsz = ((map->flags & VM_MAP_GUARDPAGES) ? PAGE_SIZE : 0);
  851. /*
  852. * Find smallest item on freelist capable of holding item.
  853. * Deal with guardpages: search for space with one extra page.
  854. */
  855. entry = uvm_addr_entrybyspace(&uaddr->ubf_free, sz + guardsz);
  856. if (entry == NULL)
  857. return ENOMEM;
  858. /* Walk the tree until we find an entry that fits. */
  859. while (uvm_addr_fitspace(&min, &max,
  860. VMMAP_FREE_START(entry), VMMAP_FREE_END(entry),
  861. sz, align, offset, 0, guardsz) != 0) {
  862. entry = RB_NEXT(uaddr_free_rbtree, &uaddr->ubf_free, entry);
  863. if (entry == NULL)
  864. return ENOMEM;
  865. }
  866. /* Return the address that generates the least fragmentation. */
  867. *entry_out = entry;
  868. *addr_out = (min - VMMAP_FREE_START(entry) <=
  869. VMMAP_FREE_END(entry) - guardsz - sz - max ?
  870. min : max);
  871. return 0;
  872. }
  873. #endif /* !SMALL_KERNEL */
  874. #ifndef SMALL_KERNEL
  875. /*
  876. * A userspace allocator based on pivots.
  877. */
  878. const struct uvm_addr_functions uaddr_pivot_functions = {
  879. .uaddr_select = &uaddr_pivot_select,
  880. .uaddr_free_insert = &uaddr_pivot_insert,
  881. .uaddr_free_remove = &uaddr_pivot_remove,
  882. .uaddr_destroy = &uaddr_pivot_destroy,
  883. #if defined(DEBUG) || defined(DDB)
  884. .uaddr_print = &uaddr_pivot_print,
  885. #endif /* DEBUG || DDB */
  886. .uaddr_name = "uaddr_pivot"
  887. };
  888. /*
  889. * A special random function for pivots.
  890. *
  891. * This function will return:
  892. * - a random number
  893. * - a multiple of PAGE_SIZE
  894. * - at least PAGE_SIZE
  895. *
  896. * The random function has a slightly higher chance of returning a small number.
  897. */
  898. vsize_t
  899. uaddr_pivot_random()
  900. {
  901. int r;
  902. /*
  903. * The sum of two dice, each uniform over [0, PIVOT_RND - 1], has a
  904. * triangular distribution peaking at PIVOT_RND - 1. We fold that
  905. * curve at its peak (think of a graph on a piece of paper, folded at
  906. * its highest point), so the most probable value maps to 0.
  907. * Because of the fold at PIVOT_RND - 1, the result is strongly
  908. * biased towards small numbers.
  909. */
  910. r = arc4random_uniform(PIVOT_RND) + arc4random_uniform(PIVOT_RND) -
  911. (PIVOT_RND - 1);
  912. if (r < 0)
  913. r = -r;
  914. /*
  915. * Make the returned value at least PAGE_SIZE and a multiple of
  916. * PAGE_SIZE.
  917. */
  918. return (vaddr_t)(1 + r) << PAGE_SHIFT;
  919. }
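/*
 * Illustrative distribution (not part of the original file): with
 * PIVOT_RND == 8, the folded value takes 0..7 with probabilities
 * 8/64, 14/64, 12/64, 10/64, 8/64, 6/64, 4/64 and 2/64, so the
 * returned gap of (1 + r) pages is usually only a few pages.
 */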
  920. /*
  921. * Select a new pivot.
  922. *
  923. * A pivot must:
  924. * - be chosen at random
  925. * - have a randomly chosen gap before it, where the uaddr_state starts
  926. * - have a randomly chosen gap after it, before the uaddr_state ends
  927. *
  928. * Furthermore, the pivot must provide sufficient space for the allocation.
  929. * The addr will be set to the selected address.
  930. *
  931. * Returns ENOMEM on failure.
  932. */
  933. int
  934. uaddr_pivot_newpivot(struct vm_map *map, struct uaddr_pivot_state *uaddr,
  935. struct uaddr_pivot *pivot,
  936. struct vm_map_entry **entry_out, vaddr_t *addr_out,
  937. vsize_t sz, vaddr_t align, vaddr_t offset,
  938. vsize_t before_gap, vsize_t after_gap)
  939. {
  940. struct vm_map_entry *entry, *found;
  941. vaddr_t minaddr, maxaddr;
  942. vsize_t dist;
  943. vaddr_t found_minaddr, found_maxaddr;
  944. vaddr_t min, max;
  945. vsize_t arc4_arg;
  946. int fit_error;
  947. u_int32_t path;
  948. minaddr = uaddr->up_uaddr.uaddr_minaddr;
  949. maxaddr = uaddr->up_uaddr.uaddr_maxaddr;
  950. KASSERT(minaddr < maxaddr);
  951. #ifdef DIAGNOSTIC
  952. if (minaddr + 2 * PAGE_SIZE > maxaddr) {
  953. panic("uaddr_pivot_newpivot: cannot grant random pivot "
  954. "in area less than 2 pages (size = 0x%lx)",
  955. maxaddr - minaddr);
  956. }
  957. #endif /* DIAGNOSTIC */
  958. /*
  959. * Gap calculation: 1/32 of the size of the managed area.
  960. *
  961. * At most: sufficient to not get truncated at arc4random.
  962. * At least: 2 PAGE_SIZE
  963. *
  964. * minaddr and maxaddr will be changed according to arc4random.
  965. */
  966. dist = MAX((maxaddr - minaddr) / 32, 2 * (vaddr_t)PAGE_SIZE);
  967. if (dist >> PAGE_SHIFT > 0xffffffff) {
  968. minaddr += (vsize_t)arc4random() << PAGE_SHIFT;
  969. maxaddr -= (vsize_t)arc4random() << PAGE_SHIFT;
  970. } else {
  971. minaddr += (vsize_t)arc4random_uniform(dist >> PAGE_SHIFT) <<
  972. PAGE_SHIFT;
  973. maxaddr -= (vsize_t)arc4random_uniform(dist >> PAGE_SHIFT) <<
  974. PAGE_SHIFT;
  975. }
  976. /*
  977. * A very fast way to find an entry that will be large enough
  978. * to hold the allocation, but still is found more or less
  979. * randomly: the tree path selector has a 50% chance to go for
  980. * a bigger or smaller entry.
  981. *
  982. * Note that the memory may actually be available,
  983. * but the fragmentation may be so bad and the gaps chosen
  984. * so unfortunately, that the allocation will not succeed.
  985. * Or the alignment can only be satisfied by an entry that
  986. * is not visited in the randomly selected path.
  987. *
  988. * This code finds an entry with sufficient space in O(log n) time.
  989. */
  990. path = arc4random();
  991. found = NULL;
  992. entry = RB_ROOT(&uaddr->up_free);
  993. while (entry != NULL) {
  994. fit_error = uvm_addr_fitspace(&min, &max,
  995. MAX(VMMAP_FREE_START(entry), minaddr),
  996. MIN(VMMAP_FREE_END(entry), maxaddr),
  997. sz, align, offset, before_gap, after_gap);
  998. /* It fits, save this entry. */
  999. if (fit_error == 0) {
  1000. found = entry;
  1001. found_minaddr = min;
  1002. found_maxaddr = max;
  1003. }
  1004. /* Next. */
  1005. if (fit_error != 0)
  1006. entry = RB_RIGHT(entry, dfree.rbtree);
  1007. else if ((path & 0x1) == 0) {
  1008. path >>= 1;
  1009. entry = RB_RIGHT(entry, dfree.rbtree);
  1010. } else {
  1011. path >>= 1;
  1012. entry = RB_LEFT(entry, dfree.rbtree);
  1013. }
  1014. }
  1015. if (found == NULL)
  1016. return ENOMEM; /* Did not find a large enough region. */
  1017. /*
  1018. * Calculate a random address within found.
  1019. *
  1020. * found_minaddr and found_maxaddr are already aligned, so be sure
  1021. * to select a multiple of align as the offset in the entry.
  1022. * Preferably, arc4random_uniform is used to provide no bias within
  1023. * the entry.
  1024. * However, if the size of the entry exceeds arc4random_uniform's
  1025. * argument limit, we simply use arc4random (thus limiting ourselves
  1026. * to 4G * PAGE_SIZE bytes offset).
  1027. */
  1028. if (found_maxaddr == found_minaddr)
  1029. *addr_out = found_minaddr;
  1030. else {
  1031. KASSERT(align >= PAGE_SIZE && (align & (align - 1)) == 0);
  1032. arc4_arg = found_maxaddr - found_minaddr;
  1033. if (arc4_arg > 0xffffffff) {
  1034. *addr_out = found_minaddr +
  1035. (arc4random() & (align - 1));
  1036. } else {
  1037. *addr_out = found_minaddr +
  1038. (arc4random_uniform(arc4_arg) & (align - 1));
  1039. }
  1040. }
  1041. /* Address was found in this entry. */
  1042. *entry_out = found;
  1043. /*
  1044. * Set up new pivot and return selected address.
  1045. *
  1046. * Depending on the direction of the pivot, the pivot must be placed
  1047. * at the bottom or the top of the allocation:
  1048. * - if the pivot moves upwards, place the pivot at the top of the
  1049. * allocation,
  1050. * - if the pivot moves downwards, place the pivot at the bottom
  1051. * of the allocation.
  1052. */
  1053. pivot->entry = found;
  1054. pivot->dir = (arc4random() & 0x1 ? 1 : -1);
  1055. if (pivot->dir > 0)
  1056. pivot->addr = *addr_out + sz;
  1057. else
  1058. pivot->addr = *addr_out;
  1059. pivot->expire = PIVOT_EXPIRE - 1; /* First use is right now. */
  1060. return 0;
  1061. }
  1062. /*
  1063. * Pivot selector.
  1064. *
  1065. * Each time the selector is invoked, it will select a random pivot, which
  1066. * it will use to select memory with. The memory will be placed at the pivot,
  1067. * with a randomly sized gap between the allocation and the pivot.
  1068. * The pivot will then move so it will never revisit this address.
  1069. *
  1070. * Each allocation, the pivot expiry timer ticks. Once the pivot becomes
  1071. * expired, it will be replaced with a newly created pivot. Pivots also
  1072. * automatically expire if they fail to provide memory for an allocation.
  1073. *
  1074. * Expired pivots are replaced using the uaddr_pivot_newpivot() function,
  1075. * which will ensure the pivot points at memory in such a way that the
  1076. * allocation will succeed.
  1077. * As an added bonus, the uaddr_pivot_newpivot() function will perform the
  1078. * allocation immediately and move the pivot as appropriate.
  1079. *
  1080. * If uaddr_pivot_newpivot() fails to find a new pivot that will allow the
  1081. * allocation to succeed, it will not create a new pivot and the allocation
  1082. * will fail.
  1083. *
  1084. * A pivot running into used memory will automatically expire (because it will
  1085. * fail to allocate).
  1086. *
  1087. * Characteristics of the allocator:
  1088. * - best case, an allocation is O(log N)
  1089. * (it would be O(1), if it weren't for the need to check if the memory is
  1090. * free; although that can be avoided...)
  1091. * - worst case, an allocation is O(log N)
  1092. * (the uaddr_pivot_newpivot() function has that complexity)
  1093. * - failed allocations always take O(log N)
  1094. * (the uaddr_pivot_newpivot() function will walk that deep into the tree).
  1095. */
  1096. int
  1097. uaddr_pivot_select(struct vm_map *map, struct uvm_addr_state *uaddr_p,
  1098. struct vm_map_entry **entry_out, vaddr_t *addr_out,
  1099. vsize_t sz, vaddr_t align, vaddr_t offset,
  1100. vm_prot_t prot, vaddr_t hint)
  1101. {
  1102. struct uaddr_pivot_state *uaddr;
  1103. struct vm_map_entry *entry;
  1104. struct uaddr_pivot *pivot;
  1105. vaddr_t min, max;
  1106. vsize_t before_gap, after_gap;
  1107. int err;
  1108. /* Hint must be handled by dedicated hint allocator. */
  1109. if (hint != 0)
  1110. return EINVAL;
  1111. /*
  1112. * Select a random pivot and random gap sizes around the allocation.
  1113. */
  1114. uaddr = (struct uaddr_pivot_state *)uaddr_p;
  1115. pivot = &uaddr->up_pivots[
  1116. arc4random_uniform(nitems(uaddr->up_pivots))];
  1117. before_gap = uaddr_pivot_random();
  1118. after_gap = uaddr_pivot_random();
  1119. if (pivot->addr == 0 || pivot->entry == NULL || pivot->expire == 0)
  1120. goto expired; /* Pivot is invalid (null or expired). */
  1121. /* Attempt to use the pivot to map the entry. */
  1122. entry = pivot->entry;
  1123. if (pivot->dir > 0) {
  1124. if (uvm_addr_fitspace(&min, &max,
  1125. MAX(VMMAP_FREE_START(entry), pivot->addr),
  1126. VMMAP_FREE_END(entry), sz, align, offset,
  1127. before_gap, after_gap) == 0) {
  1128. *addr_out = min;
  1129. *entry_out = entry;
  1130. pivot->addr = min + sz;
  1131. pivot->expire--;
  1132. return 0;
  1133. }
  1134. } else {
  1135. if (uvm_addr_fitspace(&min, &max,
  1136. VMMAP_FREE_START(entry),
  1137. MIN(VMMAP_FREE_END(entry), pivot->addr),
  1138. sz, align, offset, before_gap, after_gap) == 0) {
  1139. *addr_out = max;
  1140. *entry_out = entry;
  1141. pivot->addr = max;
  1142. pivot->expire--;
  1143. return 0;
  1144. }
  1145. }
  1146. expired:
  1147. /*
  1148. * Pivot expired or allocation failed.
  1149. * Use pivot selector to do the allocation and find a new pivot.
  1150. */
  1151. err = uaddr_pivot_newpivot(map, uaddr, pivot, entry_out, addr_out,
  1152. sz, align, offset, before_gap, after_gap);
  1153. return err;
  1154. }
  1155. /*
  1156. * Free the pivot.
  1157. */
  1158. void
  1159. uaddr_pivot_destroy(struct uvm_addr_state *uaddr)
  1160. {
  1161. pool_put(&uaddr_pivot_pool, uaddr);
  1162. }
  1163. /*
  1164. * Insert an entry with free space in the space tree.
  1165. */
  1166. void
  1167. uaddr_pivot_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
  1168. struct vm_map_entry *entry)
  1169. {
  1170. struct uaddr_pivot_state *uaddr;
  1171. struct vm_map_entry *rb_rv;
  1172. struct uaddr_pivot *p;
  1173. vaddr_t check_addr;
  1174. vaddr_t start, end;
  1175. uaddr = (struct uaddr_pivot_state *)uaddr_p;
  1176. if ((rb_rv = RB_INSERT(uaddr_free_rbtree, &uaddr->up_free, entry)) !=
  1177. NULL) {
  1178. panic("%s: duplicate insertion: state %p "
  1179. "inserting entry %p which collides with %p", __func__,
  1180. uaddr, entry, rb_rv);
  1181. }
  1182. start = VMMAP_FREE_START(entry);
  1183. end = VMMAP_FREE_END(entry);
  1184. /*
  1185. * Update all pivots that are contained in this entry.
  1186. */
  1187. for (p = &uaddr->up_pivots[0];
  1188. p != &uaddr->up_pivots[nitems(uaddr->up_pivots)]; p++) {
  1189. check_addr = p->addr;
  1190. if (check_addr == 0)
  1191. continue;
  1192. if (p->dir < 0)
  1193. check_addr--;
  1194. if (start <= check_addr &&
  1195. check_addr < end) {
  1196. KASSERT(p->entry == NULL);
  1197. p->entry = entry;
  1198. }
  1199. }
  1200. }
  1201. /*
  1202. * Remove an entry with free space from the space tree.
  1203. */
  1204. void
  1205. uaddr_pivot_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
  1206. struct vm_map_entry *entry)
  1207. {
  1208. struct uaddr_pivot_state *uaddr;
  1209. struct uaddr_pivot *p;
  1210. uaddr = (struct uaddr_pivot_state *)uaddr_p;
  1211. if (RB_REMOVE(uaddr_free_rbtree, &uaddr->up_free, entry) != entry)
  1212. panic("%s: entry was not in tree", __func__);
  1213. /*
  1214. * Inform any pivot with this entry that the entry is gone.
  1215. * Note that this does not automatically invalidate the pivot.
  1216. */
  1217. for (p = &uaddr->up_pivots[0];
  1218. p != &uaddr->up_pivots[nitems(uaddr->up_pivots)]; p++) {
  1219. if (p->entry == entry)
  1220. p->entry = NULL;
  1221. }
  1222. }
  1223. /*
  1224. * Create a new pivot selector.
  1225. *
  1226. * Initially, all pivots are in the expired state.
  1227. * Two reasons for this:
  1228. * - it means this allocator will not take a huge amount of time
  1229. * - pivots select better on demand, because the pivot selection will be
  1230. * affected by preceding allocations:
  1231. * the next pivots will likely end up in different segments of free memory,
  1232. * that was segmented by an earlier allocation; better spread.
  1233. */
  1234. struct uvm_addr_state *
  1235. uaddr_pivot_create(vaddr_t minaddr, vaddr_t maxaddr)
  1236. {
  1237. struct uaddr_pivot_state *uaddr;
  1238. uaddr = pool_get(&uaddr_pivot_pool, PR_WAITOK);
  1239. uaddr->up_uaddr.uaddr_minaddr = minaddr;
  1240. uaddr->up_uaddr.uaddr_maxaddr = maxaddr;
  1241. uaddr->up_uaddr.uaddr_functions = &uaddr_pivot_functions;
  1242. RB_INIT(&uaddr->up_free);
  1243. memset(uaddr->up_pivots, 0, sizeof(uaddr->up_pivots));
  1244. return &uaddr->up_uaddr;
  1245. }
  1246. #if defined(DEBUG) || defined(DDB)
  1247. /*
  1248. * Print the uaddr_pivot_state.
  1249. *
  1250. * If full, a listing of all entries in the state will be provided.
  1251. */
  1252. void
  1253. uaddr_pivot_print(struct uvm_addr_state *uaddr_p, boolean_t full,
  1254. int (*pr)(const char *, ...))
  1255. {
  1256. struct uaddr_pivot_state *uaddr;
  1257. struct uaddr_pivot *pivot;
  1258. struct vm_map_entry *entry;
  1259. int i;
  1260. vaddr_t check_addr;
  1261. uaddr = (struct uaddr_pivot_state *)uaddr_p;
  1262. for (i = 0; i < NUM_PIVOTS; i++) {
  1263. pivot = &uaddr->up_pivots[i];
  1264. (*pr)("\tpivot 0x%lx, expires in %d, direction %d\n",
  1265. pivot->addr, pivot->expire, pivot->dir);
  1266. }
  1267. if (!full)
  1268. return;
  1269. if (RB_EMPTY(&uaddr->up_free))
  1270. (*pr)("\tempty\n");
  1271. /* Print list of free space. */
  1272. RB_FOREACH(entry, uaddr_free_rbtree, &uaddr->up_free) {
  1273. (*pr)("\t0x%lx - 0x%lx free (0x%lx bytes)\n",
  1274. VMMAP_FREE_START(entry), VMMAP_FREE_END(entry),
  1275. VMMAP_FREE_END(entry) - VMMAP_FREE_START(entry));
  1276. for (i = 0; i < NUM_PIVOTS; i++) {
  1277. pivot = &uaddr->up_pivots[i];
  1278. check_addr = pivot->addr;
  1279. if (check_addr == 0)
  1280. continue;
  1281. if (pivot->dir < 0)
  1282. check_addr--;
  1283. if (VMMAP_FREE_START(entry) <= check_addr &&
  1284. check_addr < VMMAP_FREE_END(entry)) {
  1285. (*pr)("\t\tcontains pivot %d (0x%lx)\n",
  1286. i, pivot->addr);
  1287. }
  1288. }
  1289. }
  1290. }
  1291. #endif /* DEBUG || DDB */
  1292. #endif /* !SMALL_KERNEL */
  1293. #ifndef SMALL_KERNEL
  1294. /*
  1295. * Strategy for uaddr_stack_brk_select.
  1296. */
  1297. struct uaddr_bs_strat {
  1298. vaddr_t start; /* Start of area. */
  1299. vaddr_t end; /* End of area. */
  1300. int dir; /* Search direction. */
  1301. };
  1302. /*
  1303. * Stack/break allocator.
  1304. *
  1305. * The stack area is grown into in the direction opposite to stack growth;
  1306. * the brk area is grown downward (because sbrk() grows upward).
  1307. *
  1308. * Both areas are grown into proportionally: a weighted chance is used to
  1309. * select which one (stack or brk area) to try. If the allocation fails,
  1310. * the other one is tested.
  1311. */
  1312. const struct uvm_addr_functions uaddr_stack_brk_functions = {
  1313. .uaddr_select = &uaddr_stack_brk_select,
  1314. .uaddr_destroy = &uaddr_destroy,
  1315. .uaddr_name = "uaddr_stckbrk"
  1316. };
  1317. /*
  1318. * Stack/brk address selector.
  1319. */
  1320. int
  1321. uaddr_stack_brk_select(struct vm_map *map, struct uvm_addr_state *uaddr,
  1322. struct vm_map_entry **entry_out, vaddr_t *addr_out,
  1323. vsize_t sz, vaddr_t align, vaddr_t offset,
  1324. vm_prot_t prot, vaddr_t hint)
  1325. {
  1326. vsize_t before_gap, after_gap;
  1327. int stack_idx, brk_idx;
  1328. struct uaddr_bs_strat strat[2], *s;
  1329. vsize_t sb_size;
  1330. /*
  1331. * Choose gap size and if the stack is searched before or after the
  1332. * brk area.
  1333. */
  1334. before_gap = ((arc4random() & 0x3) + 1) << PAGE_SHIFT;
  1335. after_gap = ((arc4random() & 0x3) + 1) << PAGE_SHIFT;
  1336. sb_size = (map->s_end - map->s_start) + (map->b_end - map->b_start);
  1337. sb_size >>= PAGE_SHIFT;
  1338. if (arc4random_uniform(MAX(sb_size, 0xffffffff)) >
  1339. map->b_end - map->b_start) {
  1340. brk_idx = 1;
  1341. stack_idx = 0;
  1342. } else {
  1343. brk_idx = 0;
  1344. stack_idx = 1;
  1345. }
  1346. /* Set up stack search strategy. */
  1347. s = &strat[stack_idx];
  1348. s->start = MAX(map->s_start, uaddr->uaddr_minaddr);
  1349. s->end = MIN(map->s_end, uaddr->uaddr_maxaddr);
  1350. #ifdef MACHINE_STACK_GROWS_UP
  1351. s->dir = -1;
  1352. #else
  1353. s->dir = 1;
  1354. #endif
  1355. /* Set up brk search strategy. */
  1356. s = &strat[brk_idx];
  1357. s->start = MAX(map->b_start, uaddr->uaddr_minaddr);
  1358. s->end = MIN(map->b_end, uaddr->uaddr_maxaddr);
  1359. s->dir = -1; /* Opposite of brk() growth. */
  1360. /* Linear search for space. */
  1361. for (s = &strat[0]; s < &strat[nitems(strat)]; s++) {
  1362. if (s->end - s->start < sz)
  1363. continue;
  1364. if (uvm_addr_linsearch(map, uaddr, entry_out, addr_out,
  1365. 0, sz, align, offset, s->dir, s->start, s->end - sz,
  1366. before_gap, after_gap) == 0)
  1367. return 0;
  1368. }
  1369. return ENOMEM;
  1370. }
  1371. struct uvm_addr_state *
  1372. uaddr_stack_brk_create(vaddr_t minaddr, vaddr_t maxaddr)
  1373. {
  1374. struct uvm_addr_state* uaddr;
  1375. uaddr = pool_get(&uaddr_pool, PR_WAITOK);
  1376. uaddr->uaddr_minaddr = minaddr;
  1377. uaddr->uaddr_maxaddr = maxaddr;
  1378. uaddr->uaddr_functions = &uaddr_stack_brk_functions;
  1379. return uaddr;
  1380. }
  1381. #endif /* !SMALL_KERNEL */
  1382. #ifndef SMALL_KERNEL
  1383. RB_GENERATE(uaddr_free_rbtree, vm_map_entry, dfree.rbtree,
  1384. uvm_mapent_fspace_cmp);
  1385. #endif /* !SMALL_KERNEL */