  1. /*-
  2. * SPDX-License-Identifier: BSD-2-Clause
  3. *
  4. * Copyright (c) 2017, Jeffrey Roberson <jeff@freebsd.org>
  5. * All rights reserved.
  6. *
  7. * Redistribution and use in source and binary forms, with or without
  8. * modification, are permitted provided that the following conditions
  9. * are met:
  10. * 1. Redistributions of source code must retain the above copyright
  11. * notice unmodified, this list of conditions, and the following
  12. * disclaimer.
  13. * 2. Redistributions in binary form must reproduce the above copyright
  14. * notice, this list of conditions and the following disclaimer in the
  15. * documentation and/or other materials provided with the distribution.
  16. *
  17. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  18. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  19. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  20. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  21. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  22. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  23. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  24. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  25. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  26. * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27. *
  28. */
  29. #include <sys/cdefs.h>
  30. #include "opt_vm.h"
  31. #include <sys/param.h>
  32. #include <sys/systm.h>
  33. #include <sys/bitset.h>
  34. #include <sys/domainset.h>
  35. #include <sys/proc.h>
  36. #include <sys/lock.h>
  37. #include <sys/mutex.h>
  38. #include <sys/malloc.h>
  39. #include <sys/rwlock.h>
  40. #include <sys/vmmeter.h>
  41. #include <vm/vm.h>
  42. #include <vm/vm_param.h>
  43. #include <vm/vm_domainset.h>
  44. #include <vm/vm_object.h>
  45. #include <vm/vm_page.h>
  46. #include <vm/vm_phys.h>
  47. #ifdef NUMA
  48. /*
  49. * Iterators are written such that the first nowait pass has as short a
  50. * codepath as possible to eliminate bloat from the allocator. It is
  51. * assumed that most allocations are successful.
  52. */
/*
 * Interleave stride, in pages, used when the object does not use
 * reservations (see vm_domainset_iter_init()).
 */
static int vm_domainset_default_stride = 64;
/*
 * Determine which policy is to be used for this allocation.
 *
 * Records the domainset, its shared round-robin iterator cursor, and the
 * policy in 'di'.  For interleave policy the page index is scaled so that
 * consecutive chunks of an object land in consecutive domains.
 */
static void
vm_domainset_iter_init(struct vm_domainset_iter *di, struct domainset *ds,
    int *iter, struct vm_object *obj, vm_pindex_t pindex)
{

	di->di_domain = ds;
	di->di_iter = iter;
	di->di_policy = ds->ds_policy;
	/* Start from the set's full mask; callers may clear domains later. */
	DOMAINSET_COPY(&ds->ds_mask, &di->di_valid_mask);
	if (di->di_policy == DOMAINSET_POLICY_INTERLEAVE) {
#if VM_NRESERVLEVEL > 0
		if (vm_object_reserv(obj)) {
			/*
			 * Color the pindex so we end up on the correct
			 * reservation boundary.
			 */
			pindex += obj->pg_color;
#if VM_NRESERVLEVEL > 1
			pindex >>= VM_LEVEL_1_ORDER;
#endif
			pindex >>= VM_LEVEL_0_ORDER;
		} else
#endif
			pindex /= vm_domainset_default_stride;
		/*
		 * Offset pindex so the first page of each object does
		 * not end up in domain 0.
		 */
		if (obj != NULL)
			pindex += (((uintptr_t)obj) / sizeof(*obj));
		di->di_offset = pindex;
	}
	/* Skip domains below min on the first pass. */
	di->di_minskip = true;
}
  91. static void
  92. vm_domainset_iter_rr(struct vm_domainset_iter *di, int *domain)
  93. {
  94. *domain = di->di_domain->ds_order[
  95. ++(*di->di_iter) % di->di_domain->ds_cnt];
  96. }
  97. static void
  98. vm_domainset_iter_prefer(struct vm_domainset_iter *di, int *domain)
  99. {
  100. int d;
  101. do {
  102. d = di->di_domain->ds_order[
  103. ++(*di->di_iter) % di->di_domain->ds_cnt];
  104. } while (d == di->di_domain->ds_prefer);
  105. *domain = d;
  106. }
  107. static void
  108. vm_domainset_iter_interleave(struct vm_domainset_iter *di, int *domain)
  109. {
  110. int d;
  111. d = di->di_offset % di->di_domain->ds_cnt;
  112. *di->di_iter = d;
  113. *domain = di->di_domain->ds_order[d];
  114. }
  115. static void
  116. vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain)
  117. {
  118. KASSERT(di->di_n > 0,
  119. ("vm_domainset_iter_first: Invalid n %d", di->di_n));
  120. switch (di->di_policy) {
  121. case DOMAINSET_POLICY_FIRSTTOUCH:
  122. /*
  123. * To prevent impossible allocations we convert an invalid
  124. * first-touch to round-robin.
  125. */
  126. /* FALLTHROUGH */
  127. case DOMAINSET_POLICY_INTERLEAVE:
  128. /* FALLTHROUGH */
  129. case DOMAINSET_POLICY_ROUNDROBIN:
  130. vm_domainset_iter_rr(di, domain);
  131. break;
  132. case DOMAINSET_POLICY_PREFER:
  133. vm_domainset_iter_prefer(di, domain);
  134. break;
  135. default:
  136. panic("vm_domainset_iter_first: Unknown policy %d",
  137. di->di_policy);
  138. }
  139. KASSERT(*domain < vm_ndomains,
  140. ("vm_domainset_iter_next: Invalid domain %d", *domain));
  141. }
/*
 * Position the iterator on the first candidate domain for the policy and
 * initialize di_n, the number of domains left to visit via
 * vm_domainset_iter_next().
 */
static void
vm_domainset_iter_first(struct vm_domainset_iter *di, int *domain)
{

	switch (di->di_policy) {
	case DOMAINSET_POLICY_FIRSTTOUCH:
		/* Start with the domain of the current CPU. */
		*domain = PCPU_GET(domain);
		if (DOMAINSET_ISSET(*domain, &di->di_valid_mask)) {
			/*
			 * Add an extra iteration because we will visit the
			 * current domain a second time in the rr iterator.
			 */
			di->di_n = di->di_domain->ds_cnt + 1;
			break;
		}
		/*
		 * To prevent impossible allocations we convert an invalid
		 * first-touch to round-robin.
		 */
		/* FALLTHROUGH */
	case DOMAINSET_POLICY_ROUNDROBIN:
		di->di_n = di->di_domain->ds_cnt;
		vm_domainset_iter_rr(di, domain);
		break;
	case DOMAINSET_POLICY_PREFER:
		/* Begin at the preferred domain; the rest are fallbacks. */
		*domain = di->di_domain->ds_prefer;
		di->di_n = di->di_domain->ds_cnt;
		break;
	case DOMAINSET_POLICY_INTERLEAVE:
		/* The starting domain is derived from di_offset. */
		vm_domainset_iter_interleave(di, domain);
		di->di_n = di->di_domain->ds_cnt;
		break;
	default:
		panic("vm_domainset_iter_first: Unknown policy %d",
		    di->di_policy);
	}
	KASSERT(di->di_n > 0,
	    ("vm_domainset_iter_first: Invalid n %d", di->di_n));
	KASSERT(*domain < vm_ndomains,
	    ("vm_domainset_iter_first: Invalid domain %d", *domain));
}
/*
 * Initialize a page-allocation domain iterator and return the first
 * candidate domain in *domain.  *req is rewritten so the caller's first
 * allocation attempt is NOWAIT; any sleeping is done centrally in
 * vm_domainset_iter_page() once all domains have been tried.
 */
void
vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj,
    vm_pindex_t pindex, int *domain, int *req)
{
	struct domainset_ref *dr;

	/*
	 * Object policy takes precedence over thread policy.  The policies
	 * are immutable and unsynchronized.  Updates can race but pointer
	 * loads are assumed to be atomic.
	 */
	if (obj != NULL && obj->domain.dr_policy != NULL)
		dr = &obj->domain;
	else
		dr = &curthread->td_domain;
	vm_domainset_iter_init(di, dr->dr_policy, &dr->dr_iter, obj, pindex);
	/* Save the original request and strip the sleep flags from it. */
	di->di_flags = *req;
	*req = (di->di_flags & ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) |
	    VM_ALLOC_NOWAIT;
	vm_domainset_iter_first(di, domain);
	/* If the first domain is below the free-page minimum, advance. */
	if (vm_page_count_min_domain(*domain))
		vm_domainset_iter_page(di, obj, domain);
}
/*
 * Advance a page-allocation iterator after a failed attempt.  Returns 0
 * with the next candidate in *domain, or ENOMEM when all valid domains
 * have been visited and the original request cannot (or chose not to)
 * wait.  May sleep in vm_wait_doms(), dropping and reacquiring the
 * object write lock around the wait when obj is non-NULL.
 */
int
vm_domainset_iter_page(struct vm_domainset_iter *di, struct vm_object *obj,
    int *domain)
{

	/* Every domain was removed via vm_domainset_iter_ignore(). */
	if (__predict_false(DOMAINSET_EMPTY(&di->di_valid_mask)))
		return (ENOMEM);
	/* If there are more domains to visit we run the iterator. */
	while (--di->di_n != 0) {
		vm_domainset_iter_next(di, domain);
		if (DOMAINSET_ISSET(*domain, &di->di_valid_mask) &&
		    (!di->di_minskip || !vm_page_count_min_domain(*domain)))
			return (0);
	}
	/* If we skipped domains below min restart the search. */
	if (di->di_minskip) {
		di->di_minskip = false;
		vm_domainset_iter_first(di, domain);
		return (0);
	}
	/* If we visited all domains and this was a NOWAIT we return error. */
	if ((di->di_flags & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) == 0)
		return (ENOMEM);
	/* Wait for one of the domains to accumulate some free pages. */
	if (obj != NULL)
		VM_OBJECT_WUNLOCK(obj);
	vm_wait_doms(&di->di_valid_mask, 0);
	if (obj != NULL)
		VM_OBJECT_WLOCK(obj);
	/* WAITFAIL requests fail after a single wait rather than retrying. */
	if ((di->di_flags & VM_ALLOC_WAITFAIL) != 0)
		return (ENOMEM);
	/* Restart the search. */
	vm_domainset_iter_first(di, domain);
	return (0);
}
/*
 * Common tail of the policy-iterator initializers: record the caller's
 * malloc flags, force the first attempt to be M_NOWAIT, and position the
 * iterator on the first candidate domain.
 */
static void
_vm_domainset_iter_policy_init(struct vm_domainset_iter *di, int *domain,
    int *flags)
{

	di->di_flags = *flags;
	*flags = (di->di_flags & ~M_WAITOK) | M_NOWAIT;
	vm_domainset_iter_first(di, domain);
	/* If the first domain is below the free-page minimum, advance. */
	if (vm_page_count_min_domain(*domain))
		vm_domainset_iter_policy(di, domain);
}
/*
 * Initialize a policy iterator from an explicit domainset, sharing the
 * current thread's round-robin cursor.
 */
void
vm_domainset_iter_policy_init(struct vm_domainset_iter *di,
    struct domainset *ds, int *domain, int *flags)
{

	vm_domainset_iter_init(di, ds, &curthread->td_domain.dr_iter, NULL, 0);
	_vm_domainset_iter_policy_init(di, domain, flags);
}
/*
 * Initialize a policy iterator from a domainset reference, using the
 * reference's own round-robin cursor.
 */
void
vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *di,
    struct domainset_ref *dr, int *domain, int *flags)
{

	vm_domainset_iter_init(di, dr->dr_policy, &dr->dr_iter, NULL, 0);
	_vm_domainset_iter_policy_init(di, domain, flags);
}
/*
 * Advance a policy (kmem/malloc) iterator after a failed attempt.
 * Returns 0 with the next candidate in *domain, or ENOMEM when all valid
 * domains have been visited and the request was not M_WAITOK.  M_WAITOK
 * requests sleep in vm_wait_doms() and then restart the search.
 */
int
vm_domainset_iter_policy(struct vm_domainset_iter *di, int *domain)
{

	/* Every domain was removed via vm_domainset_iter_ignore(). */
	if (DOMAINSET_EMPTY(&di->di_valid_mask))
		return (ENOMEM);
	/* If there are more domains to visit we run the iterator. */
	while (--di->di_n != 0) {
		vm_domainset_iter_next(di, domain);
		if (DOMAINSET_ISSET(*domain, &di->di_valid_mask) &&
		    (!di->di_minskip || !vm_page_count_min_domain(*domain)))
			return (0);
	}
	/* If we skipped domains below min restart the search. */
	if (di->di_minskip) {
		di->di_minskip = false;
		vm_domainset_iter_first(di, domain);
		return (0);
	}
	/* If we visited all domains and this was a NOWAIT we return error. */
	if ((di->di_flags & M_WAITOK) == 0)
		return (ENOMEM);
	/* Wait for one of the domains to accumulate some free pages. */
	vm_wait_doms(&di->di_valid_mask, 0);
	/* Restart the search. */
	vm_domainset_iter_first(di, domain);
	return (0);
}
/*
 * Remove a domain from the iterator's valid mask so it is not offered
 * again.  The domain must currently be present in the mask.
 */
void
vm_domainset_iter_ignore(struct vm_domainset_iter *di, int domain)
{
	KASSERT(DOMAINSET_ISSET(domain, &di->di_valid_mask),
	    ("%s: domain %d not present in di_valid_mask for di %p",
	    __func__, domain, di));
	DOMAINSET_CLR(domain, &di->di_valid_mask);
}
  297. #else /* !NUMA */
/*
 * !NUMA stub: there is only one domain, so iteration never yields another
 * candidate; EJUSTRETURN tells the caller to stop retrying.
 */
int
vm_domainset_iter_page(struct vm_domainset_iter *di, struct vm_object *obj,
    int *domain)
{

	return (EJUSTRETURN);
}
/* !NUMA stub: the only domain is 0. */
void
vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj,
    vm_pindex_t pindex, int *domain, int *flags)
{

	*domain = 0;
}
/* !NUMA stub: no further candidates; tell the caller to stop retrying. */
int
vm_domainset_iter_policy(struct vm_domainset_iter *di, int *domain)
{

	return (EJUSTRETURN);
}
/* !NUMA stub: the only domain is 0. */
void
vm_domainset_iter_policy_init(struct vm_domainset_iter *di,
    struct domainset *ds, int *domain, int *flags)
{

	*domain = 0;
}
/* !NUMA stub: the only domain is 0. */
void
vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *di,
    struct domainset_ref *dr, int *domain, int *flags)
{

	*domain = 0;
}
/* !NUMA stub: nothing to exclude when there is a single domain. */
void
vm_domainset_iter_ignore(struct vm_domainset_iter *di __unused,
    int domain __unused)
{
}
  332. #endif /* NUMA */