uvm_aobj.c 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557
  1. /* $OpenBSD: uvm_aobj.c,v 1.79 2015/05/07 01:55:44 jsg Exp $ */
  2. /* $NetBSD: uvm_aobj.c,v 1.39 2001/02/18 21:19:08 chs Exp $ */
  3. /*
  4. * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and
  5. * Washington University.
  6. * All rights reserved.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. * 1. Redistributions of source code must retain the above copyright
  12. * notice, this list of conditions and the following disclaimer.
  13. * 2. Redistributions in binary form must reproduce the above copyright
  14. * notice, this list of conditions and the following disclaimer in the
  15. * documentation and/or other materials provided with the distribution.
  16. *
  17. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  18. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  19. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  20. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  21. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  22. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  23. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  24. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  25. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  26. * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27. *
  28. * from: Id: uvm_aobj.c,v 1.1.2.5 1998/02/06 05:14:38 chs Exp
  29. */
  30. /*
  31. * uvm_aobj.c: anonymous memory uvm_object pager
  32. *
  33. * author: Chuck Silvers <chuq@chuq.com>
  34. * started: Jan-1998
  35. *
  36. * - design mostly from Chuck Cranor
  37. */
  38. #include <sys/param.h>
  39. #include <sys/systm.h>
  40. #include <sys/malloc.h>
  41. #include <sys/kernel.h>
  42. #include <sys/pool.h>
  43. #include <sys/stdint.h>
  44. #include <sys/atomic.h>
  45. #include <uvm/uvm.h>
  46. /*
  47. * an aobj manages anonymous-memory backed uvm_objects. in addition
  48. * to keeping the list of resident pages, it also keeps a list of
  49. * allocated swap blocks. depending on the size of the aobj this list
  50. * of allocated swap blocks is either stored in an array (small objects)
  51. * or in a hash table (large objects).
  52. */
  53. /*
  54. * local structures
  55. */
  /*
   * Hash-mode layout: the page index space of an aobj is cut into
   * clusters of UAO_SWHASH_CLUSTER_SIZE consecutive pages, with one hash
   * entry per cluster.  The cluster size has to be a power of two so the
   * shift/mask arithmetic below works.
   */
  #define UAO_SWHASH_CLUSTER_SHIFT 4
  #define UAO_SWHASH_CLUSTER_SIZE (1 << UAO_SWHASH_CLUSTER_SHIFT)

  /* cluster number (the "tag") that a page index falls into */
  #define UAO_SWHASH_ELT_TAG(PAGEIDX) \
      ((PAGEIDX) >> UAO_SWHASH_CLUSTER_SHIFT)

  /* position of a page index inside its cluster's slots[] array */
  #define UAO_SWHASH_ELT_PAGESLOT_IDX(PAGEIDX) \
      ((PAGEIDX) & (UAO_SWHASH_CLUSTER_SIZE - 1))

  /* lvalue for the swap slot of PAGEIDX within the entry ELT */
  #define UAO_SWHASH_ELT_PAGESLOT(ELT, PAGEIDX) \
      ((ELT)->slots[UAO_SWHASH_ELT_PAGESLOT_IDX(PAGEIDX)])

  /* first page index covered by the entry ELT */
  #define UAO_SWHASH_ELT_PAGEIDX_BASE(ELT) \
      ((ELT)->tag << UAO_SWHASH_CLUSTER_SHIFT)

  /*
   * the swhash hash function: address of the bucket head that the
   * cluster of PAGEIDX belongs to (tag masked by the table's hash mask)
   */
  #define UAO_SWHASH_HASH(AOBJ, PAGEIDX) \
      (&(AOBJ)->u_swhash[UAO_SWHASH_ELT_TAG(PAGEIDX) & (AOBJ)->u_swhashmask])

  /*
   * objects with more pages than this threshold keep their swap slots
   * in a hash table; smaller ones use a plain array.
   */
  #define UAO_SWHASH_THRESHOLD (UAO_SWHASH_CLUSTER_SIZE * 4)

  /*
   * bucket count requested for an object of the given size: one bucket
   * per cluster, capped at UAO_SWHASH_MAXBUCKETS
   */
  #define UAO_SWHASH_MAXBUCKETS 256
  #define UAO_SWHASH_BUCKETS(pages) \
      (min((pages) >> UAO_SWHASH_CLUSTER_SHIFT, UAO_SWHASH_MAXBUCKETS))
  91. /*
  92. * uao_swhash_elt: when a hash table is being used, this structure defines
  93. * the format of an entry in the bucket list.
  94. */
  95. struct uao_swhash_elt {
  96. LIST_ENTRY(uao_swhash_elt) list; /* the hash list */
  97. voff_t tag; /* our 'tag' */
  98. int count; /* our number of active slots */
  99. int slots[UAO_SWHASH_CLUSTER_SIZE]; /* the slots */
  100. };
  101. /*
  102. * uao_swhash: the swap hash table structure
  103. */
  104. LIST_HEAD(uao_swhash, uao_swhash_elt);
  105. /*
  106. * uao_swhash_elt_pool: pool of uao_swhash_elt structures
  107. */
  108. struct pool uao_swhash_elt_pool;
  109. /*
  110. * uvm_aobj: the actual anon-backed uvm_object
  111. *
  112. * => the uvm_object is at the top of the structure, this allows
  113. * (struct uvm_aobj *) == (struct uvm_object *)
  114. * => only one of u_swslots and u_swhash is used in any given aobj
  115. */
  116. struct uvm_aobj {
  117. struct uvm_object u_obj; /* has: pgops, memt, #pages, #refs */
  118. int u_pages; /* number of pages in entire object */
  119. int u_flags; /* the flags (see uvm_aobj.h) */
  120. /*
  121. * Either an array or hashtable (array of bucket heads) of
  122. * offset -> swapslot mappings for the aobj.
  123. */
  124. #define u_swslots u_swap.slot_array
  125. #define u_swhash u_swap.slot_hash
  126. union swslots {
  127. int *slot_array;
  128. struct uao_swhash *slot_hash;
  129. } u_swap;
  130. u_long u_swhashmask; /* mask for hashtable */
  131. LIST_ENTRY(uvm_aobj) u_list; /* global list of aobjs */
  132. };
  133. /*
  134. * uvm_aobj_pool: pool of uvm_aobj structures
  135. */
  136. struct pool uvm_aobj_pool;
  137. /*
  138. * local functions
  139. */
  140. static struct uao_swhash_elt *uao_find_swhash_elt(struct uvm_aobj *, int,
  141. boolean_t);
  142. static int uao_find_swslot(struct uvm_aobj *, int);
  143. static boolean_t uao_flush(struct uvm_object *, voff_t,
  144. voff_t, int);
  145. static void uao_free(struct uvm_aobj *);
  146. static int uao_get(struct uvm_object *, voff_t,
  147. vm_page_t *, int *, int, vm_prot_t,
  148. int, int);
  149. static boolean_t uao_pagein(struct uvm_aobj *, int, int);
  150. static boolean_t uao_pagein_page(struct uvm_aobj *, int);
  151. void uao_dropswap_range(struct uvm_object *, voff_t, voff_t);
  152. void uao_shrink_flush(struct uvm_object *, int, int);
  153. int uao_shrink_hash(struct uvm_object *, int);
  154. int uao_shrink_array(struct uvm_object *, int);
  155. int uao_shrink_convert(struct uvm_object *, int);
  156. int uao_grow_hash(struct uvm_object *, int);
  157. int uao_grow_array(struct uvm_object *, int);
  158. int uao_grow_convert(struct uvm_object *, int);
  159. /*
  160. * aobj_pager
  161. *
  162. * note that some functions (e.g. put) are handled elsewhere
  163. */
  164. struct uvm_pagerops aobj_pager = {
  165. NULL, /* init */
  166. uao_reference, /* reference */
  167. uao_detach, /* detach */
  168. NULL, /* fault */
  169. uao_flush, /* flush */
  170. uao_get, /* get */
  171. };
  172. /*
  173. * uao_list: global list of active aobjs, locked by uao_list_lock
  174. *
  175. * Lock ordering: generally the locking order is object lock, then list lock.
  176. * in the case of swap off we have to iterate over the list, and thus the
  177. * ordering is reversed. In that case we must use trylocking to prevent
  178. * deadlock.
  179. */
  180. static LIST_HEAD(aobjlist, uvm_aobj) uao_list = LIST_HEAD_INITIALIZER(uao_list);
  181. static struct mutex uao_list_lock = MUTEX_INITIALIZER(IPL_NONE);
  182. /*
  183. * functions
  184. */
  185. /*
  186. * hash table/array related functions
  187. */
  188. /*
  189. * uao_find_swhash_elt: find (or create) a hash table entry for a page
  190. * offset.
  191. */
  192. static struct uao_swhash_elt *
  193. uao_find_swhash_elt(struct uvm_aobj *aobj, int pageidx, boolean_t create)
  194. {
  195. struct uao_swhash *swhash;
  196. struct uao_swhash_elt *elt;
  197. voff_t page_tag;
  198. swhash = UAO_SWHASH_HASH(aobj, pageidx); /* first hash to get bucket */
  199. page_tag = UAO_SWHASH_ELT_TAG(pageidx); /* tag to search for */
  200. /* now search the bucket for the requested tag */
  201. LIST_FOREACH(elt, swhash, list) {
  202. if (elt->tag == page_tag)
  203. return(elt);
  204. }
  205. /* fail now if we are not allowed to create a new entry in the bucket */
  206. if (!create)
  207. return NULL;
  208. /* allocate a new entry for the bucket and init/insert it in */
  209. elt = pool_get(&uao_swhash_elt_pool, PR_NOWAIT | PR_ZERO);
  210. /*
  211. * XXX We cannot sleep here as the hash table might disappear
  212. * from under our feet. And we run the risk of deadlocking
  213. * the pagedeamon. In fact this code will only be called by
  214. * the pagedaemon and allocation will only fail if we
  215. * exhausted the pagedeamon reserve. In that case we're
  216. * doomed anyway, so panic.
  217. */
  218. if (elt == NULL)
  219. panic("%s: can't allocate entry", __func__);
  220. LIST_INSERT_HEAD(swhash, elt, list);
  221. elt->tag = page_tag;
  222. return(elt);
  223. }
  224. /*
  225. * uao_find_swslot: find the swap slot number for an aobj/pageidx
  226. */
  227. __inline static int
  228. uao_find_swslot(struct uvm_aobj *aobj, int pageidx)
  229. {
  230. /* if noswap flag is set, then we never return a slot */
  231. if (aobj->u_flags & UAO_FLAG_NOSWAP)
  232. return(0);
  233. /* if hashing, look in hash table. */
  234. if (aobj->u_pages > UAO_SWHASH_THRESHOLD) {
  235. struct uao_swhash_elt *elt =
  236. uao_find_swhash_elt(aobj, pageidx, FALSE);
  237. if (elt)
  238. return(UAO_SWHASH_ELT_PAGESLOT(elt, pageidx));
  239. else
  240. return(0);
  241. }
  242. /* otherwise, look in the array */
  243. return(aobj->u_swslots[pageidx]);
  244. }
  245. /*
  246. * uao_set_swslot: set the swap slot for a page in an aobj.
  247. *
  248. * => setting a slot to zero frees the slot
  249. */
  250. int
  251. uao_set_swslot(struct uvm_object *uobj, int pageidx, int slot)
  252. {
  253. struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  254. int oldslot;
  255. /* if noswap flag is set, then we can't set a slot */
  256. if (aobj->u_flags & UAO_FLAG_NOSWAP) {
  257. if (slot == 0)
  258. return(0); /* a clear is ok */
  259. /* but a set is not */
  260. printf("uao_set_swslot: uobj = %p\n", uobj);
  261. panic("uao_set_swslot: attempt to set a slot"
  262. " on a NOSWAP object");
  263. }
  264. /* are we using a hash table? if so, add it in the hash. */
  265. if (aobj->u_pages > UAO_SWHASH_THRESHOLD) {
  266. /*
  267. * Avoid allocating an entry just to free it again if
  268. * the page had not swap slot in the first place, and
  269. * we are freeing.
  270. */
  271. struct uao_swhash_elt *elt =
  272. uao_find_swhash_elt(aobj, pageidx, slot ? TRUE : FALSE);
  273. if (elt == NULL) {
  274. KASSERT(slot == 0);
  275. return (0);
  276. }
  277. oldslot = UAO_SWHASH_ELT_PAGESLOT(elt, pageidx);
  278. UAO_SWHASH_ELT_PAGESLOT(elt, pageidx) = slot;
  279. /*
  280. * now adjust the elt's reference counter and free it if we've
  281. * dropped it to zero.
  282. */
  283. /* an allocation? */
  284. if (slot) {
  285. if (oldslot == 0)
  286. elt->count++;
  287. } else { /* freeing slot ... */
  288. if (oldslot) /* to be safe */
  289. elt->count--;
  290. if (elt->count == 0) {
  291. LIST_REMOVE(elt, list);
  292. pool_put(&uao_swhash_elt_pool, elt);
  293. }
  294. }
  295. } else {
  296. /* we are using an array */
  297. oldslot = aobj->u_swslots[pageidx];
  298. aobj->u_swslots[pageidx] = slot;
  299. }
  300. return (oldslot);
  301. }
  302. /*
  303. * end of hash/array functions
  304. */
  305. /*
  306. * uao_free: free all resources held by an aobj, and then free the aobj
  307. *
  308. * => the aobj should be dead
  309. */
  310. static void
  311. uao_free(struct uvm_aobj *aobj)
  312. {
  313. if (aobj->u_pages > UAO_SWHASH_THRESHOLD) {
  314. int i, hashbuckets = aobj->u_swhashmask + 1;
  315. /*
  316. * free the swslots from each hash bucket,
  317. * then the hash bucket, and finally the hash table itself.
  318. */
  319. for (i = 0; i < hashbuckets; i++) {
  320. struct uao_swhash_elt *elt, *next;
  321. for (elt = LIST_FIRST(&aobj->u_swhash[i]);
  322. elt != NULL;
  323. elt = next) {
  324. int j;
  325. for (j = 0; j < UAO_SWHASH_CLUSTER_SIZE; j++) {
  326. int slot = elt->slots[j];
  327. if (slot == 0) {
  328. continue;
  329. }
  330. uvm_swap_free(slot, 1);
  331. /*
  332. * this page is no longer
  333. * only in swap.
  334. */
  335. uvmexp.swpgonly--;
  336. }
  337. next = LIST_NEXT(elt, list);
  338. pool_put(&uao_swhash_elt_pool, elt);
  339. }
  340. }
  341. free(aobj->u_swhash, M_UVMAOBJ, 0);
  342. } else {
  343. int i;
  344. /* free the array */
  345. for (i = 0; i < aobj->u_pages; i++) {
  346. int slot = aobj->u_swslots[i];
  347. if (slot) {
  348. uvm_swap_free(slot, 1);
  349. /* this page is no longer only in swap. */
  350. uvmexp.swpgonly--;
  351. }
  352. }
  353. free(aobj->u_swslots, M_UVMAOBJ, 0);
  354. }
  355. /* finally free the aobj itself */
  356. pool_put(&uvm_aobj_pool, aobj);
  357. }
  358. /*
  359. * pager functions
  360. */
  361. /*
  362. * Shrink an aobj to a given number of pages. The procedure is always the same:
  363. * assess the necessity of data structure conversion (hash to array), secure
  364. * resources, flush pages and drop swap slots.
  365. *
  366. */
  367. void
  368. uao_shrink_flush(struct uvm_object *uobj, int startpg, int endpg)
  369. {
  370. KASSERT(startpg < endpg);
  371. KASSERT(uobj->uo_refs == 1);
  372. uao_flush(uobj, (voff_t)startpg << PAGE_SHIFT,
  373. (voff_t)endpg << PAGE_SHIFT, PGO_FREE);
  374. uao_dropswap_range(uobj, startpg, endpg);
  375. }
  376. int
  377. uao_shrink_hash(struct uvm_object *uobj, int pages)
  378. {
  379. struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  380. struct uao_swhash *new_swhash;
  381. struct uao_swhash_elt *elt;
  382. unsigned long new_hashmask;
  383. int i;
  384. KASSERT(aobj->u_pages > UAO_SWHASH_THRESHOLD);
  385. /*
  386. * If the size of the hash table doesn't change, all we need to do is
  387. * to adjust the page count.
  388. */
  389. if (UAO_SWHASH_BUCKETS(aobj->u_pages) == UAO_SWHASH_BUCKETS(pages)) {
  390. uao_shrink_flush(uobj, pages, aobj->u_pages);
  391. aobj->u_pages = pages;
  392. return 0;
  393. }
  394. new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ,
  395. M_WAITOK | M_CANFAIL, &new_hashmask);
  396. if (new_swhash == NULL)
  397. return ENOMEM;
  398. uao_shrink_flush(uobj, pages, aobj->u_pages);
  399. /*
  400. * Even though the hash table size is changing, the hash of the buckets
  401. * we are interested in copying should not change.
  402. */
  403. for (i = 0; i < UAO_SWHASH_BUCKETS(aobj->u_pages); i++) {
  404. while (LIST_EMPTY(&aobj->u_swhash[i]) == 0) {
  405. elt = LIST_FIRST(&aobj->u_swhash[i]);
  406. LIST_REMOVE(elt, list);
  407. LIST_INSERT_HEAD(&new_swhash[i], elt, list);
  408. }
  409. }
  410. free(aobj->u_swhash, M_UVMAOBJ, 0);
  411. aobj->u_swhash = new_swhash;
  412. aobj->u_pages = pages;
  413. aobj->u_swhashmask = new_hashmask;
  414. return 0;
  415. }
  416. int
  417. uao_shrink_convert(struct uvm_object *uobj, int pages)
  418. {
  419. struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  420. struct uao_swhash_elt *elt;
  421. int i, *new_swslots;
  422. new_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ,
  423. M_WAITOK | M_CANFAIL | M_ZERO);
  424. if (new_swslots == NULL)
  425. return ENOMEM;
  426. uao_shrink_flush(uobj, pages, aobj->u_pages);
  427. /* Convert swap slots from hash to array. */
  428. for (i = 0; i < pages; i++) {
  429. elt = uao_find_swhash_elt(aobj, i, FALSE);
  430. if (elt != NULL) {
  431. new_swslots[i] = UAO_SWHASH_ELT_PAGESLOT(elt, i);
  432. if (new_swslots[i] != 0)
  433. elt->count--;
  434. if (elt->count == 0) {
  435. LIST_REMOVE(elt, list);
  436. pool_put(&uao_swhash_elt_pool, elt);
  437. }
  438. }
  439. }
  440. free(aobj->u_swhash, M_UVMAOBJ, 0);
  441. aobj->u_swslots = new_swslots;
  442. aobj->u_pages = pages;
  443. return 0;
  444. }
  445. int
  446. uao_shrink_array(struct uvm_object *uobj, int pages)
  447. {
  448. struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  449. int i, *new_swslots;
  450. new_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ,
  451. M_WAITOK | M_CANFAIL | M_ZERO);
  452. if (new_swslots == NULL)
  453. return ENOMEM;
  454. uao_shrink_flush(uobj, pages, aobj->u_pages);
  455. for (i = 0; i < pages; i++)
  456. new_swslots[i] = aobj->u_swslots[i];
  457. free(aobj->u_swslots, M_UVMAOBJ, 0);
  458. aobj->u_swslots = new_swslots;
  459. aobj->u_pages = pages;
  460. return 0;
  461. }
  462. int
  463. uao_shrink(struct uvm_object *uobj, int pages)
  464. {
  465. struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  466. KASSERT(pages < aobj->u_pages);
  467. /*
  468. * Distinguish between three possible cases:
  469. * 1. aobj uses hash and must be converted to array.
  470. * 2. aobj uses array and array size needs to be adjusted.
  471. * 3. aobj uses hash and hash size needs to be adjusted.
  472. */
  473. if (pages > UAO_SWHASH_THRESHOLD)
  474. return uao_shrink_hash(uobj, pages); /* case 3 */
  475. else if (aobj->u_pages > UAO_SWHASH_THRESHOLD)
  476. return uao_shrink_convert(uobj, pages); /* case 1 */
  477. else
  478. return uao_shrink_array(uobj, pages); /* case 2 */
  479. }
  480. /*
  481. * Grow an aobj to a given number of pages. Right now we only adjust the swap
  482. * slots. We could additionally handle page allocation directly, so that they
  483. * don't happen through uvm_fault(). That would allow us to use another
  484. * mechanism for the swap slots other than malloc(). It is thus mandatory that
  485. * the caller of these functions does not allow faults to happen in case of
  486. * growth error.
  487. */
  488. int
  489. uao_grow_array(struct uvm_object *uobj, int pages)
  490. {
  491. struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  492. int i, *new_swslots;
  493. KASSERT(aobj->u_pages <= UAO_SWHASH_THRESHOLD);
  494. new_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ,
  495. M_WAITOK | M_CANFAIL | M_ZERO);
  496. if (new_swslots == NULL)
  497. return ENOMEM;
  498. for (i = 0; i < aobj->u_pages; i++)
  499. new_swslots[i] = aobj->u_swslots[i];
  500. free(aobj->u_swslots, M_UVMAOBJ, 0);
  501. aobj->u_swslots = new_swslots;
  502. aobj->u_pages = pages;
  503. return 0;
  504. }
  505. int
  506. uao_grow_hash(struct uvm_object *uobj, int pages)
  507. {
  508. struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  509. struct uao_swhash *new_swhash;
  510. struct uao_swhash_elt *elt;
  511. unsigned long new_hashmask;
  512. int i;
  513. KASSERT(pages > UAO_SWHASH_THRESHOLD);
  514. /*
  515. * If the size of the hash table doesn't change, all we need to do is
  516. * to adjust the page count.
  517. */
  518. if (UAO_SWHASH_BUCKETS(aobj->u_pages) == UAO_SWHASH_BUCKETS(pages)) {
  519. aobj->u_pages = pages;
  520. return 0;
  521. }
  522. KASSERT(UAO_SWHASH_BUCKETS(aobj->u_pages) < UAO_SWHASH_BUCKETS(pages));
  523. new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ,
  524. M_WAITOK | M_CANFAIL, &new_hashmask);
  525. if (new_swhash == NULL)
  526. return ENOMEM;
  527. for (i = 0; i < UAO_SWHASH_BUCKETS(aobj->u_pages); i++) {
  528. while (LIST_EMPTY(&aobj->u_swhash[i]) == 0) {
  529. elt = LIST_FIRST(&aobj->u_swhash[i]);
  530. LIST_REMOVE(elt, list);
  531. LIST_INSERT_HEAD(&new_swhash[i], elt, list);
  532. }
  533. }
  534. free(aobj->u_swhash, M_UVMAOBJ, 0);
  535. aobj->u_swhash = new_swhash;
  536. aobj->u_pages = pages;
  537. aobj->u_swhashmask = new_hashmask;
  538. return 0;
  539. }
  540. int
  541. uao_grow_convert(struct uvm_object *uobj, int pages)
  542. {
  543. struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  544. struct uao_swhash *new_swhash;
  545. struct uao_swhash_elt *elt;
  546. unsigned long new_hashmask;
  547. int i, *old_swslots;
  548. new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ,
  549. M_WAITOK | M_CANFAIL, &new_hashmask);
  550. if (new_swhash == NULL)
  551. return ENOMEM;
  552. /* Set these now, so we can use uao_find_swhash_elt(). */
  553. old_swslots = aobj->u_swslots;
  554. aobj->u_swhash = new_swhash;
  555. aobj->u_swhashmask = new_hashmask;
  556. for (i = 0; i < aobj->u_pages; i++) {
  557. if (old_swslots[i] != 0) {
  558. elt = uao_find_swhash_elt(aobj, i, TRUE);
  559. elt->count++;
  560. UAO_SWHASH_ELT_PAGESLOT(elt, i) = old_swslots[i];
  561. }
  562. }
  563. free(old_swslots, M_UVMAOBJ, 0);
  564. aobj->u_pages = pages;
  565. return 0;
  566. }
  567. int
  568. uao_grow(struct uvm_object *uobj, int pages)
  569. {
  570. struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  571. KASSERT(pages > aobj->u_pages);
  572. /*
  573. * Distinguish between three possible cases:
  574. * 1. aobj uses hash and hash size needs to be adjusted.
  575. * 2. aobj uses array and array size needs to be adjusted.
  576. * 3. aobj uses array and must be converted to hash.
  577. */
  578. if (pages <= UAO_SWHASH_THRESHOLD)
  579. return uao_grow_array(uobj, pages); /* case 2 */
  580. else if (aobj->u_pages > UAO_SWHASH_THRESHOLD)
  581. return uao_grow_hash(uobj, pages); /* case 1 */
  582. else
  583. return uao_grow_convert(uobj, pages);
  584. }
  585. /*
  586. * uao_create: create an aobj of the given size and return its uvm_object.
  587. *
  588. * => for normal use, flags are zero or UAO_FLAG_CANFAIL.
  589. * => for the kernel object, the flags are:
  590. * UAO_FLAG_KERNOBJ - allocate the kernel object (can only happen once)
  591. * UAO_FLAG_KERNSWAP - enable swapping of kernel object (" ")
  592. */
  593. struct uvm_object *
  594. uao_create(vsize_t size, int flags)
  595. {
  596. static struct uvm_aobj kernel_object_store; /* home of kernel_object */
  597. static int kobj_alloced = 0; /* not allocated yet */
  598. int pages = round_page(size) >> PAGE_SHIFT;
  599. int refs = UVM_OBJ_KERN;
  600. int mflags;
  601. struct uvm_aobj *aobj;
  602. /* malloc a new aobj unless we are asked for the kernel object */
  603. if (flags & UAO_FLAG_KERNOBJ) { /* want kernel object? */
  604. if (kobj_alloced)
  605. panic("uao_create: kernel object already allocated");
  606. aobj = &kernel_object_store;
  607. aobj->u_pages = pages;
  608. aobj->u_flags = UAO_FLAG_NOSWAP; /* no swap to start */
  609. /* we are special, we never die */
  610. kobj_alloced = UAO_FLAG_KERNOBJ;
  611. } else if (flags & UAO_FLAG_KERNSWAP) {
  612. aobj = &kernel_object_store;
  613. if (kobj_alloced != UAO_FLAG_KERNOBJ)
  614. panic("uao_create: asked to enable swap on kernel object");
  615. kobj_alloced = UAO_FLAG_KERNSWAP;
  616. } else { /* normal object */
  617. aobj = pool_get(&uvm_aobj_pool, PR_WAITOK);
  618. aobj->u_pages = pages;
  619. aobj->u_flags = 0; /* normal object */
  620. refs = 1; /* normal object so 1 ref */
  621. }
  622. /* allocate hash/array if necessary */
  623. if (flags == 0 || (flags & (UAO_FLAG_KERNSWAP | UAO_FLAG_CANFAIL))) {
  624. if (flags)
  625. mflags = M_NOWAIT;
  626. else
  627. mflags = M_WAITOK;
  628. /* allocate hash table or array depending on object size */
  629. if (aobj->u_pages > UAO_SWHASH_THRESHOLD) {
  630. aobj->u_swhash = hashinit(UAO_SWHASH_BUCKETS(pages),
  631. M_UVMAOBJ, mflags, &aobj->u_swhashmask);
  632. if (aobj->u_swhash == NULL) {
  633. if (flags & UAO_FLAG_CANFAIL) {
  634. pool_put(&uvm_aobj_pool, aobj);
  635. return (NULL);
  636. }
  637. panic("uao_create: hashinit swhash failed");
  638. }
  639. } else {
  640. aobj->u_swslots = mallocarray(pages, sizeof(int),
  641. M_UVMAOBJ, mflags|M_ZERO);
  642. if (aobj->u_swslots == NULL) {
  643. if (flags & UAO_FLAG_CANFAIL) {
  644. pool_put(&uvm_aobj_pool, aobj);
  645. return (NULL);
  646. }
  647. panic("uao_create: malloc swslots failed");
  648. }
  649. }
  650. if (flags & UAO_FLAG_KERNSWAP) {
  651. aobj->u_flags &= ~UAO_FLAG_NOSWAP; /* clear noswap */
  652. return(&aobj->u_obj);
  653. /* done! */
  654. }
  655. }
  656. uvm_objinit(&aobj->u_obj, &aobj_pager, refs);
  657. /* now that aobj is ready, add it to the global list */
  658. mtx_enter(&uao_list_lock);
  659. LIST_INSERT_HEAD(&uao_list, aobj, u_list);
  660. mtx_leave(&uao_list_lock);
  661. return(&aobj->u_obj);
  662. }
  663. /*
  664. * uao_init: set up aobj pager subsystem
  665. *
  666. * => called at boot time from uvm_pager_init()
  667. */
  668. void
  669. uao_init(void)
  670. {
  671. static int uao_initialized;
  672. if (uao_initialized)
  673. return;
  674. uao_initialized = TRUE;
  675. /*
  676. * NOTE: Pages for this pool must not come from a pageable
  677. * kernel map!
  678. */
  679. pool_init(&uao_swhash_elt_pool, sizeof(struct uao_swhash_elt),
  680. 0, 0, PR_WAITOK, "uaoeltpl", NULL);
  681. pool_init(&uvm_aobj_pool, sizeof(struct uvm_aobj), 0, 0, PR_WAITOK,
  682. "aobjpl", NULL);
  683. }
  /*
   * uao_reference: take a reference on an aobj
   *
   * => all the work is done by uao_reference_locked()
   */
  void
  uao_reference(struct uvm_object *uobj)
  {

      uao_reference_locked(uobj);
  }
  692. /*
  693. * uao_reference_locked: add a ref to an aobj
  694. */
  695. void
  696. uao_reference_locked(struct uvm_object *uobj)
  697. {
  698. /* kernel_object already has plenty of references, leave it alone. */
  699. if (UVM_OBJ_IS_KERN_OBJECT(uobj))
  700. return;
  701. uobj->uo_refs++; /* bump! */
  702. }
  /*
   * uao_detach: give up a reference on an aobj
   *
   * => all the work is done by uao_detach_locked()
   */
  void
  uao_detach(struct uvm_object *uobj)
  {

      uao_detach_locked(uobj);
  }
  711. /*
  712. * uao_detach_locked: drop a reference to an aobj
  713. *
  714. * => aobj may freed upon return.
  715. */
  716. void
  717. uao_detach_locked(struct uvm_object *uobj)
  718. {
  719. struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  720. struct vm_page *pg;
  721. /* detaching from kernel_object is a noop. */
  722. if (UVM_OBJ_IS_KERN_OBJECT(uobj)) {
  723. return;
  724. }
  725. uobj->uo_refs--; /* drop ref! */
  726. if (uobj->uo_refs) { /* still more refs? */
  727. return;
  728. }
  729. /* remove the aobj from the global list. */
  730. mtx_enter(&uao_list_lock);
  731. LIST_REMOVE(aobj, u_list);
  732. mtx_leave(&uao_list_lock);
  733. /*
  734. * Free all pages left in the object. If they're busy, wait
  735. * for them to become available before we kill it.
  736. * Release swap resources then free the page.
  737. */
  738. uvm_lock_pageq();
  739. while((pg = RB_ROOT(&uobj->memt)) != NULL) {
  740. if (pg->pg_flags & PG_BUSY) {
  741. atomic_setbits_int(&pg->pg_flags, PG_WANTED);
  742. uvm_unlock_pageq();
  743. UVM_WAIT(pg, 0, "uao_det", 0);
  744. uvm_lock_pageq();
  745. continue;
  746. }
  747. pmap_page_protect(pg, PROT_NONE);
  748. uao_dropswap(&aobj->u_obj, pg->offset >> PAGE_SHIFT);
  749. uvm_pagefree(pg);
  750. }
  751. uvm_unlock_pageq();
  752. /* finally, free the rest. */
  753. uao_free(aobj);
  754. }
  755. /*
  756. * uao_flush: "flush" pages out of a uvm object
  757. *
  758. * => if PGO_CLEANIT is not set, then we will not block.
  759. * => if PGO_ALLPAGE is set, then all pages in the object are valid targets
  760. * for flushing.
  761. * => NOTE: we are allowed to lock the page queues, so the caller
  762. * must not be holding the lock on them [e.g. pagedaemon had
  763. * better not call us with the queues locked]
  764. * => we return TRUE unless we encountered some sort of I/O error
  765. * XXXJRT currently never happens, as we never directly initiate
  766. * XXXJRT I/O
  767. */
  768. boolean_t
  769. uao_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
  770. {
  771. struct uvm_aobj *aobj = (struct uvm_aobj *) uobj;
  772. struct vm_page *pp;
  773. voff_t curoff;
  774. if (flags & PGO_ALLPAGES) {
  775. start = 0;
  776. stop = (voff_t)aobj->u_pages << PAGE_SHIFT;
  777. } else {
  778. start = trunc_page(start);
  779. stop = round_page(stop);
  780. if (stop > ((voff_t)aobj->u_pages << PAGE_SHIFT)) {
  781. printf("uao_flush: strange, got an out of range "
  782. "flush (fixed)\n");
  783. stop = (voff_t)aobj->u_pages << PAGE_SHIFT;
  784. }
  785. }
  786. /*
  787. * Don't need to do any work here if we're not freeing
  788. * or deactivating pages.
  789. */
  790. if ((flags & (PGO_DEACTIVATE|PGO_FREE)) == 0)
  791. return (TRUE);
  792. curoff = start;
  793. for (;;) {
  794. if (curoff < stop) {
  795. pp = uvm_pagelookup(uobj, curoff);
  796. curoff += PAGE_SIZE;
  797. if (pp == NULL)
  798. continue;
  799. } else {
  800. break;
  801. }
  802. /* Make sure page is unbusy, else wait for it. */
  803. if (pp->pg_flags & PG_BUSY) {
  804. atomic_setbits_int(&pp->pg_flags, PG_WANTED);
  805. UVM_WAIT(pp, 0, "uaoflsh", 0);
  806. curoff -= PAGE_SIZE;
  807. continue;
  808. }
  809. switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
  810. /*
  811. * XXX In these first 3 cases, we always just
  812. * XXX deactivate the page. We may want to
  813. * XXX handle the different cases more specifically
  814. * XXX in the future.
  815. */
  816. case PGO_CLEANIT|PGO_FREE:
  817. /* FALLTHROUGH */
  818. case PGO_CLEANIT|PGO_DEACTIVATE:
  819. /* FALLTHROUGH */
  820. case PGO_DEACTIVATE:
  821. deactivate_it:
  822. /* skip the page if it's loaned or wired */
  823. if (pp->loan_count != 0 ||
  824. pp->wire_count != 0)
  825. continue;
  826. uvm_lock_pageq();
  827. /* zap all mappings for the page. */
  828. pmap_page_protect(pp, PROT_NONE);
  829. /* ...and deactivate the page. */
  830. uvm_pagedeactivate(pp);
  831. uvm_unlock_pageq();
  832. continue;
  833. case PGO_FREE:
  834. /*
  835. * If there are multiple references to
  836. * the object, just deactivate the page.
  837. */
  838. if (uobj->uo_refs > 1)
  839. goto deactivate_it;
  840. /* XXX skip the page if it's loaned or wired */
  841. if (pp->loan_count != 0 ||
  842. pp->wire_count != 0)
  843. continue;
  844. /* zap all mappings for the page. */
  845. pmap_page_protect(pp, PROT_NONE);
  846. uao_dropswap(uobj, pp->offset >> PAGE_SHIFT);
  847. uvm_lock_pageq();
  848. uvm_pagefree(pp);
  849. uvm_unlock_pageq();
  850. continue;
  851. default:
  852. panic("uao_flush: weird flags");
  853. }
  854. }
  855. return (TRUE);
  856. }
  857. /*
  858. * uao_get: fetch me a page
  859. *
  860. * we have three cases:
  861. * 1: page is resident -> just return the page.
  862. * 2: page is zero-fill -> allocate a new page and zero it.
  863. * 3: page is swapped out -> fetch the page from swap.
  864. *
  865. * cases 1 and 2 can be handled with PGO_LOCKED, case 3 cannot.
  866. * so, if the "center" page hits case 3 (or any page, with PGO_ALLPAGES),
  867. * then we will need to return VM_PAGER_UNLOCK.
  868. *
  869. * => flags: PGO_ALLPAGES: get all of the pages
  870. * PGO_LOCKED: fault data structures are locked
  871. * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx]
  872. * => NOTE: caller must check for released pages!!
  873. */
  874. static int
  875. uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
  876. int *npagesp, int centeridx, vm_prot_t access_type, int advice, int flags)
  877. {
  878. struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  879. voff_t current_offset;
  880. vm_page_t ptmp;
  881. int lcv, gotpages, maxpages, swslot, rv, pageidx;
  882. boolean_t done;
  883. /* get number of pages */
  884. maxpages = *npagesp;
  885. /* step 1: handled the case where fault data structures are locked. */
  886. if (flags & PGO_LOCKED) {
  887. /* step 1a: get pages that are already resident. */
  888. done = TRUE; /* be optimistic */
  889. gotpages = 0; /* # of pages we got so far */
  890. for (lcv = 0, current_offset = offset ; lcv < maxpages ;
  891. lcv++, current_offset += PAGE_SIZE) {
  892. /* do we care about this page? if not, skip it */
  893. if (pps[lcv] == PGO_DONTCARE)
  894. continue;
  895. ptmp = uvm_pagelookup(uobj, current_offset);
  896. /*
  897. * if page is new, attempt to allocate the page,
  898. * zero-fill'd.
  899. */
  900. if (ptmp == NULL && uao_find_swslot(aobj,
  901. current_offset >> PAGE_SHIFT) == 0) {
  902. ptmp = uvm_pagealloc(uobj, current_offset,
  903. NULL, UVM_PGA_ZERO);
  904. if (ptmp) {
  905. /* new page */
  906. atomic_clearbits_int(&ptmp->pg_flags,
  907. PG_BUSY|PG_FAKE);
  908. atomic_setbits_int(&ptmp->pg_flags,
  909. PQ_AOBJ);
  910. UVM_PAGE_OWN(ptmp, NULL);
  911. }
  912. }
  913. /* to be useful must get a non-busy page */
  914. if (ptmp == NULL ||
  915. (ptmp->pg_flags & PG_BUSY) != 0) {
  916. if (lcv == centeridx ||
  917. (flags & PGO_ALLPAGES) != 0)
  918. /* need to do a wait or I/O! */
  919. done = FALSE;
  920. continue;
  921. }
  922. /*
  923. * useful page: busy it and plug it in our
  924. * result array
  925. */
  926. /* caller must un-busy this page */
  927. atomic_setbits_int(&ptmp->pg_flags, PG_BUSY);
  928. UVM_PAGE_OWN(ptmp, "uao_get1");
  929. pps[lcv] = ptmp;
  930. gotpages++;
  931. }
  932. /*
  933. * step 1b: now we've either done everything needed or we
  934. * to unlock and do some waiting or I/O.
  935. */
  936. *npagesp = gotpages;
  937. if (done)
  938. /* bingo! */
  939. return(VM_PAGER_OK);
  940. else
  941. /* EEK! Need to unlock and I/O */
  942. return(VM_PAGER_UNLOCK);
  943. }
  944. /*
  945. * step 2: get non-resident or busy pages.
  946. * data structures are unlocked.
  947. */
  948. for (lcv = 0, current_offset = offset ; lcv < maxpages ;
  949. lcv++, current_offset += PAGE_SIZE) {
  950. /*
  951. * - skip over pages we've already gotten or don't want
  952. * - skip over pages we don't _have_ to get
  953. */
  954. if (pps[lcv] != NULL ||
  955. (lcv != centeridx && (flags & PGO_ALLPAGES) == 0))
  956. continue;
  957. pageidx = current_offset >> PAGE_SHIFT;
  958. /*
  959. * we have yet to locate the current page (pps[lcv]). we
  960. * first look for a page that is already at the current offset.
  961. * if we find a page, we check to see if it is busy or
  962. * released. if that is the case, then we sleep on the page
  963. * until it is no longer busy or released and repeat the lookup.
  964. * if the page we found is neither busy nor released, then we
  965. * busy it (so we own it) and plug it into pps[lcv]. this
  966. * 'break's the following while loop and indicates we are
  967. * ready to move on to the next page in the "lcv" loop above.
  968. *
  969. * if we exit the while loop with pps[lcv] still set to NULL,
  970. * then it means that we allocated a new busy/fake/clean page
  971. * ptmp in the object and we need to do I/O to fill in the data.
  972. */
  973. /* top of "pps" while loop */
  974. while (pps[lcv] == NULL) {
  975. /* look for a resident page */
  976. ptmp = uvm_pagelookup(uobj, current_offset);
  977. /* not resident? allocate one now (if we can) */
  978. if (ptmp == NULL) {
  979. ptmp = uvm_pagealloc(uobj, current_offset,
  980. NULL, 0);
  981. /* out of RAM? */
  982. if (ptmp == NULL) {
  983. uvm_wait("uao_getpage");
  984. /* goto top of pps while loop */
  985. continue;
  986. }
  987. /*
  988. * safe with PQ's unlocked: because we just
  989. * alloc'd the page
  990. */
  991. atomic_setbits_int(&ptmp->pg_flags, PQ_AOBJ);
  992. /*
  993. * got new page ready for I/O. break pps while
  994. * loop. pps[lcv] is still NULL.
  995. */
  996. break;
  997. }
  998. /* page is there, see if we need to wait on it */
  999. if ((ptmp->pg_flags & PG_BUSY) != 0) {
  1000. atomic_setbits_int(&ptmp->pg_flags, PG_WANTED);
  1001. UVM_WAIT(ptmp, FALSE, "uao_get", 0);
  1002. continue; /* goto top of pps while loop */
  1003. }
  1004. /*
  1005. * if we get here then the page has become resident and
  1006. * unbusy between steps 1 and 2. we busy it now (so we
  1007. * own it) and set pps[lcv] (so that we exit the while
  1008. * loop).
  1009. */
  1010. /* we own it, caller must un-busy */
  1011. atomic_setbits_int(&ptmp->pg_flags, PG_BUSY);
  1012. UVM_PAGE_OWN(ptmp, "uao_get2");
  1013. pps[lcv] = ptmp;
  1014. }
  1015. /*
  1016. * if we own the valid page at the correct offset, pps[lcv] will
  1017. * point to it. nothing more to do except go to the next page.
  1018. */
  1019. if (pps[lcv])
  1020. continue; /* next lcv */
  1021. /*
  1022. * we have a "fake/busy/clean" page that we just allocated.
  1023. * do the needed "i/o", either reading from swap or zeroing.
  1024. */
  1025. swslot = uao_find_swslot(aobj, pageidx);
  1026. /* just zero the page if there's nothing in swap. */
  1027. if (swslot == 0) {
  1028. /* page hasn't existed before, just zero it. */
  1029. uvm_pagezero(ptmp);
  1030. } else {
  1031. /* page in the swapped-out page. */
  1032. rv = uvm_swap_get(ptmp, swslot, PGO_SYNCIO);
  1033. /* I/O done. check for errors. */
  1034. if (rv != VM_PAGER_OK) {
  1035. /*
  1036. * remove the swap slot from the aobj
  1037. * and mark the aobj as having no real slot.
  1038. * don't free the swap slot, thus preventing
  1039. * it from being used again.
  1040. */
  1041. swslot = uao_set_swslot(&aobj->u_obj, pageidx,
  1042. SWSLOT_BAD);
  1043. uvm_swap_markbad(swslot, 1);
  1044. if (ptmp->pg_flags & PG_WANTED)
  1045. wakeup(ptmp);
  1046. atomic_clearbits_int(&ptmp->pg_flags,
  1047. PG_WANTED|PG_BUSY);
  1048. UVM_PAGE_OWN(ptmp, NULL);
  1049. uvm_lock_pageq();
  1050. uvm_pagefree(ptmp);
  1051. uvm_unlock_pageq();
  1052. return (rv);
  1053. }
  1054. }
  1055. /*
  1056. * we got the page! clear the fake flag (indicates valid
  1057. * data now in page) and plug into our result array. note
  1058. * that page is still busy.
  1059. *
  1060. * it is the callers job to:
  1061. * => check if the page is released
  1062. * => unbusy the page
  1063. * => activate the page
  1064. */
  1065. /* data is valid ... */
  1066. atomic_clearbits_int(&ptmp->pg_flags, PG_FAKE);
  1067. pmap_clear_modify(ptmp); /* ... and clean */
  1068. pps[lcv] = ptmp;
  1069. } /* lcv loop */
  1070. return(VM_PAGER_OK);
  1071. }
  /*
 * uao_dropswap: give back the swap space backing one page of an aobj.
 *
 * => the slot recorded for pageidx is reset to 0 and, when
 *    uao_set_swslot() hands back a non-zero slot, that slot is freed.
 * => returns the value handed back by uao_set_swslot().
 */
int
uao_dropswap(struct uvm_object *uobj, int pageidx)
{
	const int slot = uao_set_swslot(uobj, pageidx, 0);

	if (slot != 0)
		uvm_swap_free(slot, 1);

	return (slot);
}
  1085. /*
  1086. * page in every page in every aobj that is paged-out to a range of swslots.
  1087. *
  1088. * => returns TRUE if pagein was aborted due to lack of memory.
  1089. */
  1090. boolean_t
  1091. uao_swap_off(int startslot, int endslot)
  1092. {
  1093. struct uvm_aobj *aobj, *nextaobj, *prevaobj = NULL;
  1094. /* walk the list of all aobjs. */
  1095. mtx_enter(&uao_list_lock);
  1096. for (aobj = LIST_FIRST(&uao_list);
  1097. aobj != NULL;
  1098. aobj = nextaobj) {
  1099. boolean_t rv;
  1100. /*
  1101. * add a ref to the aobj so it doesn't disappear
  1102. * while we're working.
  1103. */
  1104. uao_reference_locked(&aobj->u_obj);
  1105. /*
  1106. * now it's safe to unlock the uao list.
  1107. * note that lock interleaving is alright with IPL_NONE mutexes.
  1108. */
  1109. mtx_leave(&uao_list_lock);
  1110. if (prevaobj) {
  1111. uao_detach_locked(&prevaobj->u_obj);
  1112. prevaobj = NULL;
  1113. }
  1114. /*
  1115. * page in any pages in the swslot range.
  1116. * if there's an error, abort and return the error.
  1117. */
  1118. rv = uao_pagein(aobj, startslot, endslot);
  1119. if (rv) {
  1120. uao_detach_locked(&aobj->u_obj);
  1121. return rv;
  1122. }
  1123. /*
  1124. * we're done with this aobj.
  1125. * relock the list and drop our ref on the aobj.
  1126. */
  1127. mtx_enter(&uao_list_lock);
  1128. nextaobj = LIST_NEXT(aobj, u_list);
  1129. /*
  1130. * prevaobj means that we have an object that we need
  1131. * to drop a reference for. We can't just drop it now with
  1132. * the list locked since that could cause lock recursion in
  1133. * the case where we reduce the refcount to 0. It will be
  1134. * released the next time we drop the list lock.
  1135. */
  1136. prevaobj = aobj;
  1137. }
  1138. /* done with traversal, unlock the list */
  1139. mtx_leave(&uao_list_lock);
  1140. if (prevaobj) {
  1141. uao_detach_locked(&prevaobj->u_obj);
  1142. }
  1143. return FALSE;
  1144. }
  1145. /*
  1146. * page in any pages from aobj in the given range.
  1147. *
  1148. * => returns TRUE if pagein was aborted due to lack of memory.
  1149. */
  1150. static boolean_t
  1151. uao_pagein(struct uvm_aobj *aobj, int startslot, int endslot)
  1152. {
  1153. boolean_t rv;
  1154. if (aobj->u_pages > UAO_SWHASH_THRESHOLD) {
  1155. struct uao_swhash_elt *elt;
  1156. int bucket;
  1157. restart:
  1158. for (bucket = aobj->u_swhashmask; bucket >= 0; bucket--) {
  1159. for (elt = LIST_FIRST(&aobj->u_swhash[bucket]);
  1160. elt != NULL;
  1161. elt = LIST_NEXT(elt, list)) {
  1162. int i;
  1163. for (i = 0; i < UAO_SWHASH_CLUSTER_SIZE; i++) {
  1164. int slot = elt->slots[i];
  1165. /* if slot isn't in range, skip it. */
  1166. if (slot < startslot ||
  1167. slot >= endslot) {
  1168. continue;
  1169. }
  1170. /*
  1171. * process the page,
  1172. * the start over on this object
  1173. * since the swhash elt
  1174. * may have been freed.
  1175. */
  1176. rv = uao_pagein_page(aobj,
  1177. UAO_SWHASH_ELT_PAGEIDX_BASE(elt) + i);
  1178. if (rv) {
  1179. return rv;
  1180. }
  1181. goto restart;
  1182. }
  1183. }
  1184. }
  1185. } else {
  1186. int i;
  1187. for (i = 0; i < aobj->u_pages; i++) {
  1188. int slot = aobj->u_swslots[i];
  1189. /* if the slot isn't in range, skip it */
  1190. if (slot < startslot || slot >= endslot) {
  1191. continue;
  1192. }
  1193. /* process the page. */
  1194. rv = uao_pagein_page(aobj, i);
  1195. if (rv) {
  1196. return rv;
  1197. }
  1198. }
  1199. }
  1200. return FALSE;
  1201. }
  1202. /*
  1203. * page in a page from an aobj. used for swap_off.
  1204. * returns TRUE if pagein was aborted due to lack of memory.
  1205. */
  1206. static boolean_t
  1207. uao_pagein_page(struct uvm_aobj *aobj, int pageidx)
  1208. {
  1209. struct vm_page *pg;
  1210. int rv, slot, npages;
  1211. pg = NULL;
  1212. npages = 1;
  1213. rv = uao_get(&aobj->u_obj, (voff_t)pageidx << PAGE_SHIFT,
  1214. &pg, &npages, 0, PROT_READ | PROT_WRITE, 0, 0);
  1215. switch (rv) {
  1216. case VM_PAGER_OK:
  1217. break;
  1218. case VM_PAGER_ERROR:
  1219. case VM_PAGER_REFAULT:
  1220. /*
  1221. * nothing more to do on errors.
  1222. * VM_PAGER_REFAULT can only mean that the anon was freed,
  1223. * so again there's nothing to do.
  1224. */
  1225. return FALSE;
  1226. }
  1227. /*
  1228. * ok, we've got the page now.
  1229. * mark it as dirty, clear its swslot and un-busy it.
  1230. */
  1231. slot = uao_set_swslot(&aobj->u_obj, pageidx, 0);
  1232. uvm_swap_free(slot, 1);
  1233. atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_CLEAN|PG_FAKE);
  1234. UVM_PAGE_OWN(pg, NULL);
  1235. /* deactivate the page (to put it on a page queue). */
  1236. pmap_clear_reference(pg);
  1237. uvm_lock_pageq();
  1238. uvm_pagedeactivate(pg);
  1239. uvm_unlock_pageq();
  1240. return FALSE;
  1241. }
  1242. /*
  1243. * XXX pedro: Once we are comfortable enough with this function, we can adapt
  1244. * uao_free() to use it.
  1245. *
  1246. * uao_dropswap_range: drop swapslots in the range.
  1247. *
  1248. * => aobj must be locked and is returned locked.
  1249. * => start is inclusive. end is exclusive.
  1250. */
  1251. void
  1252. uao_dropswap_range(struct uvm_object *uobj, voff_t start, voff_t end)
  1253. {
  1254. struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  1255. int swpgonlydelta = 0;
  1256. /* KASSERT(mutex_owned(uobj->vmobjlock)); */
  1257. if (end == 0) {
  1258. end = INT64_MAX;
  1259. }
  1260. if (aobj->u_pages > UAO_SWHASH_THRESHOLD) {
  1261. int i, hashbuckets = aobj->u_swhashmask + 1;
  1262. voff_t taghi;
  1263. voff_t taglo;
  1264. taglo = UAO_SWHASH_ELT_TAG(start);
  1265. taghi = UAO_SWHASH_ELT_TAG(end);
  1266. for (i = 0; i < hashbuckets; i++) {
  1267. struct uao_swhash_elt *elt, *next;
  1268. for (elt = LIST_FIRST(&aobj->u_swhash[i]);
  1269. elt != NULL;
  1270. elt = next) {
  1271. int startidx, endidx;
  1272. int j;
  1273. next = LIST_NEXT(elt, list);
  1274. if (elt->tag < taglo || taghi < elt->tag) {
  1275. continue;
  1276. }
  1277. if (elt->tag == taglo) {
  1278. startidx =
  1279. UAO_SWHASH_ELT_PAGESLOT_IDX(start);
  1280. } else {
  1281. startidx = 0;
  1282. }
  1283. if (elt->tag == taghi) {
  1284. endidx =
  1285. UAO_SWHASH_ELT_PAGESLOT_IDX(end);
  1286. } else {
  1287. endidx = UAO_SWHASH_CLUSTER_SIZE;
  1288. }
  1289. for (j = startidx; j < endidx; j++) {
  1290. int slot = elt->slots[j];
  1291. KASSERT(uvm_pagelookup(&aobj->u_obj,
  1292. (voff_t)(UAO_SWHASH_ELT_PAGEIDX_BASE(elt)
  1293. + j) << PAGE_SHIFT) == NULL);
  1294. if (slot > 0) {
  1295. uvm_swap_free(slot, 1);
  1296. swpgonlydelta++;
  1297. KASSERT(elt->count > 0);
  1298. elt->slots[j] = 0;
  1299. elt->count--;
  1300. }
  1301. }
  1302. if (elt->count == 0) {
  1303. LIST_REMOVE(elt, list);
  1304. pool_put(&uao_swhash_elt_pool, elt);
  1305. }
  1306. }
  1307. }
  1308. } else {
  1309. int i;
  1310. if (aobj->u_pages < end) {
  1311. end = aobj->u_pages;
  1312. }
  1313. for (i = start; i < end; i++) {
  1314. int slot = aobj->u_swslots[i];
  1315. if (slot > 0) {
  1316. uvm_swap_free(slot, 1);
  1317. swpgonlydelta++;
  1318. }
  1319. }
  1320. }
  1321. /*
  1322. * adjust the counter of pages only in swap for all
  1323. * the swap slots we've freed.
  1324. */
  1325. if (swpgonlydelta > 0) {
  1326. KASSERT(uvmexp.swpgonly >= swpgonlydelta);
  1327. uvmexp.swpgonly -= swpgonlydelta;
  1328. }
  1329. }