/*	$OpenBSD: uvm_pdaemon.c,v 1.75 2014/12/17 19:42:15 tedu Exp $	*/
/*	$NetBSD: uvm_pdaemon.c,v 1.23 2000/08/20 10:24:14 bjh21 Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_pageout.c	8.5 (Berkeley) 2/14/94
 * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_pdaemon.c: the page daemon
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/pool.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/atomic.h>

#ifdef HIBERNATE
#include <sys/hibernate.h>
#endif

#include <uvm/uvm.h>

/*
 * UVMPD_NUMDIRTYREACTS is how many dirty pages the pagedaemon will reactivate
 * in a pass thru the inactive list when swap is full.  the value should be
 * "small"... if it's too large we'll cycle the active pages thru the inactive
 * queue too quickly for them to be referenced and avoid being freed.
 */

#define UVMPD_NUMDIRTYREACTS	16
/*
 * local prototypes
 */

void		uvmpd_scan(void);
boolean_t	uvmpd_scan_inactive(struct pglist *);
void		uvmpd_tune(void);
void		uvmpd_drop(struct pglist *);

/*
 * uvm_wait: wait (sleep) for the page daemon to free some pages
 *
 * => should be called with all locks released
 * => should _not_ be called by the page daemon (to avoid deadlock)
 */
void
uvm_wait(const char *wmsg)
{
	int timo = 0;

	/* check for page daemon going to sleep (waiting for itself) */
	if (curproc == uvm.pagedaemon_proc) {
		printf("uvm_wait emergency bufbackoff\n");
		if (bufbackoff(NULL, 4) == 0)
			return;
		/*
		 * now we have a problem: the pagedaemon wants to go to
		 * sleep until it frees more memory.  but how can it
		 * free more memory if it is asleep?  that is a deadlock.
		 * we have two options:
		 *  [1] panic now
		 *  [2] put a timeout on the sleep, thus causing the
		 *      pagedaemon to only pause (rather than sleep forever)
		 *
		 * note that option [2] will only help us if we get lucky
		 * and some other process on the system breaks the deadlock
		 * by exiting or freeing memory (thus allowing the pagedaemon
		 * to continue).  for now we panic if DEBUG is defined,
		 * otherwise we hope for the best with option [2] (better
		 * yet, this should never happen in the first place!).
		 */

		printf("pagedaemon: deadlock detected!\n");
		timo = hz >> 3;		/* set timeout */
#if defined(DEBUG)
		/* DEBUG: panic so we can debug it */
		panic("pagedaemon deadlock");
#endif
	}

	uvm_lock_fpageq();
	wakeup(&uvm.pagedaemon);	/* wake the daemon! */
	msleep(&uvmexp.free, &uvm.fpageqlock, PVM | PNORELOCK, wmsg, timo);
}
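
/*
 * Illustrative only, a minimal sketch of how callers typically use
 * uvm_wait(): retry an allocation and sleep until the pagedaemon frees
 * pages.  "uobj", "offset" and the wait message are placeholders, not
 * identifiers from this file.
 */
#if 0
	struct vm_page *pg;

	while ((pg = uvm_pagealloc(uobj, offset, NULL, 0)) == NULL)
		uvm_wait("examplewt");	/* sleep; pagedaemon wakes us later */
#endif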
/*
 * uvmpd_tune: tune paging parameters
 *
 * => called whenever memory is added to (or removed from?) the system
 * => caller must call with page queues locked
 */
void
uvmpd_tune(void)
{
	uvmexp.freemin = uvmexp.npages / 30;

	/* between 16k and 512k */
	/* XXX:  what are these values good for? */
	uvmexp.freemin = max(uvmexp.freemin, (16*1024) >> PAGE_SHIFT);
#if 0
	uvmexp.freemin = min(uvmexp.freemin, (512*1024) >> PAGE_SHIFT);
#endif

	/* Make sure there's always a user page free. */
	if (uvmexp.freemin < uvmexp.reserve_kernel + 1)
		uvmexp.freemin = uvmexp.reserve_kernel + 1;

	uvmexp.freetarg = (uvmexp.freemin * 4) / 3;
	if (uvmexp.freetarg <= uvmexp.freemin)
		uvmexp.freetarg = uvmexp.freemin + 1;

	/* uvmexp.inactarg: computed in main daemon loop */

	uvmexp.wiredmax = uvmexp.npages / 3;
}
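
/*
 * Worked example (illustrative numbers only; assumes 4 KB pages and 1 GB of
 * managed memory, i.e. uvmexp.npages == 262144):
 *
 *	freemin  = 262144 / 30    = 8738 pages  (~34 MB)
 *	freetarg = (8738 * 4) / 3 = 11650 pages (~45 MB)
 *	wiredmax = 262144 / 3     = 87381 pages
 *
 * (16*1024) >> PAGE_SHIFT is only 4 pages here, so the max() clamp has no
 * effect on a machine of this size.
 */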
/*
 * uvm_pageout: the main loop for the pagedaemon
 */
void
uvm_pageout(void *arg)
{
	struct uvm_constraint_range constraint;
	struct uvm_pmalloc *pma;
	int work_done;
	int npages = 0;

	/* ensure correct priority and set paging parameters... */
	uvm.pagedaemon_proc = curproc;
	(void) spl0();
	uvm_lock_pageq();
	npages = uvmexp.npages;
	uvmpd_tune();
	uvm_unlock_pageq();

	for (;;) {
		long size;

		work_done = 0; /* No work done this iteration. */

		uvm_lock_fpageq();
		if (TAILQ_EMPTY(&uvm.pmr_control.allocs)) {
			msleep(&uvm.pagedaemon, &uvm.fpageqlock, PVM,
			    "pgdaemon", 0);
			uvmexp.pdwoke++;
		}

		if ((pma = TAILQ_FIRST(&uvm.pmr_control.allocs)) != NULL) {
			pma->pm_flags |= UVM_PMA_BUSY;
			constraint = pma->pm_constraint;
		} else
			constraint = no_constraint;
		uvm_unlock_fpageq();

		/* now lock page queues and recompute inactive count */
		uvm_lock_pageq();
		if (npages != uvmexp.npages) {	/* check for new pages? */
			npages = uvmexp.npages;
			uvmpd_tune();
		}

		uvmexp.inactarg = (uvmexp.active + uvmexp.inactive) / 3;
		if (uvmexp.inactarg <= uvmexp.freetarg) {
			uvmexp.inactarg = uvmexp.freetarg + 1;
		}

		/* Reclaim pages from the buffer cache if possible. */
		size = 0;
		if (pma != NULL)
			size += pma->pm_size >> PAGE_SHIFT;
		if (uvmexp.free - BUFPAGES_DEFICIT < uvmexp.freetarg)
			size += uvmexp.freetarg - (uvmexp.free -
			    BUFPAGES_DEFICIT);
		(void) bufbackoff(&constraint, size * 2);

		/* Scan if needed to meet our targets. */
		if (pma != NULL ||
		    ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freetarg) ||
		    ((uvmexp.inactive + BUFPAGES_INACT) < uvmexp.inactarg)) {
			uvmpd_scan();
			work_done = 1; /* XXX we hope... */
		}

		/*
		 * if there's any free memory to be had,
		 * wake up any waiters.
		 */
		uvm_lock_fpageq();
		if (uvmexp.free > uvmexp.reserve_kernel ||
		    uvmexp.paging == 0) {
			wakeup(&uvmexp.free);
		}

		if (pma != NULL) {
			pma->pm_flags &= ~UVM_PMA_BUSY;
			if (!work_done)
				pma->pm_flags |= UVM_PMA_FAIL;
			if (pma->pm_flags & (UVM_PMA_FAIL | UVM_PMA_FREED)) {
				pma->pm_flags &= ~UVM_PMA_LINKED;
				TAILQ_REMOVE(&uvm.pmr_control.allocs, pma,
				    pmq);
			}
			wakeup(pma);
		}
		uvm_unlock_fpageq();

		/* scan done.  unlock page queues (only lock we are holding) */
		uvm_unlock_pageq();

		sched_pause();
	}
	/*NOTREACHED*/
}
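
/*
 * Summary of the uvm_pmalloc handshake in the loop above (descriptive note,
 * no new behaviour; the requests are presumably queued by constrained page
 * allocators that are willing to sleep): the daemon marks the head request
 * UVM_PMA_BUSY while it works on its constraint range, flags it UVM_PMA_FAIL
 * if the pass did no work, unlinks requests that have failed or were already
 * satisfied (UVM_PMA_FREED), and finally wakes the waiter with wakeup(pma).
 */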
/*
 * uvm_aiodone_daemon:  main loop for the aiodone daemon.
 */
void
uvm_aiodone_daemon(void *arg)
{
	int s, free;
	struct buf *bp, *nbp;

	uvm.aiodoned_proc = curproc;

	for (;;) {
		/*
		 * Check for done aio structures. If we've got structures to
		 * process, do so. Otherwise sleep while avoiding races.
		 */
		mtx_enter(&uvm.aiodoned_lock);
		while ((bp = TAILQ_FIRST(&uvm.aio_done)) == NULL)
			msleep(&uvm.aiodoned, &uvm.aiodoned_lock,
			    PVM, "aiodoned", 0);

		/* Take the list for ourselves. */
		TAILQ_INIT(&uvm.aio_done);
		mtx_leave(&uvm.aiodoned_lock);

		/* process each i/o that's done. */
		free = uvmexp.free;
		while (bp != NULL) {
			if (bp->b_flags & B_PDAEMON) {
				uvmexp.paging -= bp->b_bufsize >> PAGE_SHIFT;
			}
			nbp = TAILQ_NEXT(bp, b_freelist);
			s = splbio();	/* b_iodone must be called at splbio */
			(*bp->b_iodone)(bp);
			splx(s);
			bp = nbp;

			sched_pause();
		}

		uvm_lock_fpageq();
		wakeup(free <= uvmexp.reserve_kernel ? &uvm.pagedaemon :
		    &uvmexp.free);
		uvm_unlock_fpageq();
	}
}
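
/*
 * Note on the final wakeup() above: the target is chosen from the free count
 * sampled before the completions were processed.  If memory was still at or
 * below the kernel reserve, the pagedaemon itself is woken so it keeps
 * reclaiming; otherwise processes sleeping in uvm_wait() on uvmexp.free get
 * the wakeup.
 */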
/*
 * uvmpd_scan_inactive: scan an inactive list for pages to clean or free.
 *
 * => called with page queues locked
 * => we work on meeting our free target by converting inactive pages
 *    into free pages.
 * => we handle the building of swap-backed clusters
 * => we return TRUE if we are exiting because we met our target
 */
boolean_t
uvmpd_scan_inactive(struct pglist *pglst)
{
	boolean_t retval = FALSE;	/* assume we haven't hit target */
	int free, result;
	struct vm_page *p, *nextpg;
	struct uvm_object *uobj;
	struct vm_page *pps[MAXBSIZE >> PAGE_SHIFT], **ppsp;
	int npages;
	struct vm_page *swpps[MAXBSIZE >> PAGE_SHIFT];	/* XXX: see below */
	int swnpages, swcpages;				/* XXX: see below */
	int swslot;
	struct vm_anon *anon;
	boolean_t swap_backed;
	vaddr_t start;
	int dirtyreacts;

	/*
	 * note: we currently keep swap-backed pages on a separate inactive
	 * list from object-backed pages.  however, merging the two lists
	 * back together again hasn't been ruled out.  thus, we keep our
	 * swap cluster in "swpps" rather than in pps (allows us to mix
	 * clustering types in the event of a mixed inactive queue).
	 */
	/*
	 * swslot is non-zero if we are building a swap cluster.  we want
	 * to stay in the loop while we have a page to scan or we have
	 * a swap-cluster to build.
	 */
	swslot = 0;
	swnpages = swcpages = 0;
	free = 0;
	dirtyreacts = 0;

	for (p = TAILQ_FIRST(pglst); p != NULL || swslot != 0; p = nextpg) {
		/*
		 * note that p can be NULL iff we have traversed the whole
		 * list and need to do one final swap-backed clustered pageout.
		 */
		uobj = NULL;
		anon = NULL;

		if (p) {
			/*
			 * update our copy of "free" and see if we've met
			 * our target
			 */
			free = uvmexp.free - BUFPAGES_DEFICIT;
			if (free + uvmexp.paging >= uvmexp.freetarg << 2 ||
			    dirtyreacts == UVMPD_NUMDIRTYREACTS) {
				retval = TRUE;

				if (swslot == 0) {
					/* exit now if no swap-i/o pending */
					break;
				}

				/* set p to null to signal final swap i/o */
				p = NULL;
			}
		}

		if (p) {	/* if (we have a new page to consider) */
			/*
			 * we are below target and have a new page to consider.
			 */
			uvmexp.pdscans++;
			nextpg = TAILQ_NEXT(p, pageq);

			/*
			 * move referenced pages back to active queue and
			 * skip to next page (unlikely to happen since
			 * inactive pages shouldn't have any valid mappings
			 * and we cleared reference before deactivating).
			 */
			if (pmap_is_referenced(p)) {
				uvm_pageactivate(p);
				uvmexp.pdreact++;
				continue;
			}

			/*
			 * the only time we expect to see an ownerless page
			 * (i.e. a page with no uobject and !PQ_ANON) is if an
			 * anon has loaned a page from a uvm_object and the
			 * uvm_object has dropped the ownership.  in that
			 * case, the anon can "take over" the loaned page
			 * and make it its own.
			 */

			/* is page part of an anon or ownerless ? */
			if ((p->pg_flags & PQ_ANON) || p->uobject == NULL) {
				anon = p->uanon;
				KASSERT(anon != NULL);

				/*
				 * if the page is ownerless, claim it in the
				 * name of "anon"!
				 */
				if ((p->pg_flags & PQ_ANON) == 0) {
					KASSERT(p->loan_count > 0);
					p->loan_count--;
					atomic_setbits_int(&p->pg_flags,
					    PQ_ANON);
					/* anon now owns it */
				}
				if (p->pg_flags & PG_BUSY) {
					uvmexp.pdbusy++;
					/* someone else owns page, skip it */
					continue;
				}
				uvmexp.pdanscan++;
			} else {
				uobj = p->uobject;
				KASSERT(uobj != NULL);
				if (p->pg_flags & PG_BUSY) {
					uvmexp.pdbusy++;
					/* someone else owns page, skip it */
					continue;
				}
				uvmexp.pdobscan++;
			}

			/*
			 * we now have the page queues locked.
			 * the page is not busy.  if the page is clean we
			 * can free it now and continue.
			 */
			if (p->pg_flags & PG_CLEAN) {
				if (p->pg_flags & PQ_SWAPBACKED) {
					/* this page now lives only in swap */
					uvmexp.swpgonly++;
				}

				/* zap all mappings with pmap_page_protect... */
				pmap_page_protect(p, PROT_NONE);
				uvm_pagefree(p);
				uvmexp.pdfreed++;

				if (anon) {
					/*
					 * an anonymous page can only be clean
					 * if it has backing store assigned.
					 */
					KASSERT(anon->an_swslot != 0);
					/* remove from object */
					anon->an_page = NULL;
				}
				continue;
			}

			/*
			 * this page is dirty, skip it if we'll have met our
			 * free target when all the current pageouts complete.
			 */
			if (free + uvmexp.paging > uvmexp.freetarg << 2) {
				continue;
			}

			/*
			 * this page is dirty, but we can't page it out
			 * since swap is full of pages that live only in swap.
			 * reactivate it so that we eventually cycle
			 * all pages thru the inactive queue.
			 */
			KASSERT(uvmexp.swpgonly <= uvmexp.swpages);
			if ((p->pg_flags & PQ_SWAPBACKED) &&
			    uvmexp.swpgonly == uvmexp.swpages) {
				dirtyreacts++;
				uvm_pageactivate(p);
				continue;
			}

			/*
			 * if the page is swap-backed and dirty and swap space
			 * is full, free any swap allocated to the page
			 * so that other pages can be paged out.
			 */
			KASSERT(uvmexp.swpginuse <= uvmexp.swpages);
			if ((p->pg_flags & PQ_SWAPBACKED) &&
			    uvmexp.swpginuse == uvmexp.swpages) {
				if ((p->pg_flags & PQ_ANON) &&
				    p->uanon->an_swslot) {
					uvm_swap_free(p->uanon->an_swslot, 1);
					p->uanon->an_swslot = 0;
				}
				if (p->pg_flags & PQ_AOBJ) {
					uao_dropswap(p->uobject,
					    p->offset >> PAGE_SHIFT);
				}
			}

			/*
			 * the page we are looking at is dirty.  we must
			 * clean it before it can be freed.  to do this we
			 * first mark the page busy so that no one else will
			 * touch the page.  we write protect all the mappings
			 * of the page so that no one touches it while it is
			 * in I/O.
			 */
			swap_backed = ((p->pg_flags & PQ_SWAPBACKED) != 0);
			atomic_setbits_int(&p->pg_flags, PG_BUSY);
			UVM_PAGE_OWN(p, "scan_inactive");
			pmap_page_protect(p, PROT_READ);
			uvmexp.pgswapout++;

			/*
			 * for swap-backed pages we need to (re)allocate
			 * swap space.
			 */
			if (swap_backed) {
				/* free old swap slot (if any) */
				if (anon) {
					if (anon->an_swslot) {
						uvm_swap_free(anon->an_swslot,
						    1);
						anon->an_swslot = 0;
					}
				} else {
					uao_dropswap(uobj,
					    p->offset >> PAGE_SHIFT);
				}

				/* start new cluster (if necessary) */
				if (swslot == 0) {
					swnpages = MAXBSIZE >> PAGE_SHIFT;
					swslot = uvm_swap_alloc(&swnpages,
					    TRUE);
					if (swslot == 0) {
						/* no swap?  give up! */
						atomic_clearbits_int(
						    &p->pg_flags,
						    PG_BUSY);
						UVM_PAGE_OWN(p, NULL);
						continue;
					}
					swcpages = 0;	/* cluster is empty */
				}

				/* add block to cluster */
				swpps[swcpages] = p;
				if (anon)
					anon->an_swslot = swslot + swcpages;
				else
					uao_set_swslot(uobj,
					    p->offset >> PAGE_SHIFT,
					    swslot + swcpages);
				swcpages++;
			}
		} else {
			/* if p == NULL we must be doing a last swap i/o */
			swap_backed = TRUE;
		}

		/*
		 * now consider doing the pageout.
		 *
		 * for swap-backed pages, we do the pageout if we have either
		 * filled the cluster (in which case swnpages == swcpages), or
		 * run out of pages (p == NULL).
		 *
		 * for object pages, we always do the pageout.
		 */
		if (swap_backed) {
			if (p) {	/* if we just added a page to cluster */
				/* cluster not full yet? */
				if (swcpages < swnpages)
					continue;
			}

			/* starting I/O now... set up for it */
			npages = swcpages;
			ppsp = swpps;
			/* for swap-backed pages only */
			start = (vaddr_t) swslot;

			/* if this is final pageout we could have a few
			 * extra swap blocks */
			if (swcpages < swnpages) {
				uvm_swap_free(swslot + swcpages,
				    (swnpages - swcpages));
			}
		} else {
			/* normal object pageout */
			ppsp = pps;
			npages = sizeof(pps) / sizeof(struct vm_page *);
			/* not looked at because PGO_ALLPAGES is set */
			start = 0;
		}

		/*
		 * now do the pageout.
		 *
		 * for swap_backed pages we have already built the cluster.
		 * for !swap_backed pages, uvm_pager_put will call the object's
		 * "make put cluster" function to build a cluster on our behalf.
		 *
		 * we pass the PGO_PDFREECLUST flag to uvm_pager_put to instruct
		 * it to free the cluster pages for us on a successful I/O (it
		 * always does this for un-successful I/O requests).  this
		 * allows us to do clustered pageout without having to deal
		 * with cluster pages at this level.
		 *
		 * note locking semantics of uvm_pager_put with PGO_PDFREECLUST:
		 *  IN: locked: page queues
		 * OUT: locked:
		 *     !locked: pageqs
		 */
		uvmexp.pdpageouts++;
		result = uvm_pager_put(swap_backed ? NULL : uobj, p,
		    &ppsp, &npages, PGO_ALLPAGES|PGO_PDFREECLUST, start, 0);

		/*
		 * if we did i/o to swap, zero swslot to indicate that we are
		 * no longer building a swap-backed cluster.
		 */
		if (swap_backed)
			swslot = 0;	/* done with this cluster */

		/*
		 * first, we check for VM_PAGER_PEND which means that the
		 * async I/O is in progress and the async I/O done routine
		 * will clean up after us.  in this case we move on to the
		 * next page.
		 *
		 * there is a very remote chance that the pending async i/o can
		 * finish _before_ we get here.  if that happens, our page "p"
		 * may no longer be on the inactive queue.  so we verify this
		 * when determining the next page (starting over at the head if
		 * we've lost our inactive page).
		 */
		if (result == VM_PAGER_PEND) {
			uvmexp.paging += npages;
			uvm_lock_pageq();
			uvmexp.pdpending++;
			if (p) {
				if (p->pg_flags & PQ_INACTIVE)
					nextpg = TAILQ_NEXT(p, pageq);
				else
					nextpg = TAILQ_FIRST(pglst);
			} else {
				nextpg = NULL;
			}
			continue;
		}

		/* clean up "p" if we have one */
		if (p) {
			/*
			 * the I/O request to "p" is done and uvm_pager_put
			 * has freed any cluster pages it may have allocated
			 * during I/O.  all that is left for us to do is
			 * clean up page "p" (which is still PG_BUSY).
			 *
			 * our result could be one of the following:
			 *   VM_PAGER_OK: successful pageout
			 *
			 *   VM_PAGER_AGAIN: tmp resource shortage, we skip
			 *     to next page
			 *   VM_PAGER_{FAIL,ERROR,BAD}: an error.  we
			 *     "reactivate" page to get it out of the way (it
			 *     will eventually drift back into the inactive
			 *     queue for a retry).
			 *   VM_PAGER_UNLOCK: should never see this as it is
			 *     only valid for "get" operations
			 */

			/* relock p's object: page queues not locked yet, so
			 * no need for "try" */
#ifdef DIAGNOSTIC
			if (result == VM_PAGER_UNLOCK)
				panic("pagedaemon: pageout returned "
				    "invalid 'unlock' code");
#endif

			/* handle PG_WANTED now */
			if (p->pg_flags & PG_WANTED)
				wakeup(p);

			atomic_clearbits_int(&p->pg_flags, PG_BUSY|PG_WANTED);
			UVM_PAGE_OWN(p, NULL);

			/* released during I/O? Can only happen for anons */
			if (p->pg_flags & PG_RELEASED) {
				KASSERT(anon != NULL);
				/*
				 * remove page so we can get nextpg,
				 * also zero out anon so we don't use
				 * it after the free.
				 */
				anon->an_page = NULL;
				p->uanon = NULL;

				uvm_anfree(anon);	/* kills anon */
				pmap_page_protect(p, PROT_NONE);
				anon = NULL;
				uvm_lock_pageq();
				nextpg = TAILQ_NEXT(p, pageq);
				/* free released page */
				uvm_pagefree(p);
			} else {	/* page was not released during I/O */
				uvm_lock_pageq();
				nextpg = TAILQ_NEXT(p, pageq);
				if (result != VM_PAGER_OK) {
					/* pageout was a failure... */
					if (result != VM_PAGER_AGAIN)
						uvm_pageactivate(p);
					pmap_clear_reference(p);
					/* XXXCDC: if (swap_backed) FREE p's
					 * swap block? */
				} else {
					/* pageout was a success... */
					pmap_clear_reference(p);
					pmap_clear_modify(p);
					atomic_setbits_int(&p->pg_flags,
					    PG_CLEAN);
				}
			}

			/*
			 * drop object lock (if there is an object left).  do
			 * a safety check of nextpg to make sure it is on the
			 * inactive queue (it should be since PG_BUSY pages on
			 * the inactive queue can't be re-queued [note: not
			 * true for active queue]).
			 */
			if (nextpg && (nextpg->pg_flags & PQ_INACTIVE) == 0) {
				nextpg = TAILQ_FIRST(pglst);	/* reload! */
			}
		} else {
			/*
			 * if p is null in this loop, make sure it stays null
			 * in the next loop.
			 */
			nextpg = NULL;

			/*
			 * lock page queues here just so they're always locked
			 * at the end of the loop.
			 */
			uvm_lock_pageq();
		}
	}
	return (retval);
}
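
/*
 * Cluster sizing, for illustration (assuming the common MAXBSIZE of 64 KB
 * and 4 KB pages): swpps[] holds MAXBSIZE >> PAGE_SHIFT == 16 entries, so a
 * swap-backed cluster is paged out once 16 dirty pages have been gathered,
 * or earlier when the list (or the swap allocation) runs out, in which case
 * the unused tail of the swap slot range is returned with uvm_swap_free().
 */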
/*
 * uvmpd_scan: scan the page queues and attempt to meet our targets.
 *
 * => called with pageq's locked
 */
void
uvmpd_scan(void)
{
	int free, inactive_shortage, swap_shortage, pages_freed;
	struct vm_page *p, *nextpg;
	struct uvm_object *uobj;
	boolean_t got_it;

	uvmexp.pdrevs++;		/* counter */
	uobj = NULL;

	/*
	 * get current "free" page count
	 */
	free = uvmexp.free - BUFPAGES_DEFICIT;

#ifndef __SWAP_BROKEN
	/*
	 * swap out some processes if we are below our free target.
	 * we need to unlock the page queues for this.
	 */
	if (free < uvmexp.freetarg) {
		uvmexp.pdswout++;
		uvm_unlock_pageq();
		uvm_swapout_threads();
		uvm_lock_pageq();
	}
#endif

	/*
	 * now we want to work on meeting our targets.  first we work on our
	 * free target by converting inactive pages into free pages.  then
	 * we work on meeting our inactive target by converting active pages
	 * to inactive ones.
	 */

	/*
	 * alternate starting queue between swap and object based on the
	 * low bit of uvmexp.pdrevs (which we bump by one each call).
	 */
	got_it = FALSE;
	pages_freed = uvmexp.pdfreed;	/* XXX - int */
	if ((uvmexp.pdrevs & 1) != 0 && uvmexp.nswapdev != 0)
		got_it = uvmpd_scan_inactive(&uvm.page_inactive_swp);
	if (!got_it)
		got_it = uvmpd_scan_inactive(&uvm.page_inactive_obj);
	if (!got_it && (uvmexp.pdrevs & 1) == 0 && uvmexp.nswapdev != 0)
		(void) uvmpd_scan_inactive(&uvm.page_inactive_swp);
	pages_freed = uvmexp.pdfreed - pages_freed;

	/*
	 * we have done the scan to get free pages.  now we work on meeting
	 * our inactive target.
	 */
	inactive_shortage = uvmexp.inactarg - uvmexp.inactive - BUFPAGES_INACT;

	/*
	 * detect if we're not going to be able to page anything out
	 * until we free some swap resources from active pages.
	 */
	swap_shortage = 0;
	if (uvmexp.free < uvmexp.freetarg &&
	    uvmexp.swpginuse == uvmexp.swpages &&
	    uvmexp.swpgonly < uvmexp.swpages &&
	    pages_freed == 0) {
		swap_shortage = uvmexp.freetarg - uvmexp.free;
	}

	for (p = TAILQ_FIRST(&uvm.page_active);
	     p != NULL && (inactive_shortage > 0 || swap_shortage > 0);
	     p = nextpg) {
		nextpg = TAILQ_NEXT(p, pageq);
		if (p->pg_flags & PG_BUSY)
			continue;

		/* is page anon owned or ownerless? */
		if ((p->pg_flags & PQ_ANON) || p->uobject == NULL) {
			KASSERT(p->uanon != NULL);

			/* take over the page? */
			if ((p->pg_flags & PQ_ANON) == 0) {
				KASSERT(p->loan_count > 0);
				p->loan_count--;
				atomic_setbits_int(&p->pg_flags, PQ_ANON);
			}
		}

		/* skip this page if it's busy. */
		if ((p->pg_flags & PG_BUSY) != 0) {
			continue;
		}

		/*
		 * if there's a shortage of swap, free any swap allocated
		 * to this page so that other pages can be paged out.
		 */
		if (swap_shortage > 0) {
			if ((p->pg_flags & PQ_ANON) && p->uanon->an_swslot) {
				uvm_swap_free(p->uanon->an_swslot, 1);
				p->uanon->an_swslot = 0;
				atomic_clearbits_int(&p->pg_flags, PG_CLEAN);
				swap_shortage--;
			}
			if (p->pg_flags & PQ_AOBJ) {
				int slot = uao_set_swslot(p->uobject,
				    p->offset >> PAGE_SHIFT, 0);
				if (slot) {
					uvm_swap_free(slot, 1);
					atomic_clearbits_int(&p->pg_flags,
					    PG_CLEAN);
					swap_shortage--;
				}
			}
		}

		/*
		 * deactivate this page if there's a shortage of
		 * inactive pages.
		 */
		if (inactive_shortage > 0) {
			pmap_page_protect(p, PROT_NONE);
			/* no need to check wire_count as pg is "active" */
			uvm_pagedeactivate(p);
			uvmexp.pddeact++;
			inactive_shortage--;
		}
	}
}
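
/*
 * Note on the active-queue pass above (summary of the code, no new
 * behaviour): when swap_shortage is set, releasing a page's swap slot also
 * clears PG_CLEAN, so the page is treated as dirty again and will be written
 * to a freshly allocated slot on a later inactive-queue scan instead of
 * losing its only backing copy.
 */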
#ifdef HIBERNATE

/*
 * uvmpd_drop: drop clean pages from list
 */
void
uvmpd_drop(struct pglist *pglst)
{
	struct vm_page *p, *nextpg;

	for (p = TAILQ_FIRST(pglst); p != NULL; p = nextpg) {
		nextpg = TAILQ_NEXT(p, pageq);

		if (p->pg_flags & PQ_ANON || p->uobject == NULL)
			continue;

		if (p->pg_flags & PG_BUSY)
			continue;

		if (p->pg_flags & PG_CLEAN) {
			/*
			 * we now have the page queues locked.
			 * the page is not busy.  if the page is clean we
			 * can free it now and continue.
			 */
			if (p->pg_flags & PG_CLEAN) {
				if (p->pg_flags & PQ_SWAPBACKED) {
					/* this page now lives only in swap */
					uvmexp.swpgonly++;
				}

				/* zap all mappings with pmap_page_protect... */
				pmap_page_protect(p, PROT_NONE);
				uvm_pagefree(p);
			}
		}
	}
}

void
uvmpd_hibernate(void)
{
	uvm_lock_pageq();

	uvmpd_drop(&uvm.page_inactive_swp);
	uvmpd_drop(&uvm.page_inactive_obj);
	uvmpd_drop(&uvm.page_active);

	uvm_unlock_pageq();
}

#endif