vm_object.c

/*-
 * SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU)
 *
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *	Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *	School of Computer Science
 *	Carnegie Mellon University
 *	Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * Virtual memory object module.
 */

#include "opt_vm.h"

#include <sys/systm.h>
#include <sys/blockcount.h>
#include <sys/cpuset.h>
#include <sys/jail.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/mutex.h>
#include <sys/pctrie.h>
#include <sys/proc.h>
#include <sys/refcount.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/resourcevar.h>
#include <sys/refcount.h>
#include <sys/rwlock.h>
#include <sys/user.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_phys.h>
#include <vm/vm_pagequeue.h>
#include <vm/swap_pager.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/vm_radix.h>
#include <vm/vm_reserv.h>
#include <vm/uma.h>

static int old_msync;
SYSCTL_INT(_vm, OID_AUTO, old_msync, CTLFLAG_RW, &old_msync, 0,
    "Use old (insecure) msync behavior");

static int vm_object_page_collect_flush(vm_object_t object, vm_page_t p,
    int pagerflags, int flags, boolean_t *allclean,
    boolean_t *eio);
static boolean_t vm_object_page_remove_write(vm_page_t p, int flags,
    boolean_t *allclean);
static void vm_object_backing_remove(vm_object_t object);

/*
 *	Virtual memory objects maintain the actual data
 *	associated with allocated virtual memory.  A given
 *	page of memory exists within exactly one object.
 *
 *	An object is only deallocated when all "references"
 *	are given up.  Only one "reference" to a given
 *	region of an object should be writeable.
 *
 *	Associated with each object is a list of all resident
 *	memory pages belonging to that object; this list is
 *	maintained by the "vm_page" module, and locked by the object's
 *	lock.
 *
 *	Each object also records a "pager" routine which is
 *	used to retrieve (and store) pages to the proper backing
 *	storage.  In addition, objects may be backed by other
 *	objects from which they were virtual-copied.
 *
 *	The only items within the object structure which are
 *	modified after time of creation are:
 *		reference count		locked by object's lock
 *		pager routine		locked by object's lock
 *
 */

struct object_q vm_object_list;
struct mtx vm_object_list_mtx;	/* lock for object list and count */

struct vm_object kernel_object_store;

static SYSCTL_NODE(_vm_stats, OID_AUTO, object, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "VM object stats");

static COUNTER_U64_DEFINE_EARLY(object_collapses);
SYSCTL_COUNTER_U64(_vm_stats_object, OID_AUTO, collapses, CTLFLAG_RD,
    &object_collapses,
    "VM object collapses");

static COUNTER_U64_DEFINE_EARLY(object_bypasses);
SYSCTL_COUNTER_U64(_vm_stats_object, OID_AUTO, bypasses, CTLFLAG_RD,
    &object_bypasses,
    "VM object bypasses");

static COUNTER_U64_DEFINE_EARLY(object_collapse_waits);
SYSCTL_COUNTER_U64(_vm_stats_object, OID_AUTO, collapse_waits, CTLFLAG_RD,
    &object_collapse_waits,
    "Number of sleeps for collapse");

static uma_zone_t obj_zone;

static int vm_object_zinit(void *mem, int size, int flags);

#ifdef INVARIANTS
static void vm_object_zdtor(void *mem, int size, void *arg);

static void
vm_object_zdtor(void *mem, int size, void *arg)
{
	vm_object_t object;

	object = (vm_object_t)mem;
	KASSERT(object->ref_count == 0,
	    ("object %p ref_count = %d", object, object->ref_count));
	KASSERT(TAILQ_EMPTY(&object->memq),
	    ("object %p has resident pages in its memq", object));
	KASSERT(vm_radix_is_empty(&object->rtree),
	    ("object %p has resident pages in its trie", object));
#if VM_NRESERVLEVEL > 0
	KASSERT(LIST_EMPTY(&object->rvq),
	    ("object %p has reservations",
	    object));
#endif
	KASSERT(!vm_object_busied(object),
	    ("object %p busy = %d", object, blockcount_read(&object->busy)));
	KASSERT(object->resident_page_count == 0,
	    ("object %p resident_page_count = %d",
	    object, object->resident_page_count));
	KASSERT(atomic_load_int(&object->shadow_count) == 0,
	    ("object %p shadow_count = %d",
	    object, atomic_load_int(&object->shadow_count)));
	KASSERT(object->type == OBJT_DEAD,
	    ("object %p has non-dead type %d",
	    object, object->type));
	KASSERT(object->charge == 0 && object->cred == NULL,
	    ("object %p has non-zero charge %ju (%p)",
	    object, (uintmax_t)object->charge, object->cred));
}
#endif

static int
vm_object_zinit(void *mem, int size, int flags)
{
	vm_object_t object;

	object = (vm_object_t)mem;
	rw_init_flags(&object->lock, "vmobject", RW_DUPOK | RW_NEW);

	/* These are true for any object that has been freed */
	object->type = OBJT_DEAD;
	vm_radix_init(&object->rtree);
	refcount_init(&object->ref_count, 0);
	blockcount_init(&object->paging_in_progress);
	blockcount_init(&object->busy);
	object->resident_page_count = 0;
	atomic_store_int(&object->shadow_count, 0);
	object->flags = OBJ_DEAD;

	mtx_lock(&vm_object_list_mtx);
	TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
	mtx_unlock(&vm_object_list_mtx);
	return (0);
}
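
/*
 *	_vm_object_allocate:
 *
 *	Initialize a caller-supplied object with the given type, size,
 *	flags, and pager handle.  Shared by vm_object_init() and the
 *	vm_object_allocate*() front-ends below.
 */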
static void
_vm_object_allocate(objtype_t type, vm_pindex_t size, u_short flags,
    vm_object_t object, void *handle)
{
	TAILQ_INIT(&object->memq);
	LIST_INIT(&object->shadow_head);

	object->type = type;
	object->flags = flags;
	if ((flags & OBJ_SWAP) != 0) {
		pctrie_init(&object->un_pager.swp.swp_blks);
		object->un_pager.swp.writemappings = 0;
	}

	/*
	 * Ensure that swap_pager_swapoff() iteration over object_list
	 * sees up to date type and pctrie head if it observed
	 * non-dead object.
	 */
	atomic_thread_fence_rel();

	object->pg_color = 0;
	object->size = size;
	object->domain.dr_policy = NULL;
	object->generation = 1;
	object->cleangeneration = 1;
	refcount_init(&object->ref_count, 1);
	object->memattr = VM_MEMATTR_DEFAULT;
	object->cred = NULL;
	object->charge = 0;
	object->handle = handle;
	object->backing_object = NULL;
	object->backing_object_offset = (vm_ooffset_t) 0;
#if VM_NRESERVLEVEL > 0
	LIST_INIT(&object->rvq);
#endif
	umtx_shm_object_init(object);
}

/*
 *	vm_object_init:
 *
 *	Initialize the VM objects module.
 */
void
vm_object_init(void)
{
	TAILQ_INIT(&vm_object_list);
	mtx_init(&vm_object_list_mtx, "vm object_list", NULL, MTX_DEF);

	rw_init(&kernel_object->lock, "kernel vm object");
	vm_radix_init(&kernel_object->rtree);
	_vm_object_allocate(OBJT_PHYS, atop(VM_MAX_KERNEL_ADDRESS -
	    VM_MIN_KERNEL_ADDRESS), OBJ_UNMANAGED, kernel_object, NULL);
#if VM_NRESERVLEVEL > 0
	kernel_object->flags |= OBJ_COLORED;
	kernel_object->pg_color = (u_short)atop(VM_MIN_KERNEL_ADDRESS);
#endif
	kernel_object->un_pager.phys.ops = &default_phys_pg_ops;

	/*
	 * The lock portion of struct vm_object must be type stable due
	 * to vm_pageout_fallback_object_lock locking a vm object
	 * without holding any references to it.
	 *
	 * paging_in_progress is valid always.  Lockless references to
	 * the objects may acquire pip and then check OBJ_DEAD.
	 */
	obj_zone = uma_zcreate("VM OBJECT", sizeof (struct vm_object), NULL,
#ifdef INVARIANTS
	    vm_object_zdtor,
#else
	    NULL,
#endif
	    vm_object_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);

	vm_radix_zinit();
}
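
/*
 *	vm_object_clear_flag:
 *
 *	Clear the given flag bits on the object.  The object must be
 *	write-locked.
 */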
void
vm_object_clear_flag(vm_object_t object, u_short bits)
{
	VM_OBJECT_ASSERT_WLOCKED(object);
	object->flags &= ~bits;
}

/*
 *	Sets the default memory attribute for the specified object.  Pages
 *	that are allocated to this object are by default assigned this memory
 *	attribute.
 *
 *	Presently, this function must be called before any pages are allocated
 *	to the object.  In the future, this requirement may be relaxed for
 *	"default" and "swap" objects.
 */
int
vm_object_set_memattr(vm_object_t object, vm_memattr_t memattr)
{
	VM_OBJECT_ASSERT_WLOCKED(object);

	if (object->type == OBJT_DEAD)
		return (KERN_INVALID_ARGUMENT);
	if (!TAILQ_EMPTY(&object->memq))
		return (KERN_FAILURE);

	object->memattr = memattr;
	return (KERN_SUCCESS);
}
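
/*
 * Paging-in-progress (pip) accounting.  A non-zero count indicates that
 * some paging activity is referencing the object; vm_object_terminate()
 * and the collapse code below wait for this count to drain before
 * proceeding.
 */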
void
vm_object_pip_add(vm_object_t object, short i)
{
	if (i > 0)
		blockcount_acquire(&object->paging_in_progress, i);
}

void
vm_object_pip_wakeup(vm_object_t object)
{
	vm_object_pip_wakeupn(object, 1);
}

void
vm_object_pip_wakeupn(vm_object_t object, short i)
{
	if (i > 0)
		blockcount_release(&object->paging_in_progress, i);
}

/*
 * Atomically drop the object lock and wait for pip to drain.  This protects
 * from sleep/wakeup races due to identity changes.  The lock is not
 * re-acquired on return.
 */
static void
vm_object_pip_sleep(vm_object_t object, const char *waitid)
{
	(void)blockcount_sleep(&object->paging_in_progress, &object->lock,
	    waitid, PVM | PDROP);
}

void
vm_object_pip_wait(vm_object_t object, const char *waitid)
{
	VM_OBJECT_ASSERT_WLOCKED(object);

	blockcount_wait(&object->paging_in_progress, &object->lock, waitid,
	    PVM);
}

void
vm_object_pip_wait_unlocked(vm_object_t object, const char *waitid)
{
	VM_OBJECT_ASSERT_UNLOCKED(object);

	blockcount_wait(&object->paging_in_progress, NULL, waitid, PVM);
}

/*
 *	vm_object_allocate:
 *
 *	Returns a new object with the given size.
 */
vm_object_t
vm_object_allocate(objtype_t type, vm_pindex_t size)
{
	vm_object_t object;
	u_short flags;

	switch (type) {
	case OBJT_DEAD:
		panic("vm_object_allocate: can't create OBJT_DEAD");
	case OBJT_SWAP:
		flags = OBJ_COLORED | OBJ_SWAP;
		break;
	case OBJT_DEVICE:
	case OBJT_SG:
		flags = OBJ_FICTITIOUS | OBJ_UNMANAGED;
		break;
	case OBJT_MGTDEVICE:
		flags = OBJ_FICTITIOUS;
		break;
	case OBJT_PHYS:
		flags = OBJ_UNMANAGED;
		break;
	case OBJT_VNODE:
		flags = 0;
		break;
	default:
		panic("vm_object_allocate: type %d is undefined or dynamic",
		    type);
	}
	object = (vm_object_t)uma_zalloc(obj_zone, M_WAITOK);
	_vm_object_allocate(type, size, flags, object, NULL);

	return (object);
}

vm_object_t
vm_object_allocate_dyn(objtype_t dyntype, vm_pindex_t size, u_short flags)
{
	vm_object_t object;

	MPASS(dyntype >= OBJT_FIRST_DYN /* && dyntype < nitems(pagertab) */);
	object = (vm_object_t)uma_zalloc(obj_zone, M_WAITOK);
	_vm_object_allocate(dyntype, size, flags, object, NULL);

	return (object);
}

/*
 *	vm_object_allocate_anon:
 *
 *	Returns a new default object of the given size and marked as
 *	anonymous memory for special split/collapse handling.  Color
 *	to be initialized by the caller.
 */
vm_object_t
vm_object_allocate_anon(vm_pindex_t size, vm_object_t backing_object,
    struct ucred *cred, vm_size_t charge)
{
	vm_object_t handle, object;

	if (backing_object == NULL)
		handle = NULL;
	else if ((backing_object->flags & OBJ_ANON) != 0)
		handle = backing_object->handle;
	else
		handle = backing_object;
	object = uma_zalloc(obj_zone, M_WAITOK);
	_vm_object_allocate(OBJT_SWAP, size,
	    OBJ_ANON | OBJ_ONEMAPPING | OBJ_SWAP, object, handle);
	object->cred = cred;
	object->charge = cred != NULL ? charge : 0;
	return (object);
}

static void
vm_object_reference_vnode(vm_object_t object)
{
	u_int old;

	/*
	 * vnode objects need the lock for the first reference
	 * to serialize with vnode_object_deallocate().
	 */
	if (!refcount_acquire_if_gt(&object->ref_count, 0)) {
		VM_OBJECT_RLOCK(object);
		old = refcount_acquire(&object->ref_count);
		if (object->type == OBJT_VNODE && old == 0)
			vref(object->handle);
		VM_OBJECT_RUNLOCK(object);
	}
}

/*
 *	vm_object_reference:
 *
 *	Acquires a reference to the given object.
 */
void
vm_object_reference(vm_object_t object)
{
	if (object == NULL)
		return;

	if (object->type == OBJT_VNODE)
		vm_object_reference_vnode(object);
	else
		refcount_acquire(&object->ref_count);
	KASSERT((object->flags & OBJ_DEAD) == 0,
	    ("vm_object_reference: Referenced dead object."));
}

/*
 *	vm_object_reference_locked:
 *
 *	Gets another reference to the given object.
 *
 *	The object must be locked.
 */
void
vm_object_reference_locked(vm_object_t object)
{
	u_int old;

	VM_OBJECT_ASSERT_LOCKED(object);
	old = refcount_acquire(&object->ref_count);
	if (object->type == OBJT_VNODE && old == 0)
		vref(object->handle);
	KASSERT((object->flags & OBJ_DEAD) == 0,
	    ("vm_object_reference: Referenced dead object."));
}

/*
 * Handle deallocating an object of type OBJT_VNODE.
 */
static void
vm_object_deallocate_vnode(vm_object_t object)
{
	struct vnode *vp = (struct vnode *) object->handle;
	bool last;

	KASSERT(object->type == OBJT_VNODE,
	    ("vm_object_deallocate_vnode: not a vnode object"));
	KASSERT(vp != NULL, ("vm_object_deallocate_vnode: missing vp"));

	/* Object lock to protect handle lookup. */
	last = refcount_release(&object->ref_count);
	VM_OBJECT_RUNLOCK(object);

	if (!last)
		return;

	if (!umtx_shm_vnobj_persistent)
		umtx_shm_object_terminated(object);

	/* vrele may need the vnode lock. */
	vrele(vp);
}

/*
 * We dropped a reference on an object and discovered that it had a
 * single remaining shadow.  This is a sibling of the reference we
 * dropped.  Attempt to collapse the sibling and backing object.
 */
static vm_object_t
vm_object_deallocate_anon(vm_object_t backing_object)
{
	vm_object_t object;

	/* Fetch the final shadow. */
	object = LIST_FIRST(&backing_object->shadow_head);
	KASSERT(object != NULL &&
	    atomic_load_int(&backing_object->shadow_count) == 1,
	    ("vm_object_anon_deallocate: ref_count: %d, shadow_count: %d",
	    backing_object->ref_count,
	    atomic_load_int(&backing_object->shadow_count)));
	KASSERT((object->flags & OBJ_ANON) != 0,
	    ("invalid shadow object %p", object));

	if (!VM_OBJECT_TRYWLOCK(object)) {
		/*
		 * Prevent object from disappearing since we do not have a
		 * reference.
		 */
		vm_object_pip_add(object, 1);
		VM_OBJECT_WUNLOCK(backing_object);
		VM_OBJECT_WLOCK(object);
		vm_object_pip_wakeup(object);
	} else
		VM_OBJECT_WUNLOCK(backing_object);

	/*
	 * Check for a collapse/terminate race with the last reference holder.
	 */
	if ((object->flags & (OBJ_DEAD | OBJ_COLLAPSING)) != 0 ||
	    !refcount_acquire_if_not_zero(&object->ref_count)) {
		VM_OBJECT_WUNLOCK(object);
		return (NULL);
	}
	backing_object = object->backing_object;
	if (backing_object != NULL && (backing_object->flags & OBJ_ANON) != 0)
		vm_object_collapse(object);
	VM_OBJECT_WUNLOCK(object);

	return (object);
}

/*
 *	vm_object_deallocate:
 *
 *	Release a reference to the specified object,
 *	gained either through a vm_object_allocate
 *	or a vm_object_reference call.  When all references
 *	are gone, storage associated with this object
 *	may be relinquished.
 *
 *	No object may be locked.
 */
void
vm_object_deallocate(vm_object_t object)
{
	vm_object_t temp;
	bool released;

	while (object != NULL) {
		/*
		 * If the reference count goes to 0 we start calling
		 * vm_object_terminate() on the object chain.  A ref count
		 * of 1 may be a special case depending on the shadow count
		 * being 0 or 1.  These cases require a write lock on the
		 * object.
		 */
		if ((object->flags & OBJ_ANON) == 0)
			released = refcount_release_if_gt(&object->ref_count, 1);
		else
			released = refcount_release_if_gt(&object->ref_count, 2);
		if (released)
			return;

		if (object->type == OBJT_VNODE) {
			VM_OBJECT_RLOCK(object);
			if (object->type == OBJT_VNODE) {
				vm_object_deallocate_vnode(object);
				return;
			}
			VM_OBJECT_RUNLOCK(object);
		}

		VM_OBJECT_WLOCK(object);
		KASSERT(object->ref_count > 0,
		    ("vm_object_deallocate: object deallocated too many times: %d",
		    object->type));

		/*
		 * If this is not the final reference to an anonymous
		 * object we may need to collapse the shadow chain.
		 */
		if (!refcount_release(&object->ref_count)) {
			if (object->ref_count > 1 ||
			    atomic_load_int(&object->shadow_count) == 0) {
				if ((object->flags & OBJ_ANON) != 0 &&
				    object->ref_count == 1)
					vm_object_set_flag(object,
					    OBJ_ONEMAPPING);
				VM_OBJECT_WUNLOCK(object);
				return;
			}

			/* Handle collapsing last ref on anonymous objects. */
			object = vm_object_deallocate_anon(object);
			continue;
		}

		/*
		 * Handle the final reference to an object.  We restart
		 * the loop with the backing object to avoid recursion.
		 */
		umtx_shm_object_terminated(object);
		temp = object->backing_object;
		if (temp != NULL) {
			KASSERT(object->type == OBJT_SWAP,
			    ("shadowed tmpfs v_object 2 %p", object));
			vm_object_backing_remove(object);
		}

		KASSERT((object->flags & OBJ_DEAD) == 0,
		    ("vm_object_deallocate: Terminating dead object."));
		vm_object_set_flag(object, OBJ_DEAD);
		vm_object_terminate(object);
		object = temp;
	}
}
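
/*
 *	vm_object_destroy:
 *
 *	Free the object back to the UMA zone from which it was allocated.
 */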
void
vm_object_destroy(vm_object_t object)
{
	uma_zfree(obj_zone, object);
}
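
/*
 * Drop one reference from the object's shadow count.
 */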
static void
vm_object_sub_shadow(vm_object_t object)
{
	KASSERT(object->shadow_count >= 1,
	    ("object %p sub_shadow count zero", object));
	atomic_subtract_int(&object->shadow_count, 1);
}
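
/*
 * Unlink the object from its backing object: drop the backing object's
 * shadow count, take the object off the shadow list if it is on one, and
 * clear the backing pointer.  The _locked variant expects both objects to
 * be write-locked.
 */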
static void
vm_object_backing_remove_locked(vm_object_t object)
{
	vm_object_t backing_object;

	backing_object = object->backing_object;
	VM_OBJECT_ASSERT_WLOCKED(object);
	VM_OBJECT_ASSERT_WLOCKED(backing_object);

	KASSERT((object->flags & OBJ_COLLAPSING) == 0,
	    ("vm_object_backing_remove: Removing collapsing object."));

	vm_object_sub_shadow(backing_object);
	if ((object->flags & OBJ_SHADOWLIST) != 0) {
		LIST_REMOVE(object, shadow_list);
		vm_object_clear_flag(object, OBJ_SHADOWLIST);
	}
	object->backing_object = NULL;
}

static void
vm_object_backing_remove(vm_object_t object)
{
	vm_object_t backing_object;

	VM_OBJECT_ASSERT_WLOCKED(object);

	backing_object = object->backing_object;
	if ((object->flags & OBJ_SHADOWLIST) != 0) {
		VM_OBJECT_WLOCK(backing_object);
		vm_object_backing_remove_locked(object);
		VM_OBJECT_WUNLOCK(backing_object);
	} else {
		object->backing_object = NULL;
		vm_object_sub_shadow(backing_object);
	}
}
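
/*
 * Make backing_object the backing store of the object: bump its shadow
 * count and, for anonymous backing objects, add the object to the backing
 * object's shadow list.  The _locked variant expects an anonymous backing
 * object to already be write-locked.
 */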
static void
vm_object_backing_insert_locked(vm_object_t object, vm_object_t backing_object)
{
	VM_OBJECT_ASSERT_WLOCKED(object);

	atomic_add_int(&backing_object->shadow_count, 1);
	if ((backing_object->flags & OBJ_ANON) != 0) {
		VM_OBJECT_ASSERT_WLOCKED(backing_object);
		LIST_INSERT_HEAD(&backing_object->shadow_head, object,
		    shadow_list);
		vm_object_set_flag(object, OBJ_SHADOWLIST);
	}
	object->backing_object = backing_object;
}

static void
vm_object_backing_insert(vm_object_t object, vm_object_t backing_object)
{
	VM_OBJECT_ASSERT_WLOCKED(object);

	if ((backing_object->flags & OBJ_ANON) != 0) {
		VM_OBJECT_WLOCK(backing_object);
		vm_object_backing_insert_locked(object, backing_object);
		VM_OBJECT_WUNLOCK(backing_object);
	} else {
		object->backing_object = backing_object;
		atomic_add_int(&backing_object->shadow_count, 1);
	}
}

/*
 * Insert an object into a backing_object's shadow list with an additional
 * reference to the backing_object added.
 */
static void
vm_object_backing_insert_ref(vm_object_t object, vm_object_t backing_object)
{
	VM_OBJECT_ASSERT_WLOCKED(object);

	if ((backing_object->flags & OBJ_ANON) != 0) {
		VM_OBJECT_WLOCK(backing_object);
		KASSERT((backing_object->flags & OBJ_DEAD) == 0,
		    ("shadowing dead anonymous object"));
		vm_object_reference_locked(backing_object);
		vm_object_backing_insert_locked(object, backing_object);
		vm_object_clear_flag(backing_object, OBJ_ONEMAPPING);
		VM_OBJECT_WUNLOCK(backing_object);
	} else {
		vm_object_reference(backing_object);
		atomic_add_int(&backing_object->shadow_count, 1);
		object->backing_object = backing_object;
	}
}

/*
 * Transfer a backing reference from backing_object to object.
 */
static void
vm_object_backing_transfer(vm_object_t object, vm_object_t backing_object)
{
	vm_object_t new_backing_object;

	/*
	 * Note that the reference to backing_object->backing_object
	 * moves from within backing_object to within object.
	 */
	vm_object_backing_remove_locked(object);
	new_backing_object = backing_object->backing_object;
	if (new_backing_object == NULL)
		return;
	if ((new_backing_object->flags & OBJ_ANON) != 0) {
		VM_OBJECT_WLOCK(new_backing_object);
		vm_object_backing_remove_locked(backing_object);
		vm_object_backing_insert_locked(object, new_backing_object);
		VM_OBJECT_WUNLOCK(new_backing_object);
	} else {
		/*
		 * shadow_count for new_backing_object is left
		 * unchanged, its reference provided by backing_object
		 * is replaced by object.
		 */
		object->backing_object = new_backing_object;
		backing_object->backing_object = NULL;
	}
}

/*
 * Wait for a concurrent collapse to settle.
 */
static void
vm_object_collapse_wait(vm_object_t object)
{
	VM_OBJECT_ASSERT_WLOCKED(object);

	while ((object->flags & OBJ_COLLAPSING) != 0) {
		vm_object_pip_wait(object, "vmcolwait");
		counter_u64_add(object_collapse_waits, 1);
	}
}

/*
 * Waits for a backing object to clear a pending collapse and returns
 * it locked if it is an ANON object.
 */
static vm_object_t
vm_object_backing_collapse_wait(vm_object_t object)
{
	vm_object_t backing_object;

	VM_OBJECT_ASSERT_WLOCKED(object);

	for (;;) {
		backing_object = object->backing_object;
		if (backing_object == NULL ||
		    (backing_object->flags & OBJ_ANON) == 0)
			return (NULL);
		VM_OBJECT_WLOCK(backing_object);
		if ((backing_object->flags & (OBJ_DEAD | OBJ_COLLAPSING)) == 0)
			break;
		VM_OBJECT_WUNLOCK(object);
		vm_object_pip_sleep(backing_object, "vmbckwait");
		counter_u64_add(object_collapse_waits, 1);
		VM_OBJECT_WLOCK(object);
	}
	return (backing_object);
}

/*
 * vm_object_terminate_pages removes any remaining pageable pages
 * from the object and resets the object to an empty state.
 */
static void
vm_object_terminate_pages(vm_object_t object)
{
	vm_page_t p, p_next;

	VM_OBJECT_ASSERT_WLOCKED(object);

	/*
	 * Free any remaining pageable pages.  This also removes them from the
	 * paging queues.  However, don't free wired pages, just remove them
	 * from the object.  Rather than incrementally removing each page from
	 * the object, the page and object are reset to an empty state.
	 */
	TAILQ_FOREACH_SAFE(p, &object->memq, listq, p_next) {
		vm_page_assert_unbusied(p);
		KASSERT(p->object == object &&
		    (p->ref_count & VPRC_OBJREF) != 0,
		    ("vm_object_terminate_pages: page %p is inconsistent", p));
		p->object = NULL;
		if (vm_page_drop(p, VPRC_OBJREF) == VPRC_OBJREF) {
			VM_CNT_INC(v_pfree);
			vm_page_free(p);
		}
	}

	/*
	 * If the object contained any pages, then reset it to an empty state.
	 * None of the object's fields, including "resident_page_count", were
	 * modified by the preceding loop.
	 */
	if (object->resident_page_count != 0) {
		vm_radix_reclaim_allnodes(&object->rtree);
		TAILQ_INIT(&object->memq);
		object->resident_page_count = 0;
		if (object->type == OBJT_VNODE)
			vdrop(object->handle);
	}
}

/*
 *	vm_object_terminate actually destroys the specified object, freeing
 *	up all previously used resources.
 *
 *	The object must be locked.
 *	This routine may block.
 */
void
vm_object_terminate(vm_object_t object)
{
	VM_OBJECT_ASSERT_WLOCKED(object);
	KASSERT((object->flags & OBJ_DEAD) != 0,
	    ("terminating non-dead obj %p", object));
	KASSERT((object->flags & OBJ_COLLAPSING) == 0,
	    ("terminating collapsing obj %p", object));
	KASSERT(object->backing_object == NULL,
	    ("terminating shadow obj %p", object));

	/*
	 * Wait for the pageout daemon and other current users to be
	 * done with the object.  Note that new paging_in_progress
	 * users can come after this wait, but they must check
	 * OBJ_DEAD flag set (without unlocking the object), and avoid
	 * the object being terminated.
	 */
	vm_object_pip_wait(object, "objtrm");

	KASSERT(object->ref_count == 0,
	    ("vm_object_terminate: object with references, ref_count=%d",
	    object->ref_count));

	if ((object->flags & OBJ_PG_DTOR) == 0)
		vm_object_terminate_pages(object);

#if VM_NRESERVLEVEL > 0
	if (__predict_false(!LIST_EMPTY(&object->rvq)))
		vm_reserv_break_all(object);
#endif

	KASSERT(object->cred == NULL || (object->flags & OBJ_SWAP) != 0,
	    ("%s: non-swap obj %p has cred", __func__, object));

	/*
	 * Let the pager know object is dead.
	 */
	vm_pager_deallocate(object);
	VM_OBJECT_WUNLOCK(object);

	vm_object_destroy(object);
}

/*
 * Make the page read-only so that we can clear the object flags.  However, if
 * this is a nosync mmap then the object is likely to stay dirty so do not
 * mess with the page and do not clear the object flags.  Returns TRUE if the
 * page should be flushed, and FALSE otherwise.
 */
static boolean_t
vm_object_page_remove_write(vm_page_t p, int flags, boolean_t *allclean)
{
	vm_page_assert_busied(p);

	/*
	 * If we have been asked to skip nosync pages and this is a
	 * nosync page, skip it.  Note that the object flags were not
	 * cleared in this case so we do not have to set them.
	 */
	if ((flags & OBJPC_NOSYNC) != 0 && (p->a.flags & PGA_NOSYNC) != 0) {
		*allclean = FALSE;
		return (FALSE);
	} else {
		pmap_remove_write(p);
		return (p->dirty != 0);
	}
}

/*
 *	vm_object_page_clean
 *
 *	Clean all dirty pages in the specified range of object.  Leaves page
 *	on whatever queue it is currently on.  If NOSYNC is set then do not
 *	write out pages with PGA_NOSYNC set (originally comes from MAP_NOSYNC),
 *	leaving the object dirty.
 *
 *	For swap objects backing tmpfs regular files, do not flush anything,
 *	but remove write protection on the mapped pages to update mtime through
 *	mmapped writes.
 *
 *	When stuffing pages asynchronously, allow clustering.  XXX we need a
 *	synchronous clustering mode implementation.
 *
 *	Odd semantics: if start == end, we clean everything.
 *
 *	The object must be locked.
 *
 *	Returns FALSE if some page from the range was not written, as
 *	reported by the pager, and TRUE otherwise.
 */
boolean_t
vm_object_page_clean(vm_object_t object, vm_ooffset_t start, vm_ooffset_t end,
    int flags)
{
	vm_page_t np, p;
	vm_pindex_t pi, tend, tstart;
	int curgeneration, n, pagerflags;
	boolean_t eio, res, allclean;

	VM_OBJECT_ASSERT_WLOCKED(object);

	if (!vm_object_mightbedirty(object) || object->resident_page_count == 0)
		return (TRUE);

	pagerflags = (flags & (OBJPC_SYNC | OBJPC_INVAL)) != 0 ?
	    VM_PAGER_PUT_SYNC : VM_PAGER_CLUSTER_OK;
	pagerflags |= (flags & OBJPC_INVAL) != 0 ? VM_PAGER_PUT_INVAL : 0;

	tstart = OFF_TO_IDX(start);
	tend = (end == 0) ? object->size : OFF_TO_IDX(end + PAGE_MASK);
	allclean = tstart == 0 && tend >= object->size;
	res = TRUE;

rescan:
	curgeneration = object->generation;

	for (p = vm_page_find_least(object, tstart); p != NULL; p = np) {
		pi = p->pindex;
		if (pi >= tend)
			break;
		np = TAILQ_NEXT(p, listq);
		if (vm_page_none_valid(p))
			continue;
		if (vm_page_busy_acquire(p, VM_ALLOC_WAITFAIL) == 0) {
			if (object->generation != curgeneration &&
			    (flags & OBJPC_SYNC) != 0)
				goto rescan;
			np = vm_page_find_least(object, pi);
			continue;
		}
		if (!vm_object_page_remove_write(p, flags, &allclean)) {
			vm_page_xunbusy(p);
			continue;
		}
		if (object->type == OBJT_VNODE) {
			n = vm_object_page_collect_flush(object, p, pagerflags,
			    flags, &allclean, &eio);
			if (eio) {
				res = FALSE;
				allclean = FALSE;
			}
			if (object->generation != curgeneration &&
			    (flags & OBJPC_SYNC) != 0)
				goto rescan;

			/*
			 * If the VOP_PUTPAGES() did a truncated write, so
			 * that even the first page of the run is not fully
			 * written, vm_pageout_flush() returns 0 as the run
			 * length.  Since the condition that caused truncated
			 * write may be permanent, e.g. exhausted free space,
			 * accepting n == 0 would cause an infinite loop.
			 *
			 * Forwarding the iterator leaves the unwritten page
			 * behind, but there is not much we can do there if
			 * the filesystem refuses to write it.
			 */
			if (n == 0) {
				n = 1;
				allclean = FALSE;
			}
		} else {
			n = 1;
			vm_page_xunbusy(p);
		}
		np = vm_page_find_least(object, pi + n);
	}
#if 0
	VOP_FSYNC(vp, (pagerflags & VM_PAGER_PUT_SYNC) ? MNT_WAIT : 0);
#endif

	/*
	 * Leave updating cleangeneration for tmpfs objects to tmpfs
	 * scan.  It needs to update mtime, which happens for other
	 * filesystems during page writeouts.
	 */
	if (allclean && object->type == OBJT_VNODE)
		object->cleangeneration = curgeneration;
	return (res);
}
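
/*
 * Collect a run of pages contiguous with "p", busy them, and hand the run
 * to the pager via vm_pageout_flush().  Returns the run length reported by
 * the pager.
 */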
static int
vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int pagerflags,
    int flags, boolean_t *allclean, boolean_t *eio)
{
	vm_page_t ma[vm_pageout_page_count], p_first, tp;
	int count, i, mreq, runlen;

	vm_page_lock_assert(p, MA_NOTOWNED);
	vm_page_assert_xbusied(p);
	VM_OBJECT_ASSERT_WLOCKED(object);

	count = 1;
	mreq = 0;

	for (tp = p; count < vm_pageout_page_count; count++) {
		tp = vm_page_next(tp);
		if (tp == NULL || vm_page_tryxbusy(tp) == 0)
			break;
		if (!vm_object_page_remove_write(tp, flags, allclean)) {
			vm_page_xunbusy(tp);
			break;
		}
	}

	for (p_first = p; count < vm_pageout_page_count; count++) {
		tp = vm_page_prev(p_first);
		if (tp == NULL || vm_page_tryxbusy(tp) == 0)
			break;
		if (!vm_object_page_remove_write(tp, flags, allclean)) {
			vm_page_xunbusy(tp);
			break;
		}
		p_first = tp;
		mreq++;
	}

	for (tp = p_first, i = 0; i < count; tp = TAILQ_NEXT(tp, listq), i++)
		ma[i] = tp;

	vm_pageout_flush(ma, count, pagerflags, mreq, &runlen, eio);
	return (runlen);
}

/*
 * Note that there is absolutely no sense in writing out
 * anonymous objects, so we track down the vnode object
 * to write out.
 * We invalidate (remove) all pages from the address space
 * for semantic correctness.
 *
 * If the backing object is a device object with unmanaged pages, then any
 * mappings to the specified range of pages must be removed before this
 * function is called.
 *
 * Note: certain anonymous maps, such as MAP_NOSYNC maps,
 * may start out with a NULL object.
 */
boolean_t
vm_object_sync(vm_object_t object, vm_ooffset_t offset, vm_size_t size,
    boolean_t syncio, boolean_t invalidate)
{
	vm_object_t backing_object;
	struct vnode *vp;
	struct mount *mp;
	int error, flags, fsync_after;
	boolean_t res;

	if (object == NULL)
		return (TRUE);
	res = TRUE;
	error = 0;
	VM_OBJECT_WLOCK(object);
	while ((backing_object = object->backing_object) != NULL) {
		VM_OBJECT_WLOCK(backing_object);
		offset += object->backing_object_offset;
		VM_OBJECT_WUNLOCK(object);
		object = backing_object;
		if (object->size < OFF_TO_IDX(offset + size))
			size = IDX_TO_OFF(object->size) - offset;
	}
	/*
	 * Flush pages if writing is allowed, invalidate them
	 * if invalidation requested.  Pages undergoing I/O
	 * will be ignored by vm_object_page_remove().
	 *
	 * We cannot lock the vnode and then wait for paging
	 * to complete without deadlocking against vm_fault.
	 * Instead we simply call vm_object_page_remove() and
	 * allow it to block internally on a page-by-page
	 * basis when it encounters pages undergoing async
	 * I/O.
	 */
	if (object->type == OBJT_VNODE &&
	    vm_object_mightbedirty(object) != 0 &&
	    ((vp = object->handle)->v_vflag & VV_NOSYNC) == 0) {
		VM_OBJECT_WUNLOCK(object);
		(void)vn_start_write(vp, &mp, V_WAIT);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		if (syncio && !invalidate && offset == 0 &&
		    atop(size) == object->size) {
			/*
			 * If syncing the whole mapping of the file,
			 * it is faster to schedule all the writes in
			 * async mode, also allowing the clustering,
			 * and then wait for i/o to complete.
			 */
			flags = 0;
			fsync_after = TRUE;
		} else {
			flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
			flags |= invalidate ? (OBJPC_SYNC | OBJPC_INVAL) : 0;
			fsync_after = FALSE;
		}
		VM_OBJECT_WLOCK(object);
		res = vm_object_page_clean(object, offset, offset + size,
		    flags);
		VM_OBJECT_WUNLOCK(object);
		if (fsync_after) {
			for (;;) {
				error = VOP_FSYNC(vp, MNT_WAIT, curthread);
				if (error != ERELOOKUP)
					break;

				/*
				 * Allow SU/bufdaemon to handle more
				 * dependencies in the meantime.
				 */
				VOP_UNLOCK(vp);
				vn_finished_write(mp);

				(void)vn_start_write(vp, &mp, V_WAIT);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			}
		}
		VOP_UNLOCK(vp);
		vn_finished_write(mp);
		if (error != 0)
			res = FALSE;
		VM_OBJECT_WLOCK(object);
	}
	if ((object->type == OBJT_VNODE ||
	    object->type == OBJT_DEVICE) && invalidate) {
		if (object->type == OBJT_DEVICE)
			/*
			 * The option OBJPR_NOTMAPPED must be passed here
			 * because vm_object_page_remove() cannot remove
			 * unmanaged mappings.
			 */
			flags = OBJPR_NOTMAPPED;
		else if (old_msync)
			flags = 0;
		else
			flags = OBJPR_CLEANONLY;
		vm_object_page_remove(object, OFF_TO_IDX(offset),
		    OFF_TO_IDX(offset + size + PAGE_MASK), flags);
	}
	VM_OBJECT_WUNLOCK(object);
	return (res);
}

/*
 * Determine whether the given advice can be applied to the object.  Advice is
 * not applied to unmanaged pages since they never belong to page queues, and
 * since MADV_FREE is destructive, it can apply only to anonymous pages that
 * have been mapped at most once.
 */
static bool
vm_object_advice_applies(vm_object_t object, int advice)
{
	if ((object->flags & OBJ_UNMANAGED) != 0)
		return (false);
	if (advice != MADV_FREE)
		return (true);
	return ((object->flags & (OBJ_ONEMAPPING | OBJ_ANON)) ==
	    (OBJ_ONEMAPPING | OBJ_ANON));
}

static void
vm_object_madvise_freespace(vm_object_t object, int advice, vm_pindex_t pindex,
    vm_size_t size)
{
	if (advice == MADV_FREE)
		vm_pager_freespace(object, pindex, size);
}

/*
 *	vm_object_madvise:
 *
 *	Implements the madvise function at the object/page level.
 *
 *	MADV_WILLNEED	(any object)
 *
 *	    Activate the specified pages if they are resident.
 *
 *	MADV_DONTNEED	(any object)
 *
 *	    Deactivate the specified pages if they are resident.
 *
 *	MADV_FREE	(OBJT_SWAP objects, OBJ_ONEMAPPING only)
 *
 *	    Deactivate and clean the specified pages if they are
 *	    resident.  This permits the process to reuse the pages
 *	    without faulting or the kernel to reclaim the pages
 *	    without I/O.
 */
void
vm_object_madvise(vm_object_t object, vm_pindex_t pindex, vm_pindex_t end,
    int advice)
{
	vm_pindex_t tpindex;
	vm_object_t backing_object, tobject;
	vm_page_t m, tm;

	if (object == NULL)
		return;

relookup:
	VM_OBJECT_WLOCK(object);
	if (!vm_object_advice_applies(object, advice)) {
		VM_OBJECT_WUNLOCK(object);
		return;
	}
	for (m = vm_page_find_least(object, pindex); pindex < end; pindex++) {
		tobject = object;

		/*
		 * If the next page isn't resident in the top-level object, we
		 * need to search the shadow chain.  When applying MADV_FREE, we
		 * take care to release any swap space used to store
		 * non-resident pages.
		 */
		if (m == NULL || pindex < m->pindex) {
			/*
			 * Optimize a common case: if the top-level object has
			 * no backing object, we can skip over the non-resident
			 * range in constant time.
			 */
			if (object->backing_object == NULL) {
				tpindex = (m != NULL && m->pindex < end) ?
				    m->pindex : end;
				vm_object_madvise_freespace(object, advice,
				    pindex, tpindex - pindex);
				if ((pindex = tpindex) == end)
					break;
				goto next_page;
			}

			tpindex = pindex;
			do {
				vm_object_madvise_freespace(tobject, advice,
				    tpindex, 1);
				/*
				 * Prepare to search the next object in the
				 * chain.
				 */
				backing_object = tobject->backing_object;
				if (backing_object == NULL)
					goto next_pindex;
				VM_OBJECT_WLOCK(backing_object);
				tpindex +=
				    OFF_TO_IDX(tobject->backing_object_offset);
				if (tobject != object)
					VM_OBJECT_WUNLOCK(tobject);
				tobject = backing_object;
				if (!vm_object_advice_applies(tobject, advice))
					goto next_pindex;
			} while ((tm = vm_page_lookup(tobject, tpindex)) ==
			    NULL);
		} else {
next_page:
			tm = m;
			m = TAILQ_NEXT(m, listq);
		}

		/*
		 * If the page is not in a normal state, skip it.  The page
		 * can not be invalidated while the object lock is held.
		 */
		if (!vm_page_all_valid(tm) || vm_page_wired(tm))
			goto next_pindex;
		KASSERT((tm->flags & PG_FICTITIOUS) == 0,
		    ("vm_object_madvise: page %p is fictitious", tm));
		KASSERT((tm->oflags & VPO_UNMANAGED) == 0,
		    ("vm_object_madvise: page %p is not managed", tm));
		if (vm_page_tryxbusy(tm) == 0) {
			if (object != tobject)
				VM_OBJECT_WUNLOCK(object);
			if (advice == MADV_WILLNEED) {
				/*
				 * Reference the page before unlocking and
				 * sleeping so that the page daemon is less
				 * likely to reclaim it.
				 */
				vm_page_aflag_set(tm, PGA_REFERENCED);
			}
			if (!vm_page_busy_sleep(tm, "madvpo", 0))
				VM_OBJECT_WUNLOCK(tobject);
			goto relookup;
		}
		vm_page_advise(tm, advice);
		vm_page_xunbusy(tm);
		vm_object_madvise_freespace(tobject, advice, tm->pindex, 1);
next_pindex:
		if (tobject != object)
			VM_OBJECT_WUNLOCK(tobject);
	}
	VM_OBJECT_WUNLOCK(object);
}
/*
 * vm_object_shadow:
 *
 * Create a new object which is backed by the
 * specified existing object range. The source
 * object reference is deallocated.
 *
 * The new object and offset into that object
 * are returned in the source parameters.
 */
void
vm_object_shadow(vm_object_t *object, vm_ooffset_t *offset, vm_size_t length,
    struct ucred *cred, bool shared)
{
	vm_object_t source;
	vm_object_t result;

	source = *object;

	/*
	 * Don't create the new object if the old object isn't shared.
	 *
	 * If we hold the only reference we can guarantee that it won't
	 * increase while we have the map locked. Otherwise the race is
	 * harmless and we will end up with an extra shadow object that
	 * will be collapsed later.
	 */
	if (source != NULL && source->ref_count == 1 &&
	    (source->flags & OBJ_ANON) != 0)
		return;

	/*
	 * Allocate a new object with the given length.
	 */
	result = vm_object_allocate_anon(atop(length), source, cred, length);

	/*
	 * Store the offset into the source object, and fix up the offset into
	 * the new object.
	 */
	result->backing_object_offset = *offset;

	if (shared || source != NULL) {
		VM_OBJECT_WLOCK(result);

		/*
		 * The new object shadows the source object, adding a
		 * reference to it. Our caller changes his reference
		 * to point to the new object, removing a reference to
		 * the source object. Net result: no change of
		 * reference count, unless the caller needs to add one
		 * more reference due to forking a shared map entry.
		 */
		if (shared) {
			vm_object_reference_locked(result);
			vm_object_clear_flag(result, OBJ_ONEMAPPING);
		}

		/*
		 * Try to optimize the result object's page color when
		 * shadowing in order to maintain page coloring
		 * consistency in the combined shadowed object.
		 */
		if (source != NULL) {
			vm_object_backing_insert(result, source);
			result->domain = source->domain;
#if VM_NRESERVLEVEL > 0
			vm_object_set_flag(result,
			    (source->flags & OBJ_COLORED));
			result->pg_color = (source->pg_color +
			    OFF_TO_IDX(*offset)) & ((1 << (VM_NFREEORDER -
			    1)) - 1);
#endif
		}
		VM_OBJECT_WUNLOCK(result);
	}

	/*
	 * Return the new object and the offset into it.
	 */
	*offset = 0;
	*object = result;
}
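
/*
 * Illustrative sketch only ("#if 0"-guarded, hypothetical function name):
 * how a copy-on-write style caller consumes vm_object_shadow()'s in/out
 * parameters. The real callers live in vm_map.c; the map-entry
 * bookkeeping around them is elided here.
 */
#if 0
static void
cow_shadow_example(vm_map_entry_t entry)
{
	vm_object_t obj = entry->object.vm_object;
	vm_ooffset_t off = entry->offset;

	/*
	 * vm_object_shadow() either leaves "obj"/"off" untouched (no
	 * shadow was needed) or replaces them with the new shadow object
	 * and a zero offset.
	 */
	vm_object_shadow(&obj, &off, entry->end - entry->start,
	    entry->cred, false);
	entry->object.vm_object = obj;
	entry->offset = off;
}
#endif
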
/*
 * vm_object_split:
 *
 * Split the pages in a map entry into a new object. This affords
 * easier removal of unused pages, and keeps object inheritance from
 * being a negative impact on memory usage.
 */
void
vm_object_split(vm_map_entry_t entry)
{
	vm_page_t m, m_next;
	vm_object_t orig_object, new_object, backing_object;
	vm_pindex_t idx, offidxstart;
	vm_size_t size;

	orig_object = entry->object.vm_object;
	KASSERT((orig_object->flags & OBJ_ONEMAPPING) != 0,
	    ("vm_object_split: Splitting object with multiple mappings."));
	if ((orig_object->flags & OBJ_ANON) == 0)
		return;
	if (orig_object->ref_count <= 1)
		return;
	VM_OBJECT_WUNLOCK(orig_object);

	offidxstart = OFF_TO_IDX(entry->offset);
	size = atop(entry->end - entry->start);

	new_object = vm_object_allocate_anon(size, orig_object,
	    orig_object->cred, ptoa(size));

	/*
	 * We must wait for the orig_object to complete any in-progress
	 * collapse so that the swap blocks are stable below. The
	 * additional reference on backing_object by new object will
	 * prevent further collapse operations until split completes.
	 */
	VM_OBJECT_WLOCK(orig_object);
	vm_object_collapse_wait(orig_object);

	/*
	 * At this point, the new object is still private, so the order in
	 * which the original and new objects are locked does not matter.
	 */
	VM_OBJECT_WLOCK(new_object);
	new_object->domain = orig_object->domain;
	backing_object = orig_object->backing_object;
	if (backing_object != NULL) {
		vm_object_backing_insert_ref(new_object, backing_object);
		new_object->backing_object_offset =
		    orig_object->backing_object_offset + entry->offset;
	}
	if (orig_object->cred != NULL) {
		crhold(orig_object->cred);
		KASSERT(orig_object->charge >= ptoa(size),
		    ("orig_object->charge < 0"));
		orig_object->charge -= ptoa(size);
	}

	/*
	 * Mark the split operation so that swap_pager_getpages() knows
	 * that the object is in transition.
	 */
	vm_object_set_flag(orig_object, OBJ_SPLIT);
#ifdef INVARIANTS
	idx = 0;
#endif
retry:
	m = vm_page_find_least(orig_object, offidxstart);
	KASSERT(m == NULL || idx <= m->pindex - offidxstart,
	    ("%s: object %p was repopulated", __func__, orig_object));
	for (; m != NULL && (idx = m->pindex - offidxstart) < size;
	    m = m_next) {
		m_next = TAILQ_NEXT(m, listq);

		/*
		 * We must wait for pending I/O to complete before we can
		 * rename the page.
		 *
		 * We do not have to VM_PROT_NONE the page as mappings should
		 * not be changed by this operation.
		 */
		if (vm_page_tryxbusy(m) == 0) {
			VM_OBJECT_WUNLOCK(new_object);
			if (vm_page_busy_sleep(m, "spltwt", 0))
				VM_OBJECT_WLOCK(orig_object);
			VM_OBJECT_WLOCK(new_object);
			goto retry;
		}

		/*
		 * The page was left invalid. Likely placed there by
		 * an incomplete fault. Just remove and ignore.
		 */
		if (vm_page_none_valid(m)) {
			if (vm_page_remove(m))
				vm_page_free(m);
			continue;
		}

		/* vm_page_rename() will dirty the page. */
		if (vm_page_rename(m, new_object, idx)) {
			vm_page_xunbusy(m);
			VM_OBJECT_WUNLOCK(new_object);
			VM_OBJECT_WUNLOCK(orig_object);
			vm_radix_wait();
			VM_OBJECT_WLOCK(orig_object);
			VM_OBJECT_WLOCK(new_object);
			goto retry;
		}

#if VM_NRESERVLEVEL > 0
		/*
		 * If some of the reservation's allocated pages remain with
		 * the original object, then transferring the reservation to
		 * the new object is neither particularly beneficial nor
		 * particularly harmful as compared to leaving the reservation
		 * with the original object. If, however, all of the
		 * reservation's allocated pages are transferred to the new
		 * object, then transferring the reservation is typically
		 * beneficial. Determining which of these two cases applies
		 * would be more costly than unconditionally renaming the
		 * reservation.
		 */
		vm_reserv_rename(m, new_object, orig_object, offidxstart);
#endif
	}

	/*
	 * swap_pager_copy() can sleep, in which case the orig_object's
	 * and new_object's locks are released and reacquired.
	 */
	swap_pager_copy(orig_object, new_object, offidxstart, 0);

	TAILQ_FOREACH(m, &new_object->memq, listq)
		vm_page_xunbusy(m);

	vm_object_clear_flag(orig_object, OBJ_SPLIT);
	VM_OBJECT_WUNLOCK(orig_object);
	VM_OBJECT_WUNLOCK(new_object);
	entry->object.vm_object = new_object;
	entry->offset = 0LL;
	vm_object_deallocate(orig_object);
	VM_OBJECT_WLOCK(new_object);
}
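
/*
 * Worked index example for the split above (hypothetical numbers,
 * assuming 4 KB pages): a map entry with entry->offset == 0x6000 and a
 * length of 0x8000 gives offidxstart = OFF_TO_IDX(0x6000) = 6 and
 * size = atop(0x8000) = 8 pages. A resident page at pindex 9 in
 * orig_object is renamed to idx = 9 - 6 = 3 in new_object, and the
 * entry is rewritten to point at new_object with offset 0.
 */
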
static vm_page_t
vm_object_collapse_scan_wait(vm_object_t object, vm_page_t p)
{
	vm_object_t backing_object;

	VM_OBJECT_ASSERT_WLOCKED(object);
	backing_object = object->backing_object;
	VM_OBJECT_ASSERT_WLOCKED(backing_object);

	KASSERT(p == NULL || p->object == object || p->object == backing_object,
	    ("invalid ownership %p %p %p", p, object, backing_object));
	/* The page is only NULL when rename fails. */
	if (p == NULL) {
		VM_OBJECT_WUNLOCK(object);
		VM_OBJECT_WUNLOCK(backing_object);
		vm_radix_wait();
		VM_OBJECT_WLOCK(object);
	} else if (p->object == object) {
		VM_OBJECT_WUNLOCK(backing_object);
		if (vm_page_busy_sleep(p, "vmocol", 0))
			VM_OBJECT_WLOCK(object);
	} else {
		VM_OBJECT_WUNLOCK(object);
		if (!vm_page_busy_sleep(p, "vmocol", 0))
			VM_OBJECT_WUNLOCK(backing_object);
		VM_OBJECT_WLOCK(object);
	}
	VM_OBJECT_WLOCK(backing_object);
	return (TAILQ_FIRST(&backing_object->memq));
}

static bool
vm_object_scan_all_shadowed(vm_object_t object)
{
	vm_object_t backing_object;
	vm_page_t p, pp;
	vm_pindex_t backing_offset_index, new_pindex, pi, ps;

	VM_OBJECT_ASSERT_WLOCKED(object);
	VM_OBJECT_ASSERT_WLOCKED(object->backing_object);

	backing_object = object->backing_object;

	if ((backing_object->flags & OBJ_ANON) == 0)
		return (false);

	pi = backing_offset_index = OFF_TO_IDX(object->backing_object_offset);
	p = vm_page_find_least(backing_object, pi);
	ps = swap_pager_find_least(backing_object, pi);

	/*
	 * Only check pages inside the parent object's range and
	 * inside the parent object's mapping of the backing object.
	 */
	for (;; pi++) {
		if (p != NULL && p->pindex < pi)
			p = TAILQ_NEXT(p, listq);
		if (ps < pi)
			ps = swap_pager_find_least(backing_object, pi);
		if (p == NULL && ps >= backing_object->size)
			break;
		else if (p == NULL)
			pi = ps;
		else
			pi = MIN(p->pindex, ps);

		new_pindex = pi - backing_offset_index;
		if (new_pindex >= object->size)
			break;

		if (p != NULL) {
			/*
			 * If the backing object page is busy a
			 * grandparent or older page may still be
			 * undergoing CoW. It is not safe to collapse
			 * the backing object until it is quiesced.
			 */
			if (vm_page_tryxbusy(p) == 0)
				return (false);

			/*
			 * We raced with the fault handler that left
			 * newly allocated invalid page on the object
			 * queue and retried.
			 */
			if (!vm_page_all_valid(p))
				goto unbusy_ret;
		}

		/*
		 * See if the parent has the page or if the parent's object
		 * pager has the page. If the parent has the page but the page
		 * is not valid, the parent's object pager must have the page.
		 *
		 * If this fails, the parent does not completely shadow the
		 * object and we might as well give up now.
		 */
		pp = vm_page_lookup(object, new_pindex);

		/*
		 * The valid check here is stable due to object lock
		 * being required to clear valid and initiate paging.
		 * Busy of p disallows fault handler to validate pp.
		 */
		if ((pp == NULL || vm_page_none_valid(pp)) &&
		    !vm_pager_has_page(object, new_pindex, NULL, NULL))
			goto unbusy_ret;
		if (p != NULL)
			vm_page_xunbusy(p);
	}
	return (true);

unbusy_ret:
	if (p != NULL)
		vm_page_xunbusy(p);
	return (false);
}

static void
vm_object_collapse_scan(vm_object_t object)
{
	vm_object_t backing_object;
	vm_page_t next, p, pp;
	vm_pindex_t backing_offset_index, new_pindex;

	VM_OBJECT_ASSERT_WLOCKED(object);
	VM_OBJECT_ASSERT_WLOCKED(object->backing_object);

	backing_object = object->backing_object;
	backing_offset_index = OFF_TO_IDX(object->backing_object_offset);

	/*
	 * Our scan
	 */
	for (p = TAILQ_FIRST(&backing_object->memq); p != NULL; p = next) {
		next = TAILQ_NEXT(p, listq);
		new_pindex = p->pindex - backing_offset_index;

		/*
		 * Check for busy page
		 */
		if (vm_page_tryxbusy(p) == 0) {
			next = vm_object_collapse_scan_wait(object, p);
			continue;
		}

		KASSERT(object->backing_object == backing_object,
		    ("vm_object_collapse_scan: backing object mismatch %p != %p",
		    object->backing_object, backing_object));
		KASSERT(p->object == backing_object,
		    ("vm_object_collapse_scan: object mismatch %p != %p",
		    p->object, backing_object));

		if (p->pindex < backing_offset_index ||
		    new_pindex >= object->size) {
			vm_pager_freespace(backing_object, p->pindex, 1);

			KASSERT(!pmap_page_is_mapped(p),
			    ("freeing mapped page %p", p));
			if (vm_page_remove(p))
				vm_page_free(p);
			continue;
		}

		if (!vm_page_all_valid(p)) {
			KASSERT(!pmap_page_is_mapped(p),
			    ("freeing mapped page %p", p));
			if (vm_page_remove(p))
				vm_page_free(p);
			continue;
		}

		pp = vm_page_lookup(object, new_pindex);
		if (pp != NULL && vm_page_tryxbusy(pp) == 0) {
			vm_page_xunbusy(p);
			/*
			 * The page in the parent is busy and possibly not
			 * (yet) valid. Until its state is finalized by the
			 * busy bit owner, we can't tell whether it shadows the
			 * original page.
			 */
			next = vm_object_collapse_scan_wait(object, pp);
			continue;
		}

		if (pp != NULL && vm_page_none_valid(pp)) {
			/*
			 * The page was invalid in the parent. Likely placed
			 * there by an incomplete fault. Just remove and
			 * ignore. p can replace it.
			 */
			if (vm_page_remove(pp))
				vm_page_free(pp);
			pp = NULL;
		}

		if (pp != NULL || vm_pager_has_page(object, new_pindex, NULL,
		    NULL)) {
			/*
			 * The page already exists in the parent OR swap exists
			 * for this location in the parent. Leave the parent's
			 * page alone. Destroy the original page from the
			 * backing object.
			 */
			vm_pager_freespace(backing_object, p->pindex, 1);
			KASSERT(!pmap_page_is_mapped(p),
			    ("freeing mapped page %p", p));
			if (vm_page_remove(p))
				vm_page_free(p);
			if (pp != NULL)
				vm_page_xunbusy(pp);
			continue;
		}

		/*
		 * Page does not exist in parent, rename the page from the
		 * backing object to the main object.
		 *
		 * If the page was mapped to a process, it can remain mapped
		 * through the rename. vm_page_rename() will dirty the page.
		 */
		if (vm_page_rename(p, object, new_pindex)) {
			vm_page_xunbusy(p);
			next = vm_object_collapse_scan_wait(object, NULL);
			continue;
		}

		/* Use the old pindex to free the right page. */
		vm_pager_freespace(backing_object, new_pindex +
		    backing_offset_index, 1);

#if VM_NRESERVLEVEL > 0
		/*
		 * Rename the reservation.
		 */
		vm_reserv_rename(p, object, backing_object,
		    backing_offset_index);
#endif
		vm_page_xunbusy(p);
	}
	return;
}

/*
 * vm_object_collapse:
 *
 * Collapse an object with the object backing it.
 * Pages in the backing object are moved into the
 * parent, and the backing object is deallocated.
 */
void
vm_object_collapse(vm_object_t object)
{
	vm_object_t backing_object, new_backing_object;

	VM_OBJECT_ASSERT_WLOCKED(object);

	while (TRUE) {
		KASSERT((object->flags & (OBJ_DEAD | OBJ_ANON)) == OBJ_ANON,
		    ("collapsing invalid object"));

		/*
		 * Wait for the backing_object to finish any pending
		 * collapse so that the caller sees the shortest possible
		 * shadow chain.
		 */
		backing_object = vm_object_backing_collapse_wait(object);
		if (backing_object == NULL)
			return;

		KASSERT(object->ref_count > 0 &&
		    object->ref_count > atomic_load_int(&object->shadow_count),
		    ("collapse with invalid ref %d or shadow %d count.",
		    object->ref_count, atomic_load_int(&object->shadow_count)));
		KASSERT((backing_object->flags &
		    (OBJ_COLLAPSING | OBJ_DEAD)) == 0,
		    ("vm_object_collapse: Backing object already collapsing."));
		KASSERT((object->flags & (OBJ_COLLAPSING | OBJ_DEAD)) == 0,
		    ("vm_object_collapse: object is already collapsing."));

		/*
		 * We know that we can either collapse the backing object if
		 * the parent is the only reference to it, or (perhaps) have
		 * the parent bypass the object if the parent happens to shadow
		 * all the resident pages in the entire backing object.
		 */
		if (backing_object->ref_count == 1) {
			KASSERT(atomic_load_int(&backing_object->shadow_count)
			    == 1,
			    ("vm_object_collapse: shadow_count: %d",
			    atomic_load_int(&backing_object->shadow_count)));
			vm_object_pip_add(object, 1);
			vm_object_set_flag(object, OBJ_COLLAPSING);
			vm_object_pip_add(backing_object, 1);
			vm_object_set_flag(backing_object, OBJ_DEAD);

			/*
			 * If there is exactly one reference to the backing
			 * object, we can collapse it into the parent.
			 */
			vm_object_collapse_scan(object);

			/*
			 * Move the pager from backing_object to object.
			 *
			 * swap_pager_copy() can sleep, in which case the
			 * backing_object's and object's locks are released and
			 * reacquired.
			 */
			swap_pager_copy(backing_object, object,
			    OFF_TO_IDX(object->backing_object_offset), TRUE);

			/*
			 * Object now shadows whatever backing_object did.
			 */
			vm_object_clear_flag(object, OBJ_COLLAPSING);
			vm_object_backing_transfer(object, backing_object);
			object->backing_object_offset +=
			    backing_object->backing_object_offset;
			VM_OBJECT_WUNLOCK(object);
			vm_object_pip_wakeup(object);

			/*
			 * Discard backing_object.
			 *
			 * Since the backing object has no pages, no pager left,
			 * and no object references within it, all that is
			 * necessary is to dispose of it.
			 */
			KASSERT(backing_object->ref_count == 1, (
			    "backing_object %p was somehow re-referenced during collapse!",
			    backing_object));
			vm_object_pip_wakeup(backing_object);
			(void)refcount_release(&backing_object->ref_count);
			umtx_shm_object_terminated(backing_object);
			vm_object_terminate(backing_object);
			counter_u64_add(object_collapses, 1);
			VM_OBJECT_WLOCK(object);
		} else {
			/*
			 * If we do not entirely shadow the backing object,
			 * there is nothing we can do so we give up.
			 *
			 * The object lock and backing_object lock must not
			 * be dropped during this sequence.
			 */
			if (!vm_object_scan_all_shadowed(object)) {
				VM_OBJECT_WUNLOCK(backing_object);
				break;
			}

			/*
			 * Make the parent shadow the next object in the
			 * chain. Deallocating backing_object will not remove
			 * it, since its reference count is at least 2.
			 */
			vm_object_backing_remove_locked(object);
			new_backing_object = backing_object->backing_object;
			if (new_backing_object != NULL) {
				vm_object_backing_insert_ref(object,
				    new_backing_object);
				object->backing_object_offset +=
				    backing_object->backing_object_offset;
			}

			/*
			 * Drop the reference count on backing_object. Since
			 * its ref_count was at least 2, it will not vanish.
			 */
			(void)refcount_release(&backing_object->ref_count);
			KASSERT(backing_object->ref_count >= 1, (
			    "backing_object %p was somehow dereferenced during collapse!",
			    backing_object));
			VM_OBJECT_WUNLOCK(backing_object);
			counter_u64_add(object_bypasses, 1);
		}

		/*
		 * Try again with this object's new backing object.
		 */
	}
}
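
/*
 * Worked offset example for the loop above (hypothetical numbers): if
 * object maps backing_object at backing_object_offset 0x2000 and
 * backing_object in turn maps its own backer at 0x5000, then after
 * either the collapse or the bypass branch the parent addresses the
 * surviving backer at 0x2000 + 0x5000 = 0x7000, which is exactly the
 * "backing_object_offset +=" arithmetic performed in both branches.
 */
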
/*
 * vm_object_page_remove:
 *
 * For the given object, either frees or invalidates each of the
 * specified pages. In general, a page is freed. However, if a page is
 * wired for any reason other than the existence of a managed, wired
 * mapping, then it may be invalidated but not removed from the object.
 * Pages are specified by the given range ["start", "end") and the option
 * OBJPR_CLEANONLY. As a special case, if "end" is zero, then the range
 * extends from "start" to the end of the object. If the option
 * OBJPR_CLEANONLY is specified, then only the non-dirty pages within the
 * specified range are affected. If the option OBJPR_NOTMAPPED is
 * specified, then the pages within the specified range must have no
 * mappings. Otherwise, if this option is not specified, any mappings to
 * the specified pages are removed before the pages are freed or
 * invalidated.
 *
 * In general, this operation should only be performed on objects that
 * contain managed pages. There are, however, two exceptions. First, it
 * is performed on the kernel and kmem objects by vm_map_entry_delete().
 * Second, it is used by msync(..., MS_INVALIDATE) to invalidate device-
 * backed pages. In both of these cases, the option OBJPR_CLEANONLY must
 * not be specified and the option OBJPR_NOTMAPPED must be specified.
 *
 * The object must be locked.
 */
void
vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
    int options)
{
	vm_page_t p, next;

	VM_OBJECT_ASSERT_WLOCKED(object);
	KASSERT((object->flags & OBJ_UNMANAGED) == 0 ||
	    (options & (OBJPR_CLEANONLY | OBJPR_NOTMAPPED)) == OBJPR_NOTMAPPED,
	    ("vm_object_page_remove: illegal options for object %p", object));
	if (object->resident_page_count == 0)
		return;
	vm_object_pip_add(object, 1);
again:
	p = vm_page_find_least(object, start);

	/*
	 * Here, the variable "p" is either (1) the page with the least pindex
	 * greater than or equal to the parameter "start" or (2) NULL.
	 */
	for (; p != NULL && (p->pindex < end || end == 0); p = next) {
		next = TAILQ_NEXT(p, listq);

		/*
		 * Skip invalid pages if asked to do so. Try to avoid acquiring
		 * the busy lock, as some consumers rely on this to avoid
		 * deadlocks.
		 *
		 * A thread may concurrently transition the page from invalid to
		 * valid using only the busy lock, so the result of this check
		 * is immediately stale. It is up to consumers to handle this,
		 * for instance by ensuring that all invalid->valid transitions
		 * happen with a mutex held, as may be possible for a
		 * filesystem.
		 */
		if ((options & OBJPR_VALIDONLY) != 0 && vm_page_none_valid(p))
			continue;

		/*
		 * If the page is wired for any reason besides the existence
		 * of managed, wired mappings, then it cannot be freed. For
		 * example, fictitious pages, which represent device memory,
		 * are inherently wired and cannot be freed. They can,
		 * however, be invalidated if the option OBJPR_CLEANONLY is
		 * not specified.
		 */
		if (vm_page_tryxbusy(p) == 0) {
			if (vm_page_busy_sleep(p, "vmopar", 0))
				VM_OBJECT_WLOCK(object);
			goto again;
		}
		if ((options & OBJPR_VALIDONLY) != 0 && vm_page_none_valid(p)) {
			vm_page_xunbusy(p);
			continue;
		}
		if (vm_page_wired(p)) {
wired:
			if ((options & OBJPR_NOTMAPPED) == 0 &&
			    object->ref_count != 0)
				pmap_remove_all(p);
			if ((options & OBJPR_CLEANONLY) == 0) {
				vm_page_invalid(p);
				vm_page_undirty(p);
			}
			vm_page_xunbusy(p);
			continue;
		}
		KASSERT((p->flags & PG_FICTITIOUS) == 0,
		    ("vm_object_page_remove: page %p is fictitious", p));
		if ((options & OBJPR_CLEANONLY) != 0 &&
		    !vm_page_none_valid(p)) {
			if ((options & OBJPR_NOTMAPPED) == 0 &&
			    object->ref_count != 0 &&
			    !vm_page_try_remove_write(p))
				goto wired;
			if (p->dirty != 0) {
				vm_page_xunbusy(p);
				continue;
			}
		}
		if ((options & OBJPR_NOTMAPPED) == 0 &&
		    object->ref_count != 0 && !vm_page_try_remove_all(p))
			goto wired;
		vm_page_free(p);
	}
	vm_object_pip_wakeup(object);

	vm_pager_freespace(object, start, (end == 0 ? object->size : end) -
	    start);
}
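
/*
 * Illustrative call sites only ("#if 0"-guarded; "obj", "start" and
 * "end" are hypothetical), showing the option flags and the end == 0
 * special case documented above.
 */
#if 0
	VM_OBJECT_WLOCK(obj);
	/* Free or invalidate everything from "start" to the end of obj. */
	vm_object_page_remove(obj, start, 0, 0);
	/* Only discard clean pages; dirty data survives for pageout. */
	vm_object_page_remove(obj, start, end, OBJPR_CLEANONLY);
	VM_OBJECT_WUNLOCK(obj);
#endif
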
/*
 * vm_object_page_noreuse:
 *
 * For the given object, attempt to move the specified pages to
 * the head of the inactive queue. This bypasses regular LRU
 * operation and allows the pages to be reused quickly under memory
 * pressure. If a page is wired for any reason, then it will not
 * be queued. Pages are specified by the range ["start", "end").
 * As a special case, if "end" is zero, then the range extends from
 * "start" to the end of the object.
 *
 * This operation should only be performed on objects that
 * contain non-fictitious, managed pages.
 *
 * The object must be locked.
 */
void
vm_object_page_noreuse(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
{
	vm_page_t p, next;

	VM_OBJECT_ASSERT_LOCKED(object);
	KASSERT((object->flags & (OBJ_FICTITIOUS | OBJ_UNMANAGED)) == 0,
	    ("vm_object_page_noreuse: illegal object %p", object));
	if (object->resident_page_count == 0)
		return;
	p = vm_page_find_least(object, start);

	/*
	 * Here, the variable "p" is either (1) the page with the least pindex
	 * greater than or equal to the parameter "start" or (2) NULL.
	 */
	for (; p != NULL && (p->pindex < end || end == 0); p = next) {
		next = TAILQ_NEXT(p, listq);
		vm_page_deactivate_noreuse(p);
	}
}

/*
 * Populate the specified range of the object with valid pages. Returns
 * TRUE if the range is successfully populated and FALSE otherwise.
 *
 * Note: This function should be optimized to pass a larger array of
 * pages to vm_pager_get_pages() before it is applied to a non-
 * OBJT_DEVICE object.
 *
 * The object must be locked.
 */
boolean_t
vm_object_populate(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
{
	vm_page_t m;
	vm_pindex_t pindex;
	int rv;

	VM_OBJECT_ASSERT_WLOCKED(object);
	for (pindex = start; pindex < end; pindex++) {
		rv = vm_page_grab_valid(&m, object, pindex, VM_ALLOC_NORMAL);
		if (rv != VM_PAGER_OK)
			break;

		/*
		 * Keep "m" busy because a subsequent iteration may unlock
		 * the object.
		 */
	}
	if (pindex > start) {
		m = vm_page_lookup(object, start);
		while (m != NULL && m->pindex < pindex) {
			vm_page_xunbusy(m);
			m = TAILQ_NEXT(m, listq);
		}
	}
	return (pindex == end);
}

/*
 * Routine:	vm_object_coalesce
 * Function:	Coalesces two objects backing up adjoining
 *		regions of memory into a single object.
 *
 * returns TRUE if objects were combined.
 *
 * NOTE: Only works at the moment if the second object is NULL -
 *	 if it's not, which object do we lock first?
 *
 * Parameters:
 *	prev_object	First object to coalesce
 *	prev_offset	Offset into prev_object
 *	prev_size	Size of reference to prev_object
 *	next_size	Size of reference to the second object
 *	reserved	Indicator that extension region has
 *			swap accounted for
 *
 * Conditions:
 *	The object must *not* be locked.
 */
boolean_t
vm_object_coalesce(vm_object_t prev_object, vm_ooffset_t prev_offset,
    vm_size_t prev_size, vm_size_t next_size, boolean_t reserved)
{
	vm_pindex_t next_pindex;

	if (prev_object == NULL)
		return (TRUE);
	if ((prev_object->flags & OBJ_ANON) == 0)
		return (FALSE);

	VM_OBJECT_WLOCK(prev_object);

	/*
	 * Try to collapse the object first.
	 */
	vm_object_collapse(prev_object);

	/*
	 * Can't coalesce if:
	 *  - more than one reference
	 *  - paged out
	 *  - shadows another object
	 *  - has a copy elsewhere
	 * (any of which mean that the pages not mapped to prev_entry may be
	 * in use anyway)
	 */
	if (prev_object->backing_object != NULL) {
		VM_OBJECT_WUNLOCK(prev_object);
		return (FALSE);
	}

	prev_size >>= PAGE_SHIFT;
	next_size >>= PAGE_SHIFT;
	next_pindex = OFF_TO_IDX(prev_offset) + prev_size;

	if (prev_object->ref_count > 1 &&
	    prev_object->size != next_pindex &&
	    (prev_object->flags & OBJ_ONEMAPPING) == 0) {
		VM_OBJECT_WUNLOCK(prev_object);
		return (FALSE);
	}

	/*
	 * Account for the charge.
	 */
	if (prev_object->cred != NULL) {
		/*
		 * If prev_object was charged, then this mapping,
		 * although not charged now, may become writable
		 * later. Non-NULL cred in the object would prevent
		 * swap reservation during enabling of the write
		 * access, so reserve swap now. A failed reservation
		 * causes allocation of a separate object for the map
		 * entry, and swap reservation for this entry is
		 * managed in appropriate time.
		 */
		if (!reserved && !swap_reserve_by_cred(ptoa(next_size),
		    prev_object->cred)) {
			VM_OBJECT_WUNLOCK(prev_object);
			return (FALSE);
		}
		prev_object->charge += ptoa(next_size);
	}

	/*
	 * Remove any pages that may still be in the object from a previous
	 * deallocation.
	 */
	if (next_pindex < prev_object->size) {
		vm_object_page_remove(prev_object, next_pindex, next_pindex +
		    next_size, 0);
#if 0
		if (prev_object->cred != NULL) {
			KASSERT(prev_object->charge >=
			    ptoa(prev_object->size - next_pindex),
			    ("object %p overcharged 1 %jx %jx", prev_object,
			    (uintmax_t)next_pindex, (uintmax_t)next_size));
			prev_object->charge -= ptoa(prev_object->size -
			    next_pindex);
		}
#endif
	}

	/*
	 * Extend the object if necessary.
	 */
	if (next_pindex + next_size > prev_object->size)
		prev_object->size = next_pindex + next_size;

	VM_OBJECT_WUNLOCK(prev_object);
	return (TRUE);
}
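
/*
 * Worked example of the coalesce arithmetic above (hypothetical numbers,
 * assuming 4 KB pages): extending an entry that maps prev_object at
 * prev_offset 0x4000 for prev_size 0x3000 by next_size 0x2000 gives
 * next_pindex = OFF_TO_IDX(0x4000) + (0x3000 >> PAGE_SHIFT) = 4 + 3 = 7,
 * so on success prev_object grows to at least 7 + 2 = 9 pages and, when
 * it carries a cred, is charged for the two additional pages.
 */
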
void
vm_object_set_writeable_dirty_(vm_object_t object)
{
	atomic_add_int(&object->generation, 1);
}

bool
vm_object_mightbedirty_(vm_object_t object)
{
	return (object->generation != object->cleangeneration);
}

/*
 * vm_object_unwire:
 *
 * For each page offset within the specified range of the given object,
 * find the highest-level page in the shadow chain and unwire it. A page
 * must exist at every page offset, and the highest-level page must be
 * wired.
 */
void
vm_object_unwire(vm_object_t object, vm_ooffset_t offset, vm_size_t length,
    uint8_t queue)
{
	vm_object_t tobject, t1object;
	vm_page_t m, tm;
	vm_pindex_t end_pindex, pindex, tpindex;
	int depth, locked_depth;

	KASSERT((offset & PAGE_MASK) == 0,
	    ("vm_object_unwire: offset is not page aligned"));
	KASSERT((length & PAGE_MASK) == 0,
	    ("vm_object_unwire: length is not a multiple of PAGE_SIZE"));
	/* The wired count of a fictitious page never changes. */
	if ((object->flags & OBJ_FICTITIOUS) != 0)
		return;
	pindex = OFF_TO_IDX(offset);
	end_pindex = pindex + atop(length);
again:
	locked_depth = 1;
	VM_OBJECT_RLOCK(object);
	m = vm_page_find_least(object, pindex);
	while (pindex < end_pindex) {
		if (m == NULL || pindex < m->pindex) {
			/*
			 * The first object in the shadow chain doesn't
			 * contain a page at the current index. Therefore,
			 * the page must exist in a backing object.
			 */
			tobject = object;
			tpindex = pindex;
			depth = 0;
			do {
				tpindex +=
				    OFF_TO_IDX(tobject->backing_object_offset);
				tobject = tobject->backing_object;
				KASSERT(tobject != NULL,
				    ("vm_object_unwire: missing page"));
				if ((tobject->flags & OBJ_FICTITIOUS) != 0)
					goto next_page;
				depth++;
				if (depth == locked_depth) {
					locked_depth++;
					VM_OBJECT_RLOCK(tobject);
				}
			} while ((tm = vm_page_lookup(tobject, tpindex)) ==
			    NULL);
		} else {
			tm = m;
			m = TAILQ_NEXT(m, listq);
		}
		if (vm_page_trysbusy(tm) == 0) {
			for (tobject = object; locked_depth >= 1;
			    locked_depth--) {
				t1object = tobject->backing_object;
				if (tm->object != tobject)
					VM_OBJECT_RUNLOCK(tobject);
				tobject = t1object;
			}
			tobject = tm->object;
			if (!vm_page_busy_sleep(tm, "unwbo",
			    VM_ALLOC_IGN_SBUSY))
				VM_OBJECT_RUNLOCK(tobject);
			goto again;
		}
		vm_page_unwire(tm, queue);
		vm_page_sunbusy(tm);
next_page:
		pindex++;
	}
	/* Release the accumulated object locks. */
	for (tobject = object; locked_depth >= 1; locked_depth--) {
		t1object = tobject->backing_object;
		VM_OBJECT_RUNLOCK(tobject);
		tobject = t1object;
	}
}
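
/*
 * Illustrative sketch only ("#if 0"-guarded; "entry" is hypothetical): a
 * map-entry unwiring path would hand the whole entry range to
 * vm_object_unwire() and let the pages re-enter the paging queues.
 */
#if 0
	vm_object_unwire(entry->object.vm_object, entry->offset,
	    entry->end - entry->start, PQ_ACTIVE);
#endif
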
/*
 * Return the vnode for the given object, or NULL if none exists.
 * For tmpfs objects, the function may return NULL if there is
 * no vnode allocated at the time of the call.
 */
struct vnode *
vm_object_vnode(vm_object_t object)
{
	struct vnode *vp;

	VM_OBJECT_ASSERT_LOCKED(object);
	vm_pager_getvp(object, &vp, NULL);
	return (vp);
}

/*
 * Busy the vm object. This prevents new pages belonging to the object from
 * becoming busy. Existing pages persist as busy. Callers are responsible
 * for checking page state before proceeding.
 */
void
vm_object_busy(vm_object_t obj)
{
	VM_OBJECT_ASSERT_LOCKED(obj);

	blockcount_acquire(&obj->busy, 1);
	/* The fence is required to order loads of page busy. */
	atomic_thread_fence_acq_rel();
}

void
vm_object_unbusy(vm_object_t obj)
{
	blockcount_release(&obj->busy, 1);
}

void
vm_object_busy_wait(vm_object_t obj, const char *wmesg)
{
	VM_OBJECT_ASSERT_UNLOCKED(obj);

	(void)blockcount_sleep(&obj->busy, NULL, wmesg, PVM);
}

/*
 * This function aims to determine if the object is mapped,
 * specifically, if it is referenced by a vm_map_entry. Because
 * objects occasionally acquire transient references that do not
 * represent a mapping, the method used here is inexact. However, it
 * has very low overhead and is good enough for the advisory
 * vm.vmtotal sysctl.
 */
bool
vm_object_is_active(vm_object_t obj)
{
	return (obj->ref_count > atomic_load_int(&obj->shadow_count));
}

static int
vm_object_list_handler(struct sysctl_req *req, bool swap_only)
{
	struct kinfo_vmobject *kvo;
	char *fullpath, *freepath;
	struct vnode *vp;
	struct vattr va;
	vm_object_t obj;
	vm_page_t m;
	u_long sp;
	int count, error;
	bool want_path;

	if (req->oldptr == NULL) {
		/*
		 * If an old buffer has not been provided, generate an
		 * estimate of the space needed for a subsequent call.
		 */
		mtx_lock(&vm_object_list_mtx);
		count = 0;
		TAILQ_FOREACH(obj, &vm_object_list, object_list) {
			if (obj->type == OBJT_DEAD)
				continue;
			count++;
		}
		mtx_unlock(&vm_object_list_mtx);
		return (SYSCTL_OUT(req, NULL, sizeof(struct kinfo_vmobject) *
		    count * 11 / 10));
	}

	want_path = !(swap_only || jailed(curthread->td_ucred));
	kvo = malloc(sizeof(*kvo), M_TEMP, M_WAITOK | M_ZERO);
	error = 0;

	/*
	 * VM objects are type stable and are never removed from the
	 * list once added. This allows us to safely read obj->object_list
	 * after reacquiring the VM object lock.
	 */
	mtx_lock(&vm_object_list_mtx);
	TAILQ_FOREACH(obj, &vm_object_list, object_list) {
		if (obj->type == OBJT_DEAD ||
		    (swap_only && (obj->flags & (OBJ_ANON | OBJ_SWAP)) == 0))
			continue;
		VM_OBJECT_RLOCK(obj);
		if (obj->type == OBJT_DEAD ||
		    (swap_only && (obj->flags & (OBJ_ANON | OBJ_SWAP)) == 0)) {
			VM_OBJECT_RUNLOCK(obj);
			continue;
		}
		mtx_unlock(&vm_object_list_mtx);
		kvo->kvo_size = ptoa(obj->size);
		kvo->kvo_resident = obj->resident_page_count;
		kvo->kvo_ref_count = obj->ref_count;
		kvo->kvo_shadow_count = atomic_load_int(&obj->shadow_count);
		kvo->kvo_memattr = obj->memattr;
		kvo->kvo_active = 0;
		kvo->kvo_inactive = 0;
		if (!swap_only) {
			TAILQ_FOREACH(m, &obj->memq, listq) {
				/*
				 * A page may belong to the object but be
				 * dequeued and set to PQ_NONE while the
				 * object lock is not held. This makes the
				 * reads of m->queue below racy, and we do not
				 * count pages set to PQ_NONE. However, this
				 * sysctl is only meant to give an
				 * approximation of the system anyway.
				 */
				if (m->a.queue == PQ_ACTIVE)
					kvo->kvo_active++;
				else if (m->a.queue == PQ_INACTIVE)
					kvo->kvo_inactive++;
			}
		}

		kvo->kvo_vn_fileid = 0;
		kvo->kvo_vn_fsid = 0;
		kvo->kvo_vn_fsid_freebsd11 = 0;
		freepath = NULL;
		fullpath = "";
		vp = NULL;
		kvo->kvo_type = vm_object_kvme_type(obj, want_path ? &vp :
		    NULL);
		if (vp != NULL) {
			vref(vp);
		} else if ((obj->flags & OBJ_ANON) != 0) {
			MPASS(kvo->kvo_type == KVME_TYPE_SWAP);
			kvo->kvo_me = (uintptr_t)obj;
			/* tmpfs objs are reported as vnodes */
			kvo->kvo_backing_obj = (uintptr_t)obj->backing_object;
			sp = swap_pager_swapped_pages(obj);
			kvo->kvo_swapped = sp > UINT32_MAX ? UINT32_MAX : sp;
		}
		VM_OBJECT_RUNLOCK(obj);
		if (vp != NULL) {
			vn_fullpath(vp, &fullpath, &freepath);
			vn_lock(vp, LK_SHARED | LK_RETRY);
			if (VOP_GETATTR(vp, &va, curthread->td_ucred) == 0) {
				kvo->kvo_vn_fileid = va.va_fileid;
				kvo->kvo_vn_fsid = va.va_fsid;
				kvo->kvo_vn_fsid_freebsd11 = va.va_fsid;
				/* truncate */
			}
			vput(vp);
		}
		strlcpy(kvo->kvo_path, fullpath, sizeof(kvo->kvo_path));
		free(freepath, M_TEMP);

		/* Pack record size down */
		kvo->kvo_structsize = offsetof(struct kinfo_vmobject, kvo_path)
		    + strlen(kvo->kvo_path) + 1;
		kvo->kvo_structsize = roundup(kvo->kvo_structsize,
		    sizeof(uint64_t));
		error = SYSCTL_OUT(req, kvo, kvo->kvo_structsize);
		maybe_yield();
		mtx_lock(&vm_object_list_mtx);
		if (error)
			break;
	}
	mtx_unlock(&vm_object_list_mtx);
	free(kvo, M_TEMP);
	return (error);
}

static int
sysctl_vm_object_list(SYSCTL_HANDLER_ARGS)
{
	return (vm_object_list_handler(req, false));
}

SYSCTL_PROC(_vm, OID_AUTO, objects, CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_SKIP |
    CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_object_list, "S,kinfo_vmobject",
    "List of VM objects");

static int
sysctl_vm_object_list_swap(SYSCTL_HANDLER_ARGS)
{
	return (vm_object_list_handler(req, true));
}

/*
 * This sysctl returns the list of anonymous or swap objects. The intent
 * is to provide a stripped, optimized list useful for analyzing swap use.
 * Since technically non-swap (default) objects participate in the
 * shadow chains, and are converted to swap type as needed by the swap
 * pager, we must report them.
 */
SYSCTL_PROC(_vm, OID_AUTO, swap_objects,
    CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_SKIP | CTLFLAG_MPSAFE, NULL, 0,
    sysctl_vm_object_list_swap, "S,kinfo_vmobject",
    "List of swap VM objects");
#include "opt_ddb.h"
#ifdef DDB
#include <sys/kernel.h>
#include <sys/cons.h>

#include <ddb/ddb.h>

static int
_vm_object_in_map(vm_map_t map, vm_object_t object, vm_map_entry_t entry)
{
	vm_map_t tmpm;
	vm_map_entry_t tmpe;
	vm_object_t obj;

	if (map == 0)
		return 0;
	if (entry == 0) {
		VM_MAP_ENTRY_FOREACH(tmpe, map) {
			if (_vm_object_in_map(map, object, tmpe)) {
				return 1;
			}
		}
	} else if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
		tmpm = entry->object.sub_map;
		VM_MAP_ENTRY_FOREACH(tmpe, tmpm) {
			if (_vm_object_in_map(tmpm, object, tmpe)) {
				return 1;
			}
		}
	} else if ((obj = entry->object.vm_object) != NULL) {
		for (; obj; obj = obj->backing_object)
			if (obj == object) {
				return 1;
			}
	}
	return 0;
}

static int
vm_object_in_map(vm_object_t object)
{
	struct proc *p;

	/* sx_slock(&allproc_lock); */
	FOREACH_PROC_IN_SYSTEM(p) {
		if (!p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */)
			continue;
		if (_vm_object_in_map(&p->p_vmspace->vm_map, object, 0)) {
			/* sx_sunlock(&allproc_lock); */
			return 1;
		}
	}
	/* sx_sunlock(&allproc_lock); */
	if (_vm_object_in_map(kernel_map, object, 0))
		return 1;
	return 0;
}

DB_SHOW_COMMAND_FLAGS(vmochk, vm_object_check, DB_CMD_MEMSAFE)
{
	vm_object_t object;

	/*
	 * make sure that internal objs are in a map somewhere
	 * and none have zero ref counts.
	 */
	TAILQ_FOREACH(object, &vm_object_list, object_list) {
		if ((object->flags & OBJ_ANON) != 0) {
			if (object->ref_count == 0) {
				db_printf("vmochk: internal obj has zero ref count: %ld\n",
				    (long)object->size);
			}
			if (!vm_object_in_map(object)) {
				db_printf(
				    "vmochk: internal obj is not in a map: "
				    "ref: %d, size: %lu: 0x%lx, backing_object: %p\n",
				    object->ref_count, (u_long)object->size,
				    (u_long)object->size,
				    (void *)object->backing_object);
			}
		}
		if (db_pager_quit)
			return;
	}
}

/*
 * vm_object_print: [ debug ]
 */
DB_SHOW_COMMAND(object, vm_object_print_static)
{
	/* XXX convert args. */
	vm_object_t object = (vm_object_t)addr;
	boolean_t full = have_addr;
	vm_page_t p;

	/* XXX count is an (unused) arg. Avoid shadowing it. */
#define	count	was_count
	int count;

	if (object == NULL)
		return;

	db_iprintf(
	    "Object %p: type=%d, size=0x%jx, res=%d, ref=%d, flags=0x%x ruid %d charge %jx\n",
	    object, (int)object->type, (uintmax_t)object->size,
	    object->resident_page_count, object->ref_count, object->flags,
	    object->cred ? object->cred->cr_ruid : -1, (uintmax_t)object->charge);
	db_iprintf(" sref=%d, backing_object(%d)=(%p)+0x%jx\n",
	    atomic_load_int(&object->shadow_count),
	    object->backing_object ? object->backing_object->ref_count : 0,
	    object->backing_object, (uintmax_t)object->backing_object_offset);

	if (!full)
		return;

	db_indent += 2;
	count = 0;
	TAILQ_FOREACH(p, &object->memq, listq) {
		if (count == 0)
			db_iprintf("memory:=");
		else if (count == 6) {
			db_printf("\n");
			db_iprintf(" ...");
			count = 0;
		} else
			db_printf(",");
		count++;

		db_printf("(off=0x%jx,page=0x%jx)",
		    (uintmax_t)p->pindex, (uintmax_t)VM_PAGE_TO_PHYS(p));

		if (db_pager_quit)
			break;
	}
	if (count != 0)
		db_printf("\n");
	db_indent -= 2;
}

/* XXX. */
#undef	count

/* XXX need this non-static entry for calling from vm_map_print. */
void
vm_object_print(
	/* db_expr_t */ long addr,
	boolean_t have_addr,
	/* db_expr_t */ long count,
	char *modif)
{
	vm_object_print_static(addr, have_addr, count, modif);
}

DB_SHOW_COMMAND_FLAGS(vmopag, vm_object_print_pages, DB_CMD_MEMSAFE)
{
	vm_object_t object;
	vm_pindex_t fidx;
	vm_paddr_t pa;
	vm_page_t m, prev_m;
	int rcount;

	TAILQ_FOREACH(object, &vm_object_list, object_list) {
		db_printf("new object: %p\n", (void *)object);
		if (db_pager_quit)
			return;

		rcount = 0;
		fidx = 0;
		pa = -1;
		TAILQ_FOREACH(m, &object->memq, listq) {
			if ((prev_m = TAILQ_PREV(m, pglist, listq)) != NULL &&
			    prev_m->pindex + 1 != m->pindex) {
				if (rcount) {
					db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
					    (long)fidx, rcount, (long)pa);
					if (db_pager_quit)
						return;
					rcount = 0;
				}
			}
			if (rcount &&
			    (VM_PAGE_TO_PHYS(m) == pa + rcount * PAGE_SIZE)) {
				++rcount;
				continue;
			}
			if (rcount) {
				db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
				    (long)fidx, rcount, (long)pa);
				if (db_pager_quit)
					return;
			}
			fidx = m->pindex;
			pa = VM_PAGE_TO_PHYS(m);
			rcount = 1;
		}
		if (rcount) {
			db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
			    (long)fidx, rcount, (long)pa);
			if (db_pager_quit)
				return;
		}
	}
}
#endif /* DDB */