/*	$OpenBSD: vfs_subr.c,v 1.232 2015/07/16 18:17:27 claudio Exp $	*/
/*	$NetBSD: vfs_subr.c,v 1.53 1996/04/22 01:39:13 christos Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/lock.h>
#include <sys/stat.h>
#include <sys/acct.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/syscallargs.h>
#include <sys/pool.h>
#include <sys/tree.h>
#include <sys/specdev.h>

#include <netinet/in.h>

#include <uvm/uvm_extern.h>
#include <uvm/uvm_vnode.h>

#include "softraid.h"

void sr_shutdown(void);

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};

int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */
int suid_clear = 1;		/* 1 => clear SUID / SGID on owner change */

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	LIST_NEXT(bp, b_vnbufs) = NOLIST;				\
}

struct freelst vnode_hold_list;	/* list of vnodes referencing buffers */
struct freelst vnode_free_list;	/* vnode free list */

struct mntlist mountlist;	/* mounted filesystem list */

void	vclean(struct vnode *, int, struct proc *);
void	insmntque(struct vnode *, struct mount *);
int	getdevvp(dev_t, struct vnode **, enum vtype);

int	vfs_hang_addrlist(struct mount *, struct netexport *,
	    struct export_args *);
int	vfs_free_netcred(struct radix_node *, void *, u_int);
void	vfs_free_addrlist(struct netexport *);
void	vputonfreelist(struct vnode *);

int	vflush_vnode(struct vnode *, void *);
int	maxvnodes;

#ifdef DEBUG
void	printlockedvnodes(void);
#endif

struct pool vnode_pool;
struct pool uvm_vnode_pool;

static int rb_buf_compare(struct buf *b1, struct buf *b2);
RB_GENERATE(buf_rb_bufs, buf, b_rbbufs, rb_buf_compare);

static int
rb_buf_compare(struct buf *b1, struct buf *b2)
{
	if (b1->b_lblkno < b2->b_lblkno)
		return(-1);
	if (b1->b_lblkno > b2->b_lblkno)
		return(1);
	return(0);
}
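
/*
 * Illustrative sketch (not part of the original file): the comparator
 * above orders each vnode's v_bufs_tree by logical block number, so a
 * cached buffer can be looked up with RB_FIND using a key on the
 * stack.  The variable names below are hypothetical.
 *
 *	struct buf key, *bp;
 *
 *	key.b_lblkno = lblkno;
 *	bp = RB_FIND(buf_rb_bufs, &vp->v_bufs_tree, &key);
 */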
/*
 * Initialize the vnode management data structures.
 */
void
vntblinit(void)
{
	/* buffer cache may need a vnode for each buffer */
	maxvnodes = 2 * initialvnodes;
	pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, PR_WAITOK,
	    "vnodes", NULL);
	pool_init(&uvm_vnode_pool, sizeof(struct uvm_vnode), 0, 0, PR_WAITOK,
	    "uvmvnodes", NULL);
	TAILQ_INIT(&vnode_hold_list);
	TAILQ_INIT(&vnode_free_list);
	TAILQ_INIT(&mountlist);
	/*
	 * Initialize the filesystem syncer.
	 */
	vn_initialize_syncerd();
}

/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting.
 *
 * Default behaviour is to attempt getting a READ lock and in case of an
 * ongoing unmount, to wait for it to finish and then return failure.
 */
int
vfs_busy(struct mount *mp, int flags)
{
	int rwflags = 0;

	/* new mountpoints need their lock initialised */
	if (mp->mnt_lock.rwl_name == NULL)
		rw_init(&mp->mnt_lock, "vfslock");

	if (flags & VB_WRITE)
		rwflags |= RW_WRITE;
	else
		rwflags |= RW_READ;
	if (flags & VB_WAIT)
		rwflags |= RW_SLEEPFAIL;
	else
		rwflags |= RW_NOSLEEP;

	if (rw_enter(&mp->mnt_lock, rwflags))
		return (EBUSY);

	return (0);
}
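
/*
 * Usage sketch (illustrative, not part of the original file): callers
 * bracket work on a mount point with vfs_busy()/vfs_unbusy(), e.g.
 *
 *	if (vfs_busy(mp, VB_READ|VB_NOWAIT) == 0) {
 *		... examine mp ...
 *		vfs_unbusy(mp);
 *	}
 *
 * A failing vfs_busy() means an unmount is in progress (or, with
 * VB_NOWAIT, that the lock could not be taken immediately), so the
 * mount point must be skipped.
 */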
/*
 * Free a busy file system
 */
void
vfs_unbusy(struct mount *mp)
{
	rw_exit(&mp->mnt_lock);
}

int
vfs_isbusy(struct mount *mp)
{
	if (RWLOCK_OWNER(&mp->mnt_lock) > 0)
		return (1);
	else
		return (0);
}

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(char *fstypename, char *devname, struct mount **mpp)
{
	struct vfsconf *vfsp;
	struct mount *mp;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strcmp(vfsp->vfc_name, fstypename))
			break;
	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc(sizeof(*mp), M_MOUNT, M_WAITOK|M_ZERO);
	(void)vfs_busy(mp, VB_READ|VB_NOWAIT);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_vfc = vfsp;
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfc_refcount++;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN, 0);
	copystr(devname, mp->mnt_stat.f_mntfromspec, MNAMELEN, 0);
	*mpp = mp;
	return (0);
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid_t *fsid)
{
	struct mount *mp;

	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			return (mp);
		}
	}

	return (NULL);
}

/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(struct mount *mp)
{
	static u_short xxxfs_mntid;

	fsid_t tfsid;
	int mtype;

	mtype = mp->mnt_vfc->vfc_typenum;
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;

	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (!TAILQ_EMPTY(&mountlist)) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
}

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(struct vattr *vap)
{
	vap->va_type = VNON;
	/* XXX These next two used to be one line, but for a GCC bug. */
	vap->va_size = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
	    vap->va_fsid = vap->va_fileid =
	    vap->va_blocksize = vap->va_rdev =
	    vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
	    vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
	    vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
	    vap->va_flags = vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}
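
/*
 * Usage sketch (illustrative, not part of the original file): callers
 * null out a struct vattr and then set only the fields they want
 * changed, so the filesystem can treat VNOVAL as "leave alone", e.g.
 *
 *	struct vattr va;
 *
 *	vattr_null(&va);
 *	va.va_size = 0;
 *	error = VOP_SETATTR(vp, &va, cred, p);
 */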
/*
 * Routines having to do with the management of the vnode table.
 */
long numvnodes;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(enum vtagtype tag, struct mount *mp, struct vops *vops,
    struct vnode **vpp)
{
	struct proc *p = curproc;
	struct freelst *listhd;
	static int toggle;
	struct vnode *vp;
	int s;

	/*
	 * allow maxvnodes to increase if the buffer cache itself
	 * is big enough to justify it. (we don't shrink it ever)
	 */
	maxvnodes = maxvnodes < bcstats.numbufs ? bcstats.numbufs
	    : maxvnodes;

	/*
	 * We must choose whether to allocate a new vnode or recycle an
	 * existing one. The criterion for allocating a new one is that
	 * the total number of vnodes is less than the number desired or
	 * there are no vnodes on either free list. Generally we only
	 * want to recycle vnodes that have no buffers associated with
	 * them, so we look first on the vnode_free_list. If it is empty,
	 * we next consider vnodes with referencing buffers on the
	 * vnode_hold_list. The toggle ensures that half the time we
	 * will use a buffer from the vnode_hold_list, and half the time
	 * we will allocate a new one unless the list has grown to twice
	 * the desired size. We are reticent to recycle vnodes from the
	 * vnode_hold_list because we will lose the identity of all its
	 * referencing buffers.
	 */
	toggle ^= 1;
	if (numvnodes / 2 > maxvnodes)
		toggle = 0;

	s = splbio();
	if ((numvnodes < maxvnodes) ||
	    ((TAILQ_FIRST(listhd = &vnode_free_list) == NULL) &&
	    ((TAILQ_FIRST(listhd = &vnode_hold_list) == NULL) || toggle))) {
		splx(s);
		vp = pool_get(&vnode_pool, PR_WAITOK | PR_ZERO);
		vp->v_uvm = pool_get(&uvm_vnode_pool, PR_WAITOK | PR_ZERO);
		vp->v_uvm->u_vnode = vp;
		RB_INIT(&vp->v_bufs_tree);
		RB_INIT(&vp->v_nc_tree);
		TAILQ_INIT(&vp->v_cache_dst);
		numvnodes++;
	} else {
		for (vp = TAILQ_FIRST(listhd); vp != NULLVP;
		    vp = TAILQ_NEXT(vp, v_freelist)) {
			if (VOP_ISLOCKED(vp) == 0)
				break;
		}
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULL) {
			splx(s);
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}

#ifdef DIAGNOSTIC
		if (vp->v_usecount) {
			vprint("free vnode", vp);
			panic("free vnode isn't");
		}
#endif

		TAILQ_REMOVE(listhd, vp, v_freelist);
		vp->v_bioflag &= ~VBIOONFREELIST;
		splx(s);

		if (vp->v_type != VBAD)
			vgonel(vp, p);
#ifdef DIAGNOSTIC
		if (vp->v_data) {
			vprint("cleaned vnode", vp);
			panic("cleaned vnode isn't");
		}
		s = splbio();
		if (vp->v_numoutput)
			panic("Clean vnode has pending I/O's");
		splx(s);
#endif
		vp->v_flag = 0;
		vp->v_socket = 0;
	}
	cache_purge(vp);
	vp->v_type = VNON;
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	return (0);
}
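
/*
 * Usage sketch (illustrative, not part of the original file): a
 * filesystem typically asks for a fresh vnode and then attaches its
 * own per-file data to it, e.g.
 *
 *	struct vnode *vp;
 *
 *	error = getnewvnode(VT_NON, mp, &spec_vops, &vp);
 *	if (error)
 *		return (error);
 *	vp->v_type = VREG;
 *	vp->v_data = ip;	// hypothetical in-core inode
 *
 * The vnode comes back with v_usecount == 1, so the caller already
 * holds a reference and eventually drops it with vrele() or vput().
 */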
/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(struct vnode *vp, struct mount *mp)
{
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev_t dev, struct vnode **vpp)
{
	return (getdevvp(dev, vpp, VBLK));
}

/*
 * Create a vnode for a character device.
 * Used for console handling.
 */
int
cdevvp(dev_t dev, struct vnode **vpp)
{
	return (getdevvp(dev, vpp, VCHR));
}

/*
 * Create a vnode for a device.
 * Used by bdevvp (block device) for root file system etc.,
 * and by cdevvp (character device) for console.
 */
int
getdevvp(dev_t dev, struct vnode **vpp, enum vtype type)
{
	struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (0);
	}
	error = getnewvnode(VT_NON, NULL, &spec_vops, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = type;
	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}
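
/*
 * Usage sketch (illustrative, not part of the original file): mounting
 * the root filesystem wraps the root device in a vnode roughly like
 *
 *	struct vnode *rootvp;
 *
 *	if (bdevvp(rootdev, &rootvp))
 *		panic("can't setup bdevvp's");
 *
 * which hands back a VBLK vnode using spec_vops, suitable for passing
 * to the filesystem's mount routine.
 */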
/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(struct vnode *nvp, dev_t nvp_rdev, struct mount *mp)
{
	struct proc *p = curproc;
	struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) {
			continue;
		}
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vp->v_usecount == 0) {
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE, p)) {
			goto loop;
		}
		break;
	}

	/*
	 * Common case is actually in the if statement
	 */
	if (vp == NULL || !(vp->v_tag == VT_NON && vp->v_type == VBLK)) {
		nvp->v_specinfo = malloc(sizeof(struct specinfo), M_VNODE,
		    M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specmountpoint = NULL;
		nvp->v_speclockf = NULL;
		memset(nvp->v_specbitmap, 0, sizeof(nvp->v_specbitmap));
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}

	/*
	 * This code is the uncommon case. It is called in case
	 * we found an alias that was VT_NON && vtype of VBLK
	 * This means we found a block device that was created
	 * using bdevvp.
	 * An example of such a vnode is the root partition device vnode
	 * created in ffs_mountroot.
	 *
	 * The vnodes created by bdevvp should not be aliased (why?).
	 */

	VOP_UNLOCK(vp, 0, p);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. If the vnode lock bit is set,
 * the vnode is being eliminated in vgone. In that case, we
 * cannot grab it, so the process is awakened when the
 * transition is completed, and an error code is returned to
 * indicate that the vnode is no longer usable, possibly
 * having been changed to a new file system type.
 */
int
vget(struct vnode *vp, int flags, struct proc *p)
{
	int error, s, onfreelist;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */
	if (vp->v_flag & VXLOCK) {
		if (flags & LK_NOWAIT) {
			return (EBUSY);
		}

		vp->v_flag |= VXWANT;
		tsleep(vp, PINOD, "vget", 0);
		return (ENOENT);
	}

	onfreelist = vp->v_bioflag & VBIOONFREELIST;
	if (vp->v_usecount == 0 && onfreelist) {
		s = splbio();
		if (vp->v_holdcnt > 0)
			TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		else
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		vp->v_bioflag &= ~VBIOONFREELIST;
		splx(s);
	}

	vp->v_usecount++;
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags, p)) != 0) {
			vp->v_usecount--;
			if (vp->v_usecount == 0 && onfreelist)
				vputonfreelist(vp);
		}
		return (error);
	}

	return (0);
}
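
/*
 * Usage sketch (illustrative, not part of the original file): a caller
 * that wants a referenced, locked vnode pairs vget() with vput(), and
 * bails out when the vnode is being reclaimed, e.g.
 *
 *	if (vget(vp, LK_EXCLUSIVE, p) != 0)
 *		return (ENOENT);	// vnode was being cleaned
 *	... use vp ...
 *	vput(vp);			// unlock and drop the reference
 *
 * Without a lock type in the flags, vget() only takes a reference,
 * and the matching release is vrele().
 */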
/* Vnode reference. */
void
vref(struct vnode *vp)
{
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0)
		panic("vref used where vget required");
	if (vp->v_type == VNON)
		panic("vref on a VNON vnode");
#endif
	vp->v_usecount++;
}

void
vputonfreelist(struct vnode *vp)
{
	int s;
	struct freelst *lst;

	s = splbio();
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 0)
		panic("Use count is not zero!");

	if (vp->v_bioflag & VBIOONFREELIST) {
		vprint("vnode already on free list: ", vp);
		panic("vnode already on free list");
	}
#endif

	vp->v_bioflag |= VBIOONFREELIST;

	if (vp->v_holdcnt > 0)
		lst = &vnode_hold_list;
	else
		lst = &vnode_free_list;

	if (vp->v_type == VBAD)
		TAILQ_INSERT_HEAD(lst, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(lst, vp, v_freelist);

	splx(s);
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(struct vnode *vp)
{
	struct proc *p = curproc;

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vput: null vp");
#endif

#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vput: bad ref count", vp);
		panic("vput: ref cnt");
	}
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		VOP_UNLOCK(vp, 0, p);
		return;
	}

#ifdef DIAGNOSTIC
	if (vp->v_writecount != 0) {
		vprint("vput: bad writecount", vp);
		panic("vput: v_writecount != 0");
	}
#endif

	VOP_INACTIVE(vp, p);

	if (vp->v_usecount == 0 && !(vp->v_bioflag & VBIOONFREELIST))
		vputonfreelist(vp);
}

/*
 * Vnode release - use for active VNODES.
 * If count drops to zero, call inactive routine and return to freelist.
 * Returns 0 if it did not sleep.
 */
int
vrele(struct vnode *vp)
{
	struct proc *p = curproc;

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		return (0);
	}

#ifdef DIAGNOSTIC
	if (vp->v_writecount != 0) {
		vprint("vrele: bad writecount", vp);
		panic("vrele: v_writecount != 0");
	}
#endif

	if (vn_lock(vp, LK_EXCLUSIVE, p)) {
#ifdef DIAGNOSTIC
		vprint("vrele: cannot lock", vp);
#endif
		return (1);
	}

	VOP_INACTIVE(vp, p);

	if (vp->v_usecount == 0 && !(vp->v_bioflag & VBIOONFREELIST))
		vputonfreelist(vp);

	return (1);
}

/* Page or buffer structure gets a reference. */
void
vhold(struct vnode *vp)
{
	/*
	 * If it is on the freelist and the hold count is currently
	 * zero, move it to the hold list.
	 */
	if ((vp->v_bioflag & VBIOONFREELIST) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	}
	vp->v_holdcnt++;
}

/* Lose interest in a vnode. */
void
vdrop(struct vnode *vp)
{
#ifdef DIAGNOSTIC
	if (vp->v_holdcnt == 0)
		panic("vdrop: zero holdcnt");
#endif

	vp->v_holdcnt--;

	/*
	 * If it is on the holdlist and the hold count drops to
	 * zero, move it to the free list.
	 */
	if ((vp->v_bioflag & VBIOONFREELIST) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	}
}

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vfs_mount_foreach_vnode(struct mount *mp,
    int (*func)(struct vnode *, void *), void *arg) {
	struct vnode *vp, *nvp;
	int error = 0;

loop:
	for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp != NULL; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = LIST_NEXT(vp, v_mntvnodes);

		error = func(vp, arg);

		if (error != 0)
			break;
	}

	return (error);
}
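
/*
 * Usage sketch (illustrative, not part of the original file): the
 * iterator applies a callback to every vnode on a mount and stops on
 * the first nonzero return, restarting from the head if the list
 * changes underneath it.  vflush() below is the in-file example; a
 * hypothetical counter would look like
 *
 *	int
 *	count_vnode(struct vnode *vp, void *arg)
 *	{
 *		(*(int *)arg)++;
 *		return (0);
 *	}
 *
 *	int n = 0;
 *	vfs_mount_foreach_vnode(mp, count_vnode, &n);
 */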
struct vflush_args {
	struct vnode *skipvp;
	int busy;
	int flags;
};

int
vflush_vnode(struct vnode *vp, void *arg) {
	struct vflush_args *va = arg;
	struct proc *p = curproc;

	if (vp == va->skipvp) {
		return (0);
	}

	if ((va->flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
		return (0);
	}

	/*
	 * If WRITECLOSE is set, only flush out regular file
	 * vnodes open for writing.
	 */
	if ((va->flags & WRITECLOSE) &&
	    (vp->v_writecount == 0 || vp->v_type != VREG)) {
		return (0);
	}

	/*
	 * With v_usecount == 0, all we need to do is clear
	 * out the vnode data structures and we are done.
	 */
	if (vp->v_usecount == 0) {
		vgonel(vp, p);
		return (0);
	}

	/*
	 * If FORCECLOSE is set, forcibly close the vnode.
	 * For block or character devices, revert to an
	 * anonymous device. For all other files, just kill them.
	 */
	if (va->flags & FORCECLOSE) {
		if (vp->v_type != VBLK && vp->v_type != VCHR) {
			vgonel(vp, p);
		} else {
			vclean(vp, 0, p);
			vp->v_op = &spec_vops;
			insmntque(vp, (struct mount *)0);
		}
		return (0);
	}

#ifdef DEBUG
	if (busyprt)
		vprint("vflush: busy vnode", vp);
#endif
	va->busy++;
	return (0);
}

int
vflush(struct mount *mp, struct vnode *skipvp, int flags)
{
	struct vflush_args va;
	va.skipvp = skipvp;
	va.busy = 0;
	va.flags = flags;

	vfs_mount_foreach_vnode(mp, vflush_vnode, &va);

	if (va.busy)
		return (EBUSY);
	return (0);
}
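
/*
 * Usage sketch (illustrative, not part of the original file): a
 * filesystem's unmount routine typically flushes everything except
 * any vnode it still needs, passing FORCECLOSE only for forced
 * unmounts, roughly
 *
 *	flags = (mntflags & MNT_FORCE) ? FORCECLOSE : 0;
 *	error = vflush(mp, NULLVP, flags);
 *	if (error)
 *		return (error);	// EBUSY: some vnodes still active
 */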
/*
 * Disassociate the underlying file system from a vnode.
 */
void
vclean(struct vnode *vp, int flags, struct proc *p)
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount) != 0)
		vp->v_usecount++;

	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;

	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN, p);

	/*
	 * Clean out any VM data associated with the vnode.
	 */
	uvm_vnp_terminate(vp);
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);

	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, p);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0, p);
	}

	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");
	if (active) {
		vp->v_usecount--;
		if (vp->v_usecount == 0) {
			if (vp->v_holdcnt > 0)
				panic("vclean: not clean");
			vputonfreelist(vp);
		}
	}
	cache_purge(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = &dead_vops;
	VN_KNOTE(vp, NOTE_REVOKE);
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
#ifdef VFSLCKDEBUG
	vp->v_flag &= ~VLOCKSWORK;
#endif
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup(vp);
	}
}

/*
 * Recycle an unused vnode to the front of the free list.
 */
int
vrecycle(struct vnode *vp, struct proc *p)
{
	if (vp->v_usecount == 0) {
		vgonel(vp, p);
		return (1);
	}
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(struct vnode *vp)
{
	struct proc *p = curproc;
	vgonel(vp, p);
}

/*
 * vgone, with struct proc.
 */
void
vgonel(struct vnode *vp, struct proc *p)
{
	struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		tsleep(vp, PINOD, "vgone", 0);
		return;
	}

	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		free(vp->v_specinfo, M_VNODE, sizeof(struct specinfo));
		vp->v_specinfo = NULL;
	}

	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list.
	 */
	vp->v_type = VBAD;

	/*
	 * Move onto the free list, unless we were called from
	 * getnewvnode and we're not on any free list
	 */
	if (vp->v_usecount == 0 &&
	    (vp->v_bioflag & VBIOONFREELIST)) {
		int s;

		s = splbio();

		if (vp->v_holdcnt > 0)
			panic("vgonel: not clean");

		if (TAILQ_FIRST(&vnode_free_list) != vp) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		}
		splx(s);
	}
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev_t dev, enum vtype type, struct vnode **vpp)
{
	struct vnode *vp;
	int rc = 0;

	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	return (rc);
}

/*
 * Revoke all the vnodes corresponding to the specified minor number
 * range (endpoints inclusive) of the specified major.
 */
void
vdevgone(int maj, int minl, int minh, enum vtype type)
{
	struct vnode *vp;
	int mn;

	for (mn = minl; mn <= minh; mn++)
		if (vfinddev(makedev(maj, mn), type, &vp))
			VOP_REVOKE(vp, REVOKEALL);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(struct vnode *vp)
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	return (count);
}
#if defined(DEBUG) || defined(DIAGNOSTIC)
/*
 * Print out a description of a vnode.
 */
static char *typename[] =
   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(char *label, struct vnode *vp)
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("%p, type %s, use %u, write %u, hold %u,",
	    vp, typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strlcat(buf, "|VROOT", sizeof buf);
	if (vp->v_flag & VTEXT)
		strlcat(buf, "|VTEXT", sizeof buf);
	if (vp->v_flag & VSYSTEM)
		strlcat(buf, "|VSYSTEM", sizeof buf);
	if (vp->v_flag & VXLOCK)
		strlcat(buf, "|VXLOCK", sizeof buf);
	if (vp->v_flag & VXWANT)
		strlcat(buf, "|VXWANT", sizeof buf);
	if (vp->v_bioflag & VBIOWAIT)
		strlcat(buf, "|VBIOWAIT", sizeof buf);
	if (vp->v_bioflag & VBIOONFREELIST)
		strlcat(buf, "|VBIOONFREELIST", sizeof buf);
	if (vp->v_bioflag & VBIOONSYNCLIST)
		strlcat(buf, "|VBIOONSYNCLIST", sizeof buf);
	if (vp->v_flag & VALIASED)
		strlcat(buf, "|VALIASED", sizeof buf);
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}
#endif /* DEBUG || DIAGNOSTIC */

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes(void)
{
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");

	TAILQ_FOREACH_SAFE(mp, &mountlist, mnt_list, nmp) {
		if (vfs_busy(mp, VB_READ|VB_NOWAIT))
			continue;
		LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		vfs_unbusy(mp);
	}
}
#endif

/*
 * Top level filesystem related information gathering.
 */
int
vfs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen, struct proc *p)
{
	struct vfsconf *vfsp, *tmpvfsp;
	int ret;

	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */

	if (name[0] != VFS_GENERIC) {
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[0])
				break;

		if (vfsp == NULL)
			return (EOPNOTSUPP);

		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}

	switch (name[1]) {
	case VFS_MAXTYPENUM:
		return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));

	case VFS_CONF:
		if (namelen < 3)
			return (ENOTDIR);	/* overloaded */

		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[2])
				break;

		if (vfsp == NULL)
			return (EOPNOTSUPP);

		/* Make a copy, clear out kernel pointers */
		tmpvfsp = malloc(sizeof(*tmpvfsp), M_TEMP, M_WAITOK);
		memcpy(tmpvfsp, vfsp, sizeof(*tmpvfsp));
		tmpvfsp->vfc_vfsops = NULL;
		tmpvfsp->vfc_next = NULL;

		ret = sysctl_rdstruct(oldp, oldlenp, newp, tmpvfsp,
		    sizeof(struct vfsconf));

		free(tmpvfsp, M_TEMP, sizeof(*tmpvfsp));
		return (ret);
	case VFS_BCACHESTAT:	/* buffer cache statistics */
		ret = sysctl_rdstruct(oldp, oldlenp, newp, &bcstats,
		    sizeof(struct bcachestats));
		return(ret);
	}
	return (EOPNOTSUPP);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(struct vnode *vp)
{
	struct vnode *vq;
	int error = 0;

	if (vp->v_specmountpoint != NULL)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specmountpoint != NULL) {
				error = EBUSY;
				break;
			}
		}
	}
	return (error);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
int
vfs_hang_addrlist(struct mount *mp, struct netexport *nep,
    struct export_args *argp)
{
	struct netcred *np;
	struct radix_node_head *rnh;
	int nplen, i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		/* fill in the kernel's ucred from userspace's xucred */
		if ((error = crfromxucred(&np->netc_anon, &argp->ex_anon)))
			return (error);
		mp->mnt_flag |= MNT_DEFEXPORTED;
		goto finish;
	}
	if (argp->ex_addrlen > MLEN || argp->ex_masklen > MLEN ||
	    argp->ex_addrlen < 0 || argp->ex_masklen < 0)
		return (EINVAL);
	nplen = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(nplen, M_NETADDR, M_WAITOK|M_ZERO);
	saddr = (struct sockaddr *)(np + 1);
	error = copyin(argp->ex_addr, saddr, argp->ex_addrlen);
	if (error)
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	/* fill in the kernel's ucred from userspace's xucred */
	if ((error = crfromxucred(&np->netc_anon, &argp->ex_anon)))
		goto out;
	i = saddr->sa_family;
	switch (i) {
	case AF_INET:
		if ((rnh = nep->ne_rtable_inet) == NULL) {
			if (!rn_inithead((void **)&nep->ne_rtable_inet,
			    offsetof(struct sockaddr_in, sin_addr) * 8)) {
				error = ENOBUFS;
				goto out;
			}
			rnh = nep->ne_rtable_inet;
		}
		break;
	default:
		error = EINVAL;
		goto out;
	}
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
	    np->netc_rnodes, 0);
	if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
		error = EPERM;
		goto out;
	}
finish:
	np->netc_exflags = argp->ex_flags;
	return (0);
out:
	free(np, M_NETADDR, nplen);
	return (error);
}

/* ARGSUSED */
int
vfs_free_netcred(struct radix_node *rn, void *w, u_int id)
{
	struct radix_node_head *rnh = (struct radix_node_head *)w;

	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh, NULL);
	free(rn, M_NETADDR, 0);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
void
vfs_free_addrlist(struct netexport *nep)
{
	struct radix_node_head *rnh;

	if ((rnh = nep->ne_rtable_inet) != NULL) {
		(*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
		free(rnh, M_RTABLE, 0);
		nep->ne_rtable_inet = NULL;
	}
}

int
vfs_export(struct mount *mp, struct netexport *nep, struct export_args *argp)
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}

struct netcred *
vfs_export_lookup(struct mount *mp, struct netexport *nep, struct mbuf *nam)
{
	struct netcred *np;
	struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = mtod(nam, struct sockaddr *);
			switch(saddr->sa_family) {
			case AF_INET:
				rnh = nep->ne_rtable_inet;
				break;
			default:
				rnh = NULL;
				break;
			}
			if (rnh != NULL) {
				np = (struct netcred *)
				    (*rnh->rnh_matchaddr)((caddr_t)saddr,
				    rnh);
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}

/*
 * Do the usual access checking.
 * file_mode, uid and gid are from the vnode in question,
 * while acc_mode and cred are from the VOP_ACCESS parameter list
 */
int
vaccess(enum vtype type, mode_t file_mode, uid_t uid, gid_t gid,
    mode_t acc_mode, struct ucred *cred)
{
	mode_t mask;

	/* User id 0 always gets read/write access. */
	if (cred->cr_uid == 0) {
		/* For VEXEC, at least one of the execute bits must be set. */
		if ((acc_mode & VEXEC) && type != VDIR &&
		    (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
			return EACCES;
		return 0;
	}

	mask = 0;

	/* Otherwise, check the owner. */
	if (cred->cr_uid == uid) {
		if (acc_mode & VEXEC)
			mask |= S_IXUSR;
		if (acc_mode & VREAD)
			mask |= S_IRUSR;
		if (acc_mode & VWRITE)
			mask |= S_IWUSR;
		return (file_mode & mask) == mask ? 0 : EACCES;
	}

	/* Otherwise, check the groups. */
	if (groupmember(gid, cred)) {
		if (acc_mode & VEXEC)
			mask |= S_IXGRP;
		if (acc_mode & VREAD)
			mask |= S_IRGRP;
		if (acc_mode & VWRITE)
			mask |= S_IWGRP;
		return (file_mode & mask) == mask ? 0 : EACCES;
	}

	/* Otherwise, check everyone else. */
	if (acc_mode & VEXEC)
		mask |= S_IXOTH;
	if (acc_mode & VREAD)
		mask |= S_IROTH;
	if (acc_mode & VWRITE)
		mask |= S_IWOTH;
	return (file_mode & mask) == mask ? 0 : EACCES;
}
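
/*
 * Worked example (illustrative, not part of the original file): for a
 * file with mode 0640, owner uid 100 and group gid 10, a caller with
 * cr_uid == 200 who is a member of gid 10 requesting VREAD builds
 * mask = S_IRGRP (0040); 0640 & 0040 == 0040, so access is granted.
 * The same caller requesting VWRITE builds mask = S_IWGRP (0020);
 * 0640 & 0020 == 0, so vaccess() returns EACCES.
 */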
/*
 * Unmount all file systems.
 * We traverse the list in reverse order under the assumption that doing so
 * will avoid needing to worry about dependencies.
 */
void
vfs_unmountall(void)
{
	struct mount *mp, *nmp;
	int allerror, error, again = 1;

retry:
	allerror = 0;
	TAILQ_FOREACH_REVERSE_SAFE(mp, &mountlist, mntlist, mnt_list, nmp) {
		if ((vfs_busy(mp, VB_WRITE|VB_NOWAIT)) != 0)
			continue;
		if ((error = dounmount(mp, MNT_FORCE, curproc, NULL)) != 0) {
			printf("unmount of %s failed with error %d\n",
			    mp->mnt_stat.f_mntonname, error);
			allerror = 1;
		}
	}

	if (allerror) {
		printf("WARNING: some file systems would not unmount\n");
		if (again) {
			printf("retrying\n");
			again = 0;
			goto retry;
		}
	}
}

/*
 * Sync and unmount file systems before shutting down.
 */
void
vfs_shutdown(void)
{
#ifdef ACCOUNTING
	acct_shutdown();
#endif

	/* XXX Should suspend scheduling. */
	(void) spl0();

	printf("syncing disks... ");

	if (panicstr == 0) {
		/* Sync before unmount, in case we hang on something. */
		sys_sync(&proc0, (void *)0, (register_t *)0);

		/* Unmount file systems. */
		vfs_unmountall();
	}

	if (vfs_syncwait(1))
		printf("giving up\n");
	else
		printf("done\n");

#if NSOFTRAID > 0
	sr_shutdown();
#endif
}

/*
 * perform sync() operation and wait for buffers to flush.
 * assumptions: called w/ scheduler disabled and physical io enabled
 * for now called at spl0() XXX
 */
int
vfs_syncwait(int verbose)
{
	struct buf *bp;
	int iter, nbusy, dcount, s;
	struct proc *p;
#ifdef MULTIPROCESSOR
	int hold_count;
#endif

	p = curproc? curproc : &proc0;
	sys_sync(p, (void *)0, (register_t *)0);

	/* Wait for sync to finish. */
	dcount = 10000;
	for (iter = 0; iter < 20; iter++) {
		nbusy = 0;
		LIST_FOREACH(bp, &bufhead, b_list) {
			if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
				nbusy++;
			/*
			 * With soft updates, some buffers that are
			 * written will be remarked as dirty until other
			 * buffers are written.
			 */
			if (bp->b_flags & B_DELWRI) {
				s = splbio();
				bremfree(bp);
				buf_acquire(bp);
				splx(s);
				nbusy++;
				bawrite(bp);
				if (dcount-- <= 0) {
					if (verbose)
						printf("softdep ");
					return 1;
				}
			}
		}
		if (nbusy == 0)
			break;
		if (verbose)
			printf("%d ", nbusy);
#ifdef MULTIPROCESSOR
		if (__mp_lock_held(&kernel_lock))
			hold_count = __mp_release_all(&kernel_lock);
		else
			hold_count = 0;
#endif
		DELAY(40000 * iter);
#ifdef MULTIPROCESSOR
		if (hold_count)
			__mp_acquire_count(&kernel_lock, hold_count);
#endif
	}

	return nbusy;
}

/*
 * posix file system related system variables.
 */
int
fs_posix_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen, struct proc *p)
{
	/* all sysctl names at this level are terminal */
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case FS_POSIX_SETUID:
		if (newp && securelevel > 0)
			return (EPERM);
		return(sysctl_int(oldp, oldlenp, newp, newlen, &suid_clear));
	default:
		return (EOPNOTSUPP);
	}
	/* NOTREACHED */
}

/*
 * file system related system variables.
 */
int
fs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen, struct proc *p)
{
	sysctlfn *fn;

	switch (name[0]) {
	case FS_POSIX:
		fn = fs_posix_sysctl;
		break;
	default:
		return (EOPNOTSUPP);
	}
	return (*fn)(name + 1, namelen - 1, oldp, oldlenp, newp, newlen, p);
}

/*
 * Routines dealing with vnodes and buffers
 */

/*
 * Wait for all outstanding I/Os to complete
 *
 * Manipulates v_numoutput. Must be called at splbio()
 */
int
vwaitforio(struct vnode *vp, int slpflag, char *wmesg, int timeo)
{
	int error = 0;

	splassert(IPL_BIO);

	while (vp->v_numoutput) {
		vp->v_bioflag |= VBIOWAIT;
		error = tsleep(&vp->v_numoutput,
		    slpflag | (PRIBIO + 1), wmesg, timeo);
		if (error)
			break;
	}

	return (error);
}
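
/*
 * Usage sketch (illustrative, not part of the original file): callers
 * raise the interrupt priority level before draining a vnode's
 * writes, e.g.
 *
 *	s = splbio();
 *	vwaitforio(vp, 0, "drain", 0);
 *	splx(s);
 *
 * The VBIOWAIT flag set here is what tells vwakeup() below to issue
 * a wakeup once v_numoutput reaches zero.
 */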
  1545. /*
  1546. * Update outstanding I/O count and do wakeup if requested.
  1547. *
  1548. * Manipulates v_numoutput. Must be called at splbio()
  1549. */
  1550. void
  1551. vwakeup(struct vnode *vp)
  1552. {
  1553. splassert(IPL_BIO);
  1554. if (vp != NULL) {
  1555. if (vp->v_numoutput-- == 0)
  1556. panic("vwakeup: neg numoutput");
  1557. if ((vp->v_bioflag & VBIOWAIT) && vp->v_numoutput == 0) {
  1558. vp->v_bioflag &= ~VBIOWAIT;
  1559. wakeup(&vp->v_numoutput);
  1560. }
  1561. }
  1562. }

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(struct vnode *vp, int flags, struct ucred *cred, struct proc *p,
    int slpflag, int slptimeo)
{
	struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;

#ifdef VFSLCKDEBUG
	if ((vp->v_flag & VLOCKSWORK) && !VOP_ISLOCKED(vp))
		panic("vinvalbuf(): vp isn't locked");
#endif

	if (flags & V_SAVE) {
		s = splbio();
		vwaitforio(vp, 0, "vinvalbuf", 0);
		if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
			splx(s);
			if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
				return (error);
			s = splbio();
			if (vp->v_numoutput > 0 ||
			    !LIST_EMPTY(&vp->v_dirtyblkhd))
				panic("vinvalbuf: dirty bufs");
		}
		splx(s);
	}
loop:
	s = splbio();
	for (;;) {
		if ((blist = LIST_FIRST(&vp->v_cleanblkhd)) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = LIST_NEXT(blist, b_vnbufs);
		if (blist == NULL &&
		    (blist = LIST_FIRST(&vp->v_dirtyblkhd)) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = LIST_NEXT(blist, b_vnbufs);
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = LIST_NEXT(bp, b_vnbufs);
			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
				continue;
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				error = tsleep(bp, slpflag | (PRIBIO + 1),
				    "vinvalbuf", slptimeo);
				if (error) {
					splx(s);
					return (error);
				}
				break;
			}
			bremfree(bp);
			/*
			 * XXX Since there are no node locks for NFS, I
			 * believe there is a slight chance that a delayed
			 * write will occur while sleeping just above, so
			 * check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				buf_acquire(bp);
				splx(s);
				(void) VOP_BWRITE(bp);
				goto loop;
			}
			buf_acquire_nomap(bp);
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (!(flags & V_SAVEMETA) &&
	    (!LIST_EMPTY(&vp->v_dirtyblkhd) || !LIST_EMPTY(&vp->v_cleanblkhd)))
		panic("vinvalbuf: flush failed");
	splx(s);
	return (0);
}
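
/*
 * Illustrative sketch (assumption): a typical caller holds the vnode
 * lock and asks for dirty data to be written back before invalidation:
 *
 *	if ((error = vinvalbuf(vp, V_SAVE, cred, p, 0, 0)) != 0)
 *		return (error);
 *
 * V_SAVEMETA instead preserves indirect blocks (b_lblkno < 0), which
 * truncation-style callers rely on.
 */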

void
vflushbuf(struct vnode *vp, int sync)
{
	struct buf *bp, *nbp;
	int s;

loop:
	s = splbio();
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp != NULL; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty");
		bremfree(bp);
		buf_acquire(bp);
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		goto loop;
	}
	if (sync == 0) {
		splx(s);
		return;
	}
	vwaitforio(vp, 0, "vflushbuf", 0);
	if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
		splx(s);
#ifdef DIAGNOSTIC
		vprint("vflushbuf: dirty", vp);
#endif
		goto loop;
	}
	splx(s);
}
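
/*
 * Illustrative sketch (assumption): file system fsync routines use
 * this as their generic "push all dirty buffers" step, roughly:
 *
 *	vflushbuf(vp, waitfor == MNT_WAIT);
 *
 * where waitfor is the argument handed to VOP_FSYNC(); only the
 * synchronous case loops until the dirty list drains.
 */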

/*
 * Associate a buffer with a vnode.
 *
 * Manipulates buffer vnode queues. Must be called at splbio().
 */
void
bgetvp(struct vnode *vp, struct buf *bp)
{
	splassert(IPL_BIO);

	if (bp->b_vp)
		panic("bgetvp: not free");
	vhold(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;

	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
}
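
/*
 * Illustrative sketch (assumption): bgetvp() and brelvp() bracket a
 * buffer's association with a vnode; allocation paths do, at splbio():
 *
 *	s = splbio();
 *	bgetvp(vp, bp);		(takes a vhold() reference)
 *	...use the buffer...
 *	brelvp(bp);		(drops it again via vdrop())
 *	splx(s);
 */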

/*
 * Disassociate a buffer from a vnode.
 *
 * Manipulates vnode buffer queues. Must be called at splbio().
 */
void
brelvp(struct buf *bp)
{
	struct vnode *vp;

	splassert(IPL_BIO);

	if ((vp = bp->b_vp) == (struct vnode *) 0)
		panic("brelvp: NULL");

	/*
	 * Delete from old vnode list, if on one.
	 */
	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
		bufremvn(bp);
	if ((vp->v_bioflag & VBIOONSYNCLIST) &&
	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
		vp->v_bioflag &= ~VBIOONSYNCLIST;
		LIST_REMOVE(vp, v_synclist);
	}
	bp->b_vp = NULL;

	vdrop(vp);
}

/*
 * Replaces the current vnode associated with the buffer, if any,
 * with a new vnode.
 *
 * If an output I/O is pending on the buffer, the old vnode
 * I/O count is adjusted.
 *
 * Ignores vnode buffer queues. Must be called at splbio().
 */
void
buf_replacevnode(struct buf *bp, struct vnode *newvp)
{
	struct vnode *oldvp = bp->b_vp;

	splassert(IPL_BIO);

	if (oldvp)
		brelvp(bp);

	if ((bp->b_flags & (B_READ | B_DONE)) == 0) {
		newvp->v_numoutput++;	/* put it on swapdev */
		vwakeup(oldvp);
	}

	bgetvp(newvp, bp);
	bufremvn(bp);
}
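
/*
 * Illustrative sketch (assumption): as the "put it on swapdev" note
 * hints, the swap I/O path uses this to redirect a buffer from a file
 * vnode to the swap device vnode:
 *
 *	s = splbio();
 *	buf_replacevnode(bp, swapdev_vp);
 *	splx(s);
 *
 * swapdev_vp names the global swap device vnode.
 */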

/*
 * Used to assign buffers to the appropriate clean or dirty list on
 * the vnode and to add newly dirty vnodes to the appropriate
 * filesystem syncer list.
 *
 * Manipulates vnode buffer queues. Must be called at splbio().
 */
void
reassignbuf(struct buf *bp)
{
	struct buflists *listheadp;
	int delay;
	struct vnode *vp = bp->b_vp;

	splassert(IPL_BIO);

	/*
	 * Delete from old vnode list, if on one.
	 */
	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
		bufremvn(bp);

	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if ((bp->b_flags & B_DELWRI) == 0) {
		listheadp = &vp->v_cleanblkhd;
		if ((vp->v_bioflag & VBIOONSYNCLIST) &&
		    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
			vp->v_bioflag &= ~VBIOONSYNCLIST;
			LIST_REMOVE(vp, v_synclist);
		}
	} else {
		listheadp = &vp->v_dirtyblkhd;
		if ((vp->v_bioflag & VBIOONSYNCLIST) == 0) {
			switch (vp->v_type) {
			case VDIR:
				delay = syncdelay / 2;
				break;
			case VBLK:
				if (vp->v_specmountpoint != NULL) {
					delay = syncdelay / 3;
					break;
				}
				/* FALLTHROUGH */
			default:
				delay = syncdelay;
			}
			vn_syncer_add_to_worklist(vp, delay);
		}
	}
	bufinsvn(bp, listheadp);
}
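
/*
 * Illustrative sketch (assumption): delayed-write code marks the
 * buffer dirty and lets reassignbuf() requeue it and put the vnode on
 * the syncer worklist:
 *
 *	s = splbio();
 *	bp->b_flags |= B_DELWRI;
 *	reassignbuf(bp);
 *	splx(s);
 *
 * Directories and mounted block devices get shorter sync delays so
 * their metadata reaches disk sooner.
 */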

int
vfs_register(struct vfsconf *vfs)
{
	struct vfsconf *vfsp;
	struct vfsconf **vfspp;

#ifdef DIAGNOSTIC
	/* Paranoia? */
	if (vfs->vfc_refcount != 0)
		printf("vfs_register called with vfc_refcount > 0\n");
#endif

	/* Check if filesystem already known */
	for (vfspp = &vfsconf, vfsp = vfsconf; vfsp;
	    vfspp = &vfsp->vfc_next, vfsp = vfsp->vfc_next)
		if (strcmp(vfsp->vfc_name, vfs->vfc_name) == 0)
			return (EEXIST);

	if (vfs->vfc_typenum > maxvfsconf)
		maxvfsconf = vfs->vfc_typenum;

	vfs->vfc_next = NULL;

	/* Add to the end of the list */
	*vfspp = vfs;

	/* Call vfs_init() */
	if (vfs->vfc_vfsops->vfs_init)
		(*(vfs->vfc_vfsops->vfs_init))(vfs);

	return 0;
}
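
/*
 * Illustrative sketch (assumption): a hypothetical loadable file
 * system "examplefs" would describe and register itself like this:
 *
 *	static struct vfsconf examplefs_vfsconf;
 *
 *	examplefs_vfsconf.vfc_vfsops = &examplefs_vfsops;
 *	strlcpy(examplefs_vfsconf.vfc_name, "examplefs",
 *	    sizeof(examplefs_vfsconf.vfc_name));
 *	examplefs_vfsconf.vfc_typenum = maxvfsconf + 1;
 *	if ((error = vfs_register(&examplefs_vfsconf)) != 0)
 *		return (error);
 *
 * examplefs_vfsops and the typenum choice are placeholders.
 */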

int
vfs_unregister(struct vfsconf *vfs)
{
	struct vfsconf *vfsp;
	struct vfsconf **vfspp;
	int maxtypenum;

	/* Find our vfsconf struct */
	for (vfspp = &vfsconf, vfsp = vfsconf; vfsp;
	    vfspp = &vfsp->vfc_next, vfsp = vfsp->vfc_next) {
		if (strcmp(vfsp->vfc_name, vfs->vfc_name) == 0)
			break;
	}

	if (!vfsp)			/* Not found */
		return (ENOENT);

	if (vfsp->vfc_refcount)		/* In use */
		return (EBUSY);

	/* Remove from list and free */
	*vfspp = vfsp->vfc_next;

	maxtypenum = 0;
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (vfsp->vfc_typenum > maxtypenum)
			maxtypenum = vfsp->vfc_typenum;
	maxvfsconf = maxtypenum;

	return 0;
}
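
/*
 * Illustrative sketch (assumption): the matching unload path undoes
 * the registration shown above; EBUSY means an instance is still
 * mounted:
 *
 *	if ((error = vfs_unregister(&examplefs_vfsconf)) != 0)
 *		return (error);
 */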

/*
 * Check if vnode represents a disk device.
 */
int
vn_isdisk(struct vnode *vp, int *errp)
{
	/* Note: errp is never written; only the type check matters. */
	if (vp->v_type != VBLK && vp->v_type != VCHR)
		return (0);

	return (1);
}

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>

void
vfs_buf_print(void *b, int full,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct buf *bp = b;

	(*pr)(" vp %p lblkno 0x%llx blkno 0x%llx dev 0x%x\n"
	    " proc %p error %d flags %lb\n",
	    bp->b_vp, (int64_t)bp->b_lblkno, (int64_t)bp->b_blkno, bp->b_dev,
	    bp->b_proc, bp->b_error, bp->b_flags, B_BITS);

	(*pr)(" bufsize 0x%lx bcount 0x%lx resid 0x%lx\n"
	    " data %p saveaddr %p dep %p iodone %p\n",
	    bp->b_bufsize, bp->b_bcount, (long)bp->b_resid,
	    bp->b_data, bp->b_saveaddr,
	    LIST_FIRST(&bp->b_dep), bp->b_iodone);

	(*pr)(" dirty {off 0x%x end 0x%x} valid {off 0x%x end 0x%x}\n",
	    bp->b_dirtyoff, bp->b_dirtyend, bp->b_validoff, bp->b_validend);

#ifdef FFS_SOFTUPDATES
	if (full)
		softdep_print(bp, full, pr);
#endif
}

const char *vtypes[] = { VTYPE_NAMES };
const char *vtags[] = { VTAG_NAMES };

void
vfs_vnode_print(void *v, int full,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct vnode *vp = v;

	/*
	 * Bounds checks use >= so an index equal to the table size
	 * cannot read past the end of the name arrays.
	 */
	(*pr)("tag %s(%d) type %s(%d) mount %p typedata %p\n",
	    vp->v_tag >= nitems(vtags) ? "<unk>" : vtags[vp->v_tag],
	    vp->v_tag,
	    vp->v_type >= nitems(vtypes) ? "<unk>" : vtypes[vp->v_type],
	    vp->v_type, vp->v_mount, vp->v_mountedhere);

	(*pr)("data %p usecount %d writecount %d holdcnt %d numoutput %d\n",
	    vp->v_data, vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt, vp->v_numoutput);

	/* uvm_object_printit(&vp->v_uobj, full, pr); */

	if (full) {
		struct buf *bp;

		(*pr)("clean bufs:\n");
		LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
			(*pr)(" bp %p\n", bp);
			vfs_buf_print(bp, full, pr);
		}

		(*pr)("dirty bufs:\n");
		LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
			(*pr)(" bp %p\n", bp);
			vfs_buf_print(bp, full, pr);
		}
	}
}

void
vfs_mount_print(struct mount *mp, int full,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct vfsconf *vfc = mp->mnt_vfc;
	struct vnode *vp;
	int cnt = 0;

	(*pr)("flags %b\nvnodecovered %p syncer %p data %p\n",
	    mp->mnt_flag, MNT_BITS,
	    mp->mnt_vnodecovered, mp->mnt_syncer, mp->mnt_data);

	(*pr)("vfsconf: ops %p name \"%s\" num %d ref %d flags 0x%x\n",
	    vfc->vfc_vfsops, vfc->vfc_name, vfc->vfc_typenum,
	    vfc->vfc_refcount, vfc->vfc_flags);

	(*pr)("statvfs cache: bsize %x iosize %x\n"
	    "blocks %llu free %llu avail %lld\n",
	    mp->mnt_stat.f_bsize, mp->mnt_stat.f_iosize, mp->mnt_stat.f_blocks,
	    mp->mnt_stat.f_bfree, mp->mnt_stat.f_bavail);

	(*pr)("  files %llu ffiles %llu favail %lld\n", mp->mnt_stat.f_files,
	    mp->mnt_stat.f_ffree, mp->mnt_stat.f_favail);

	(*pr)("  f_fsidx {0x%x, 0x%x} owner %u ctime 0x%llx\n",
	    mp->mnt_stat.f_fsid.val[0], mp->mnt_stat.f_fsid.val[1],
	    mp->mnt_stat.f_owner, mp->mnt_stat.f_ctime);

	(*pr)("  syncwrites %llu asyncwrites = %llu\n",
	    mp->mnt_stat.f_syncwrites, mp->mnt_stat.f_asyncwrites);

	(*pr)("  syncreads %llu asyncreads = %llu\n",
	    mp->mnt_stat.f_syncreads, mp->mnt_stat.f_asyncreads);

	(*pr)("  fstype \"%s\" mnton \"%s\" mntfrom \"%s\" mntspec \"%s\"\n",
	    mp->mnt_stat.f_fstypename, mp->mnt_stat.f_mntonname,
	    mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntfromspec);

	(*pr)("locked vnodes:");
	/* XXX would take mountlist lock, except ddb has no context */
	LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes)
		if (VOP_ISLOCKED(vp)) {
			if (!LIST_NEXT(vp, v_mntvnodes))
				(*pr)(" %p", vp);
			else if (!(cnt++ % (72 / (sizeof(void *) * 2 + 4))))
				(*pr)("\n\t%p", vp);
			else
				(*pr)(", %p", vp);
		}
	(*pr)("\n");

	if (full) {
		(*pr)("all vnodes:\n\t");

		/* XXX would take mountlist lock, except ddb has no context */
		LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes)
			if (!LIST_NEXT(vp, v_mntvnodes))
				(*pr)(" %p", vp);
			else if (!(cnt++ % (72 / (sizeof(void *) * 2 + 4))))
				(*pr)(" %p,\n\t", vp);
			else
				(*pr)(" %p,", vp);

		(*pr)("\n");
	}
}
#endif /* DDB */

void
copy_statfs_info(struct statfs *sbp, const struct mount *mp)
{
	const struct statfs *mbp;

	strncpy(sbp->f_fstypename, mp->mnt_vfc->vfc_name, MFSNAMELEN);

	if (sbp == (mbp = &mp->mnt_stat))
		return;

	sbp->f_fsid = mbp->f_fsid;
	sbp->f_owner = mbp->f_owner;
	sbp->f_flags = mbp->f_flags;
	sbp->f_syncwrites = mbp->f_syncwrites;
	sbp->f_asyncwrites = mbp->f_asyncwrites;
	sbp->f_syncreads = mbp->f_syncreads;
	sbp->f_asyncreads = mbp->f_asyncreads;
	sbp->f_namemax = mbp->f_namemax;
	memcpy(sbp->f_mntonname, mp->mnt_stat.f_mntonname, MNAMELEN);
	memcpy(sbp->f_mntfromname, mp->mnt_stat.f_mntfromname, MNAMELEN);
	memcpy(sbp->f_mntfromspec, mp->mnt_stat.f_mntfromspec, MNAMELEN);
	memcpy(&sbp->mount_info.ufs_args, &mp->mnt_stat.mount_info.ufs_args,
	    sizeof(struct ufs_args));
}
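
/*
 * Illustrative sketch (assumption): per-file-system statfs
 * implementations fill in their own fields of *sbp and then call this
 * to copy the generic, mount-wide information:
 *
 *	...fill in fs-specific f_blocks, f_bfree, etc....
 *	copy_statfs_info(sbp, mp);
 *	return (0);
 */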