/* vfs_sync.c */
  1. /* $OpenBSD: vfs_sync.c,v 1.54 2015/03/14 03:38:51 jsg Exp $ */
  2. /*
  3. * Portions of this code are:
  4. *
  5. * Copyright (c) 1989, 1993
  6. * The Regents of the University of California. All rights reserved.
  7. * (c) UNIX System Laboratories, Inc.
  8. * All or some portions of this file are derived from material licensed
  9. * to the University of California by American Telephone and Telegraph
  10. * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  11. * the permission of UNIX System Laboratories, Inc.
  12. *
  13. * Redistribution and use in source and binary forms, with or without
  14. * modification, are permitted provided that the following conditions
  15. * are met:
  16. * 1. Redistributions of source code must retain the above copyright
  17. * notice, this list of conditions and the following disclaimer.
  18. * 2. Redistributions in binary form must reproduce the above copyright
  19. * notice, this list of conditions and the following disclaimer in the
  20. * documentation and/or other materials provided with the distribution.
  21. * 3. Neither the name of the University nor the names of its contributors
  22. * may be used to endorse or promote products derived from this software
  23. * without specific prior written permission.
  24. *
  25. * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  26. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  27. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  28. * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  29. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  30. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  31. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  32. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  33. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  34. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  35. * SUCH DAMAGE.
  36. */
  37. /*
  38. * Syncer daemon
  39. */
#include <sys/queue.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/sched.h>

#ifdef FFS_SOFTUPDATES
/* Forward declaration; implemented by the soft updates code. */
int softdep_process_worklist(struct mount *);
#endif
/*
 * Syncer tunables and workitem-queue state.
 */
#define SYNCER_MAXDELAY	32	/* maximum sync delay time */
#define SYNCER_DEFAULT	30	/* default sync delay time */

int syncer_maxdelay = SYNCER_MAXDELAY;	/* maximum delay time */
int syncdelay = SYNCER_DEFAULT;		/* time to delay syncing vnodes */

int rushjob = 0;		/* number of slots to run ASAP */
int stat_rush_requests = 0;	/* number of rush requests */

int syncer_delayno = 0;		/* next queue slot to be processed */
long syncer_mask;		/* hash mask returned by hashinit() */
LIST_HEAD(synclist, vnode);
static struct synclist *syncer_workitem_pending;	/* the delay queues */

struct proc *syncerproc;	/* the syncer daemon's proc, set in sched_sync() */
/*
 * The workitem queue.
 *
 * It is useful to delay writes of file data and filesystem metadata
 * for tens of seconds so that quickly created and deleted files need
 * not waste disk bandwidth being created and removed.  To realize this,
 * we append vnodes to a "workitem" queue.  When running with a soft
 * updates implementation, most pending metadata dependencies should
 * not wait for more than a few seconds.  Thus, mounted block devices
 * are delayed only about half the time that file data is delayed.
 * Similarly, directory updates are more critical, so are only delayed
 * about a third the time that file data is delayed.  Thus, there are
 * SYNCER_MAXDELAY queues that are processed round-robin at a rate of
 * one each second (driven off the filesystem syncer process).  The
 * syncer_delayno variable indicates the next queue that is to be processed.
 * Items that need to be processed soon are placed in this queue:
 *
 *	syncer_workitem_pending[syncer_delayno]
 *
 * A delay of fifteen seconds is done by placing the request fifteen
 * entries later in the queue:
 *
 *	syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask]
 *
 */
/*
 * Allocate the syncer workitem hash table.  hashinit() rounds the slot
 * count up to a power of two and hands back the matching mask, so the
 * effective number of queue slots is re-derived from that mask.
 */
void
vn_initialize_syncerd(void)
{
	syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, M_WAITOK,
	    &syncer_mask);
	syncer_maxdelay = syncer_mask + 1;
}
  99. /*
  100. * Add an item to the syncer work queue.
  101. */
  102. void
  103. vn_syncer_add_to_worklist(struct vnode *vp, int delay)
  104. {
  105. int s, slot;
  106. if (delay > syncer_maxdelay - 2)
  107. delay = syncer_maxdelay - 2;
  108. slot = (syncer_delayno + delay) & syncer_mask;
  109. s = splbio();
  110. if (vp->v_bioflag & VBIOONSYNCLIST)
  111. LIST_REMOVE(vp, v_synclist);
  112. vp->v_bioflag |= VBIOONSYNCLIST;
  113. LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist);
  114. splx(s);
  115. }
/*
 * System filesystem synchronizer daemon.
 *
 * Once per second, drains the current workitem-queue slot: each vnode on
 * it is fsync'd lazily and, if still dirty, rescheduled syncdelay slots
 * ahead.  Never returns.
 */
void
sched_sync(struct proc *p)
{
	struct synclist *slp;
	struct vnode *vp;
	time_t starttime;
	int s;

	syncerproc = curproc;

	for (;;) {
		starttime = time_second;

		/*
		 * Push files whose dirty time has expired.
		 */
		s = splbio();
		slp = &syncer_workitem_pending[syncer_delayno];

		/* Advance the ring for the next pass (wraps at maxdelay). */
		syncer_delayno += 1;
		if (syncer_delayno == syncer_maxdelay)
			syncer_delayno = 0;

		while ((vp = LIST_FIRST(slp)) != NULL) {
			if (vget(vp, LK_EXCLUSIVE | LK_NOWAIT, p)) {
				/*
				 * If we fail to get the lock, we move this
				 * vnode one second ahead in time.
				 * XXX - no good, but the best we can do.
				 */
				vn_syncer_add_to_worklist(vp, 1);
				continue;
			}
			/* Drop splbio around the (possibly sleeping) fsync. */
			splx(s);
			(void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
			vput(vp);
			s = splbio();
			/*
			 * If the vnode is still at the head of this slot,
			 * the fsync did not clean it (it would otherwise
			 * have been removed from the worklist).
			 */
			if (LIST_FIRST(slp) == vp) {
				/*
				 * Note: disk vps can remain on the
				 * worklist too with no dirty blocks, but
				 * since sync_fsync() moves it to a different
				 * slot we are safe.
				 */
#ifdef DIAGNOSTIC
				if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL &&
				    vp->v_type != VBLK) {
					vprint("fsync failed", vp);
					if (vp->v_mount != NULL)
						printf("mounted on: %s\n",
						    vp->v_mount->mnt_stat.f_mntonname);
					panic("sched_sync: fsync failed");
				}
#endif /* DIAGNOSTIC */
				/*
				 * Put us back on the worklist.  The worklist
				 * routine will remove us from our current
				 * position and then add us back in at a later
				 * position.
				 */
				vn_syncer_add_to_worklist(vp, syncdelay);
			}

			/* Yield between vnodes so we don't hog the CPU. */
			sched_pause();
		}

		splx(s);

#ifdef FFS_SOFTUPDATES
		/*
		 * Do soft update processing.
		 */
		softdep_process_worklist(NULL);
#endif

		/*
		 * The variable rushjob allows the kernel to speed up the
		 * processing of the filesystem syncer process.  A rushjob
		 * value of N tells the filesystem syncer to process the next
		 * N seconds worth of work on its queue ASAP.  Currently rushjob
		 * is used by the soft update code to speed up the filesystem
		 * syncer process when the incore state is getting so far
		 * ahead of the disk that the kernel memory pool is being
		 * threatened with exhaustion.
		 */
		if (rushjob > 0) {
			rushjob -= 1;
			continue;
		}

		/*
		 * If it has taken us less than a second to process the
		 * current work, then wait.  Otherwise start right over
		 * again.  We can still lose time if any single round
		 * takes more than two seconds, but it does not really
		 * matter as we are just trying to generally pace the
		 * filesystem activity.
		 */
		if (time_second == starttime)
			tsleep(&lbolt, PPAUSE, "syncer", 0);
	}
}
  211. /*
  212. * Request the syncer daemon to speed up its work.
  213. * We never push it to speed up more than half of its
  214. * normal turn time, otherwise it could take over the cpu.
  215. */
  216. int
  217. speedup_syncer(void)
  218. {
  219. int s;
  220. SCHED_LOCK(s);
  221. if (syncerproc && syncerproc->p_wchan == &lbolt)
  222. setrunnable(syncerproc);
  223. SCHED_UNLOCK(s);
  224. if (rushjob < syncdelay / 2) {
  225. rushjob += 1;
  226. stat_rush_requests += 1;
  227. return 1;
  228. }
  229. return 0;
  230. }
/* Routine to create and manage a filesystem syncer vnode. */
int sync_fsync(void *);
int sync_inactive(void *);
int sync_print(void *);

/* Vnode operations vector for the per-mount syncer vnode. */
struct vops sync_vops = {
	.vop_close = nullop,
	.vop_fsync = sync_fsync,
	.vop_inactive = sync_inactive,
	.vop_reclaim = nullop,
	.vop_lock = vop_generic_lock,
	.vop_unlock = vop_generic_unlock,
	.vop_islocked = vop_generic_islocked,
	.vop_print = sync_print
};
  245. /*
  246. * Create a new filesystem syncer vnode for the specified mount point.
  247. */
  248. int
  249. vfs_allocate_syncvnode(struct mount *mp)
  250. {
  251. struct vnode *vp;
  252. static long start, incr, next;
  253. int error;
  254. /* Allocate a new vnode */
  255. if ((error = getnewvnode(VT_VFS, mp, &sync_vops, &vp)) != 0) {
  256. mp->mnt_syncer = NULL;
  257. return (error);
  258. }
  259. vp->v_writecount = 1;
  260. vp->v_type = VNON;
  261. /*
  262. * Place the vnode onto the syncer worklist. We attempt to
  263. * scatter them about on the list so that they will go off
  264. * at evenly distributed times even if all the filesystems
  265. * are mounted at once.
  266. */
  267. next += incr;
  268. if (next == 0 || next > syncer_maxdelay) {
  269. start /= 2;
  270. incr /= 2;
  271. if (start == 0) {
  272. start = syncer_maxdelay / 2;
  273. incr = syncer_maxdelay;
  274. }
  275. next = start;
  276. }
  277. vn_syncer_add_to_worklist(vp, next);
  278. mp->mnt_syncer = vp;
  279. return (0);
  280. }
  281. /*
  282. * Do a lazy sync of the filesystem.
  283. */
  284. int
  285. sync_fsync(void *v)
  286. {
  287. struct vop_fsync_args *ap = v;
  288. struct vnode *syncvp = ap->a_vp;
  289. struct mount *mp = syncvp->v_mount;
  290. int asyncflag;
  291. /*
  292. * We only need to do something if this is a lazy evaluation.
  293. */
  294. if (ap->a_waitfor != MNT_LAZY)
  295. return (0);
  296. /*
  297. * Move ourselves to the back of the sync list.
  298. */
  299. vn_syncer_add_to_worklist(syncvp, syncdelay);
  300. /*
  301. * Walk the list of vnodes pushing all that are dirty and
  302. * not already on the sync list.
  303. */
  304. if (vfs_busy(mp, VB_READ|VB_NOWAIT) == 0) {
  305. asyncflag = mp->mnt_flag & MNT_ASYNC;
  306. mp->mnt_flag &= ~MNT_ASYNC;
  307. VFS_SYNC(mp, MNT_LAZY, ap->a_cred, ap->a_p);
  308. if (asyncflag)
  309. mp->mnt_flag |= MNT_ASYNC;
  310. vfs_unbusy(mp);
  311. }
  312. return (0);
  313. }
  314. /*
  315. * The syncer vnode is no longer needed and is being decommissioned.
  316. */
  317. int
  318. sync_inactive(void *v)
  319. {
  320. struct vop_inactive_args *ap = v;
  321. struct vnode *vp = ap->a_vp;
  322. int s;
  323. if (vp->v_usecount == 0) {
  324. VOP_UNLOCK(vp, 0, ap->a_p);
  325. return (0);
  326. }
  327. vp->v_mount->mnt_syncer = NULL;
  328. s = splbio();
  329. LIST_REMOVE(vp, v_synclist);
  330. vp->v_bioflag &= ~VBIOONSYNCLIST;
  331. splx(s);
  332. vp->v_writecount = 0;
  333. vput(vp);
  334. return (0);
  335. }
  336. /*
  337. * Print out a syncer vnode.
  338. */
  339. int
  340. sync_print(void *v)
  341. {
  342. printf("syncer vnode\n");
  343. return (0);
  344. }