vfs_cache.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475
  1. /* $OpenBSD: vfs_cache.c,v 1.47 2015/03/14 03:38:51 jsg Exp $ */
  2. /* $NetBSD: vfs_cache.c,v 1.13 1996/02/04 02:18:09 christos Exp $ */
  3. /*
  4. * Copyright (c) 1989, 1993
  5. * The Regents of the University of California. All rights reserved.
  6. *
  7. * Redistribution and use in source and binary forms, with or without
  8. * modification, are permitted provided that the following conditions
  9. * are met:
  10. * 1. Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * 2. Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in the
  14. * documentation and/or other materials provided with the distribution.
  15. * 3. Neither the name of the University nor the names of its contributors
  16. * may be used to endorse or promote products derived from this software
  17. * without specific prior written permission.
  18. *
  19. * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  20. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  21. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  22. * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  23. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  24. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  25. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  26. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  27. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  28. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  29. * SUCH DAMAGE.
  30. *
  31. * @(#)vfs_cache.c 8.3 (Berkeley) 8/22/94
  32. */
  33. #include <sys/param.h>
  34. #include <sys/systm.h>
  35. #include <sys/time.h>
  36. #include <sys/mount.h>
  37. #include <sys/vnode.h>
  38. #include <sys/lock.h>
  39. #include <sys/namei.h>
  40. #include <sys/errno.h>
  41. #include <sys/pool.h>
  42. /*
  43. * TODO: namecache access should really be locked.
  44. */
  45. /*
  46. * For simplicity (and economy of storage), names longer than
  47. * a maximum length of NAMECACHE_MAXLEN are not cached; they occur
  48. * infrequently in any case, and are almost never of interest.
  49. *
  50. * Upon reaching the last segment of a path, if the reference
  51. * is for DELETE, or NOCACHE is set (rewrite), and the
  52. * name is located in the cache, it will be dropped.
  53. */
  54. /*
  55. * Structures associated with name caching.
  56. */
  57. long numcache; /* total number of cache entries allocated */
  58. long numneg; /* number of negative cache entries */
  59. TAILQ_HEAD(, namecache) nclruhead; /* Regular Entry LRU chain */
  60. TAILQ_HEAD(, namecache) nclruneghead; /* Negative Entry LRU chain */
  61. struct nchstats nchstats; /* cache effectiveness statistics */
  62. int doingcache = 1; /* 1 => enable the cache */
  63. struct pool nch_pool;
  64. void cache_zap(struct namecache *);
  65. u_long nextvnodeid;
  66. static int
  67. namecache_compare(struct namecache *n1, struct namecache *n2)
  68. {
  69. if (n1->nc_nlen == n2->nc_nlen)
  70. return (memcmp(n1->nc_name, n2->nc_name, n1->nc_nlen));
  71. else
  72. return (n1->nc_nlen - n2->nc_nlen);
  73. }
  74. RB_GENERATE(namecache_rb_cache, namecache, n_rbcache, namecache_compare);
  75. /*
  76. * blow away a namecache entry
  77. */
  78. void
  79. cache_zap(struct namecache *ncp)
  80. {
  81. struct vnode *dvp = NULL;
  82. if (ncp->nc_vp != NULL) {
  83. TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
  84. numcache--;
  85. } else {
  86. TAILQ_REMOVE(&nclruneghead, ncp, nc_neg);
  87. numneg--;
  88. }
  89. if (ncp->nc_dvp) {
  90. RB_REMOVE(namecache_rb_cache, &ncp->nc_dvp->v_nc_tree, ncp);
  91. if (RB_EMPTY(&ncp->nc_dvp->v_nc_tree))
  92. dvp = ncp->nc_dvp;
  93. }
  94. if (ncp->nc_vp && (ncp->nc_vpid == ncp->nc_vp->v_id)) {
  95. if (ncp->nc_vp != ncp->nc_dvp &&
  96. ncp->nc_vp->v_type == VDIR &&
  97. (ncp->nc_nlen > 2 ||
  98. (ncp->nc_nlen > 1 &&
  99. ncp->nc_name[1] != '.') ||
  100. (ncp->nc_nlen > 0 &&
  101. ncp->nc_name[0] != '.'))) {
  102. TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_me);
  103. }
  104. }
  105. pool_put(&nch_pool, ncp);
  106. if (dvp)
  107. vdrop(dvp);
  108. }
  109. /*
  110. * Look for a name in the cache. We don't do this if the segment name is
  111. * long, simply so the cache can avoid holding long names (which would
  112. * either waste space, or add greatly to the complexity).
  113. * dvp points to the directory to search. The componentname cnp holds
  114. * the information on the entry being sought, such as its length
  115. * and its name. If the lookup succeeds, vpp is set to point to the vnode
  116. * and an error of 0 is returned. If the lookup determines the name does
  117. * not exist (negative caching) an error of ENOENT is returned. If the
  118. * lookup fails, an error of -1 is returned.
  119. */
  120. int
  121. cache_lookup(struct vnode *dvp, struct vnode **vpp,
  122. struct componentname *cnp)
  123. {
  124. struct namecache *ncp;
  125. struct namecache n;
  126. struct vnode *vp;
  127. struct proc *p = curproc;
  128. u_long vpid;
  129. int error;
  130. *vpp = NULL;
  131. if (!doingcache) {
  132. cnp->cn_flags &= ~MAKEENTRY;
  133. return (-1);
  134. }
  135. if (cnp->cn_namelen > NAMECACHE_MAXLEN) {
  136. nchstats.ncs_long++;
  137. cnp->cn_flags &= ~MAKEENTRY;
  138. return (-1);
  139. }
  140. /* lookup in directory vnode's redblack tree */
  141. n.nc_nlen = cnp->cn_namelen;
  142. memcpy(n.nc_name, cnp->cn_nameptr, n.nc_nlen);
  143. ncp = RB_FIND(namecache_rb_cache, &dvp->v_nc_tree, &n);
  144. if (ncp == NULL) {
  145. nchstats.ncs_miss++;
  146. return (-1);
  147. }
  148. if ((cnp->cn_flags & MAKEENTRY) == 0) {
  149. nchstats.ncs_badhits++;
  150. goto remove;
  151. } else if (ncp->nc_vp == NULL) {
  152. if (cnp->cn_nameiop != CREATE ||
  153. (cnp->cn_flags & ISLASTCN) == 0) {
  154. nchstats.ncs_neghits++;
  155. /*
  156. * Move this slot to end of the negative LRU chain,
  157. */
  158. if (TAILQ_NEXT(ncp, nc_neg) != NULL) {
  159. TAILQ_REMOVE(&nclruneghead, ncp, nc_neg);
  160. TAILQ_INSERT_TAIL(&nclruneghead, ncp,
  161. nc_neg);
  162. }
  163. return (ENOENT);
  164. } else {
  165. nchstats.ncs_badhits++;
  166. goto remove;
  167. }
  168. } else if (ncp->nc_vpid != ncp->nc_vp->v_id) {
  169. nchstats.ncs_falsehits++;
  170. goto remove;
  171. }
  172. /*
  173. * Move this slot to end of the regular LRU chain.
  174. */
  175. if (TAILQ_NEXT(ncp, nc_lru) != NULL) {
  176. TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
  177. TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
  178. }
  179. vp = ncp->nc_vp;
  180. vpid = vp->v_id;
  181. if (vp == dvp) { /* lookup on "." */
  182. vref(dvp);
  183. error = 0;
  184. } else if (cnp->cn_flags & ISDOTDOT) {
  185. VOP_UNLOCK(dvp, 0, p);
  186. cnp->cn_flags |= PDIRUNLOCK;
  187. error = vget(vp, LK_EXCLUSIVE, p);
  188. /*
  189. * If the above vget() succeeded and both LOCKPARENT and
  190. * ISLASTCN is set, lock the directory vnode as well.
  191. */
  192. if (!error && (~cnp->cn_flags & (LOCKPARENT|ISLASTCN)) == 0) {
  193. if ((error = vn_lock(dvp, LK_EXCLUSIVE, p)) != 0) {
  194. vput(vp);
  195. return (error);
  196. }
  197. cnp->cn_flags &= ~PDIRUNLOCK;
  198. }
  199. } else {
  200. error = vget(vp, LK_EXCLUSIVE, p);
  201. /*
  202. * If the above vget() failed or either of LOCKPARENT or
  203. * ISLASTCN is set, unlock the directory vnode.
  204. */
  205. if (error || (~cnp->cn_flags & (LOCKPARENT|ISLASTCN)) != 0) {
  206. VOP_UNLOCK(dvp, 0, p);
  207. cnp->cn_flags |= PDIRUNLOCK;
  208. }
  209. }
  210. /*
  211. * Check that the lock succeeded, and that the capability number did
  212. * not change while we were waiting for the lock.
  213. */
  214. if (error || vpid != vp->v_id) {
  215. if (!error) {
  216. vput(vp);
  217. nchstats.ncs_falsehits++;
  218. } else
  219. nchstats.ncs_badhits++;
  220. /*
  221. * The parent needs to be locked when we return to VOP_LOOKUP().
  222. * The `.' case here should be extremely rare (if it can happen
  223. * at all), so we don't bother optimizing out the unlock/relock.
  224. */
  225. if (vp == dvp || error ||
  226. (~cnp->cn_flags & (LOCKPARENT|ISLASTCN)) != 0) {
  227. if ((error = vn_lock(dvp, LK_EXCLUSIVE, p)) != 0)
  228. return (error);
  229. cnp->cn_flags &= ~PDIRUNLOCK;
  230. }
  231. return (-1);
  232. }
  233. nchstats.ncs_goodhits++;
  234. *vpp = vp;
  235. return (0);
  236. remove:
  237. /*
  238. * Last component and we are renaming or deleting,
  239. * the cache entry is invalid, or otherwise don't
  240. * want cache entry to exist.
  241. */
  242. cache_zap(ncp);
  243. return (-1);
  244. }
  245. /*
  246. * Scan cache looking for name of directory entry pointing at vp.
  247. *
  248. * Fill in dvpp.
  249. *
  250. * If bufp is non-NULL, also place the name in the buffer which starts
  251. * at bufp, immediately before *bpp, and move bpp backwards to point
  252. * at the start of it. (Yes, this is a little baroque, but it's done
  253. * this way to cater to the whims of getcwd).
  254. *
  255. * Returns 0 on success, -1 on cache miss, positive errno on failure.
  256. *
  257. * TODO: should we return *dvpp locked?
  258. */
  259. int
  260. cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char *bufp)
  261. {
  262. struct namecache *ncp;
  263. struct vnode *dvp = NULL;
  264. char *bp;
  265. if (!doingcache)
  266. goto out;
  267. TAILQ_FOREACH(ncp, &vp->v_cache_dst, nc_me) {
  268. dvp = ncp->nc_dvp;
  269. if (dvp && dvp != vp && ncp->nc_dvpid == dvp->v_id)
  270. goto found;
  271. }
  272. goto miss;
  273. found:
  274. #ifdef DIAGNOSTIC
  275. if (ncp->nc_nlen == 1 &&
  276. ncp->nc_name[0] == '.')
  277. panic("cache_revlookup: found entry for .");
  278. if (ncp->nc_nlen == 2 &&
  279. ncp->nc_name[0] == '.' &&
  280. ncp->nc_name[1] == '.')
  281. panic("cache_revlookup: found entry for ..");
  282. #endif
  283. nchstats.ncs_revhits++;
  284. if (bufp != NULL) {
  285. bp = *bpp;
  286. bp -= ncp->nc_nlen;
  287. if (bp <= bufp) {
  288. *dvpp = NULL;
  289. return (ERANGE);
  290. }
  291. memcpy(bp, ncp->nc_name, ncp->nc_nlen);
  292. *bpp = bp;
  293. }
  294. *dvpp = dvp;
  295. /*
  296. * XXX: Should we vget() here to have more
  297. * consistent semantics with cache_lookup()?
  298. */
  299. return (0);
  300. miss:
  301. nchstats.ncs_revmiss++;
  302. out:
  303. *dvpp = NULL;
  304. return (-1);
  305. }
  306. /*
  307. * Add an entry to the cache
  308. */
  309. void
  310. cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
  311. {
  312. struct namecache *ncp, *lncp;
  313. if (!doingcache || cnp->cn_namelen > NAMECACHE_MAXLEN)
  314. return;
  315. /*
  316. * allocate, or recycle (free and allocate) an ncp.
  317. */
  318. if (numcache >= initialvnodes) {
  319. if ((ncp = TAILQ_FIRST(&nclruhead)) != NULL)
  320. cache_zap(ncp);
  321. else if ((ncp = TAILQ_FIRST(&nclruneghead)) != NULL)
  322. cache_zap(ncp);
  323. else
  324. panic("wtf? leak?");
  325. }
  326. ncp = pool_get(&nch_pool, PR_WAITOK|PR_ZERO);
  327. /* grab the vnode we just found */
  328. ncp->nc_vp = vp;
  329. if (vp)
  330. ncp->nc_vpid = vp->v_id;
  331. /* fill in cache info */
  332. ncp->nc_dvp = dvp;
  333. ncp->nc_dvpid = dvp->v_id;
  334. ncp->nc_nlen = cnp->cn_namelen;
  335. memcpy(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen);
  336. if (RB_EMPTY(&dvp->v_nc_tree)) {
  337. vhold(dvp);
  338. }
  339. if ((lncp = RB_INSERT(namecache_rb_cache, &dvp->v_nc_tree, ncp))
  340. != NULL) {
  341. /* someone has raced us and added a different entry
  342. * for the same vnode (different ncp) - we don't need
  343. * this entry, so free it and we are done.
  344. */
  345. pool_put(&nch_pool, ncp);
  346. /* we know now dvp->v_nc_tree is not empty, no need
  347. * to vdrop here
  348. */
  349. goto done;
  350. }
  351. if (vp) {
  352. TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
  353. numcache++;
  354. /* don't put . or .. in the reverse map */
  355. if (vp != dvp && vp->v_type == VDIR &&
  356. (ncp->nc_nlen > 2 ||
  357. (ncp->nc_nlen > 1 &&
  358. ncp->nc_name[1] != '.') ||
  359. (ncp->nc_nlen > 0 &&
  360. ncp->nc_name[0] != '.')))
  361. TAILQ_INSERT_TAIL(&vp->v_cache_dst, ncp,
  362. nc_me);
  363. } else {
  364. TAILQ_INSERT_TAIL(&nclruneghead, ncp, nc_neg);
  365. numneg++;
  366. }
  367. if (numneg > initialvnodes) {
  368. if ((ncp = TAILQ_FIRST(&nclruneghead))
  369. != NULL)
  370. cache_zap(ncp);
  371. }
  372. done:
  373. return;
  374. }
  375. /*
  376. * Name cache initialization, from vfs_init() when we are booting
  377. */
  378. void
  379. nchinit()
  380. {
  381. TAILQ_INIT(&nclruhead);
  382. TAILQ_INIT(&nclruneghead);
  383. pool_init(&nch_pool, sizeof(struct namecache), 0, 0, PR_WAITOK,
  384. "nchpl", NULL);
  385. }
  386. /*
  387. * Cache flush, a particular vnode; called when a vnode is renamed to
  388. * hide entries that would now be invalid
  389. */
  390. void
  391. cache_purge(struct vnode *vp)
  392. {
  393. struct namecache *ncp;
  394. /* We should never have destinations cached for a non-VDIR vnode. */
  395. KASSERT(vp->v_type == VDIR || TAILQ_EMPTY(&vp->v_cache_dst));
  396. while ((ncp = TAILQ_FIRST(&vp->v_cache_dst)))
  397. cache_zap(ncp);
  398. while ((ncp = RB_ROOT(&vp->v_nc_tree)))
  399. cache_zap(ncp);
  400. /* XXX this blows goats */
  401. vp->v_id = ++nextvnodeid;
  402. if (vp->v_id == 0)
  403. vp->v_id = ++nextvnodeid;
  404. }
  405. /*
  406. * Cache flush, a whole filesystem; called when filesys is umounted to
  407. * remove entries that would now be invalid
  408. */
  409. void
  410. cache_purgevfs(struct mount *mp)
  411. {
  412. struct namecache *ncp, *nxtcp;
  413. /* whack the regular entries */
  414. for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) {
  415. nxtcp = TAILQ_NEXT(ncp, nc_lru);
  416. if (ncp->nc_dvp == NULL || ncp->nc_dvp->v_mount != mp) {
  417. continue;
  418. }
  419. /* free the resources we had */
  420. cache_zap(ncp);
  421. }
  422. /* whack the negative entries */
  423. for (ncp = TAILQ_FIRST(&nclruneghead); ncp != NULL; ncp = nxtcp) {
  424. nxtcp = TAILQ_NEXT(ncp, nc_neg);
  425. if (ncp->nc_dvp == NULL || ncp->nc_dvp->v_mount != mp) {
  426. continue;
  427. }
  428. /* free the resources we had */
  429. cache_zap(ncp);
  430. }
  431. }