mbcache.c 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859
  1. /*
  2. * linux/fs/mbcache.c
  3. * (C) 2001-2002 Andreas Gruenbacher, <a.gruenbacher@computer.org>
  4. */
  5. /*
  6. * Filesystem Meta Information Block Cache (mbcache)
  7. *
  8. * The mbcache caches blocks of block devices that need to be located
  9. * by their device/block number, as well as by other criteria (such
  10. * as the block's contents).
  11. *
  12. * There can only be one cache entry in a cache per device and block number.
  13. * Additional indexes need not be unique in this sense. The number of
  14. * additional indexes (=other criteria) can be hardwired at compile time
  15. * or specified at cache create time.
  16. *
  17. * Each cache entry is of fixed size. An entry may be `valid' or `invalid'
  18. * in the cache. A valid entry is in the main hash tables of the cache,
  19. * and may also be in the lru list. An invalid entry is not in any hashes
  20. * or lists.
  21. *
  22. * A valid cache entry is only in the lru list if no handles refer to it.
  23. * Invalid cache entries will be freed when the last handle to the cache
  24. * entry is released. Entries that cannot be freed immediately are put
  25. * back on the lru list.
  26. */
  27. /*
  28. * Lock descriptions and usage:
  29. *
  30. * Each hash chain of both the block and index hash tables now contains
  31. * a built-in lock used to serialize accesses to the hash chain.
  32. *
  33. * Accesses to global data structures mb_cache_list and mb_cache_lru_list
  34. * are serialized via the global spinlock mb_cache_spinlock.
  35. *
  36. * Each mb_cache_entry contains a spinlock, e_entry_lock, to serialize
  37. * accesses to its local data, such as e_used and e_queued.
  38. *
  39. * Lock ordering:
  40. *
  41. * Each block hash chain's lock has the highest lock order, followed by an
  42. * index hash chain's lock, mb_cache_bg_lock (used to implement mb_cache_entry's
  43. * lock), and mb_cache_spinlock, with the lowest order. While holding
  44. * either a block or index hash chain lock, a thread can acquire an
  45. * mb_cache_bg_lock, and while holding that it can also acquire mb_cache_spinlock.
  46. *
  47. * Synchronization:
  48. *
  49. * Since both mb_cache_entry_get and mb_cache_entry_find scan the block or
  50. * index hash chain, they need to lock the corresponding hash chain. For each
  51. * mb_cache_entry within the chain, they need to lock the mb_cache_entry to
  52. * prevent any simultaneous release or free of the entry and also
  53. * to serialize accesses to either the e_used or e_queued member of the entry.
  54. *
  55. * To avoid having a dangling reference to an already freed
  56. * mb_cache_entry, an mb_cache_entry is only freed when it is not on a
  57. * block hash chain and also no longer being referenced, both e_used,
  58. * and e_queued are 0's. When an mb_cache_entry is explicitly freed it is
  59. * first removed from a block hash chain.
  60. */
  61. #include <linux/kernel.h>
  62. #include <linux/module.h>
  63. #include <linux/hash.h>
  64. #include <linux/fs.h>
  65. #include <linux/mm.h>
  66. #include <linux/slab.h>
  67. #include <linux/sched.h>
  68. #include <linux/list_bl.h>
  69. #include <linux/mbcache.h>
  70. #include <linux/init.h>
  71. #include <linux/blockgroup_lock.h>
  72. #include <linux/log2.h>
  /*
   * Diagnostic helpers.
   *
   * mb_debug() and mb_assert() are only compiled in when MB_CACHE_DEBUG is
   * defined; otherwise they expand to empty statements and their arguments
   * are not evaluated.  mb_error() is always active.
   *
   * The first argument of mb_debug()/mb_error() must be a string literal:
   * the trailing newline is pasted onto it so that every message is emitted
   * by one printk() call.  Issuing the newline through a second, level-less
   * printk("\n") allowed the message and its terminator to be separated by
   * output from another context.
   */
  #ifdef MB_CACHE_DEBUG
  # define mb_debug(fmt, ...) \
      printk(KERN_DEBUG fmt "\n", ##__VA_ARGS__)
  # define mb_assert(c) do { \
      if (!(c)) \
          printk(KERN_ERR "assertion " #c " failed\n"); \
  } while (0)
  #else
  # define mb_debug(fmt, ...) do { } while (0)
  # define mb_assert(c) do { } while (0)
  #endif
  #define mb_error(fmt, ...) \
      printk(KERN_ERR fmt "\n", ##__VA_ARGS__)
  /*
   * MB_CACHE_WRITER is the amount added to an entry's e_used count by an
   * exclusive holder (on top of the usual +1); readers only add 1.  An
   * e_used value >= MB_CACHE_WRITER therefore means a writer holds the entry.
   */
  #define MB_CACHE_WRITER ((unsigned short)~0U >> 1)

  /*
   * Per-entry locks are taken from the mb_cache_bg_lock array; the slot is
   * chosen by hashing the entry's address down to log2(NR_BG_LOCKS) bits.
   * The macro argument is parenthesized so that an expression argument is
   * converted as a whole rather than having the cast bind to its first
   * operand only.
   */
  #define MB_CACHE_ENTRY_LOCK_BITS ilog2(NR_BG_LOCKS)
  #define MB_CACHE_ENTRY_LOCK_INDEX(ce) \
      (hash_long((unsigned long)(ce), MB_CACHE_ENTRY_LOCK_BITS))
  93. static DECLARE_WAIT_QUEUE_HEAD(mb_cache_queue);
  94. static struct blockgroup_lock *mb_cache_bg_lock;
  95. static struct kmem_cache *mb_cache_kmem_cache;
  96. MODULE_AUTHOR("Andreas Gruenbacher <a.gruenbacher@computer.org>");
  97. MODULE_DESCRIPTION("Meta block cache (for extended attributes)");
  98. MODULE_LICENSE("GPL");
  99. EXPORT_SYMBOL(mb_cache_create);
  100. EXPORT_SYMBOL(mb_cache_shrink);
  101. EXPORT_SYMBOL(mb_cache_destroy);
  102. EXPORT_SYMBOL(mb_cache_entry_alloc);
  103. EXPORT_SYMBOL(mb_cache_entry_insert);
  104. EXPORT_SYMBOL(mb_cache_entry_release);
  105. EXPORT_SYMBOL(mb_cache_entry_free);
  106. EXPORT_SYMBOL(mb_cache_entry_get);
  107. #if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0)
  108. EXPORT_SYMBOL(mb_cache_entry_find_first);
  109. EXPORT_SYMBOL(mb_cache_entry_find_next);
  110. #endif
  111. /*
  112. * Global data: list of all mbcache's, lru list, and a spinlock for
  113. * accessing cache data structures on SMP machines. The lru list is
  114. * global across all mbcaches.
  115. */
  116. static LIST_HEAD(mb_cache_list);
  117. static LIST_HEAD(mb_cache_lru_list);
  118. static DEFINE_SPINLOCK(mb_cache_spinlock);
  119. static inline void
  120. __spin_lock_mb_cache_entry(struct mb_cache_entry *ce)
  121. {
  122. spin_lock(bgl_lock_ptr(mb_cache_bg_lock,
  123. MB_CACHE_ENTRY_LOCK_INDEX(ce)));
  124. }
  125. static inline void
  126. __spin_unlock_mb_cache_entry(struct mb_cache_entry *ce)
  127. {
  128. spin_unlock(bgl_lock_ptr(mb_cache_bg_lock,
  129. MB_CACHE_ENTRY_LOCK_INDEX(ce)));
  130. }
  131. static inline int
  132. __mb_cache_entry_is_block_hashed(struct mb_cache_entry *ce)
  133. {
  134. return !hlist_bl_unhashed(&ce->e_block_list);
  135. }
  136. static inline void
  137. __mb_cache_entry_unhash_block(struct mb_cache_entry *ce)
  138. {
  139. if (__mb_cache_entry_is_block_hashed(ce))
  140. hlist_bl_del_init(&ce->e_block_list);
  141. }
  142. static inline int
  143. __mb_cache_entry_is_index_hashed(struct mb_cache_entry *ce)
  144. {
  145. return !hlist_bl_unhashed(&ce->e_index.o_list);
  146. }
  147. static inline void
  148. __mb_cache_entry_unhash_index(struct mb_cache_entry *ce)
  149. {
  150. if (__mb_cache_entry_is_index_hashed(ce))
  151. hlist_bl_del_init(&ce->e_index.o_list);
  152. }
  153. /*
  154. * __mb_cache_entry_unhash_unlock()
  155. *
  156. * This function is called to unhash both the block and index hash
  157. * chain.
  158. * It assumes both the block and index hash chain is locked upon entry.
  159. * It also unlock both hash chains both exit
  160. */
  161. static inline void
  162. __mb_cache_entry_unhash_unlock(struct mb_cache_entry *ce)
  163. {
  164. __mb_cache_entry_unhash_index(ce);
  165. hlist_bl_unlock(ce->e_index_hash_p);
  166. __mb_cache_entry_unhash_block(ce);
  167. hlist_bl_unlock(ce->e_block_hash_p);
  168. }
  169. static void
  170. __mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask)
  171. {
  172. struct mb_cache *cache = ce->e_cache;
  173. mb_assert(!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt)));
  174. kmem_cache_free(cache->c_entry_cache, ce);
  175. atomic_dec(&cache->c_entry_count);
  176. }
  177. static void
  178. __mb_cache_entry_release(struct mb_cache_entry *ce)
  179. {
  180. /* First lock the entry to serialize access to its local data. */
  181. __spin_lock_mb_cache_entry(ce);
  182. /* Wake up all processes queuing for this cache entry. */
  183. if (ce->e_queued)
  184. wake_up_all(&mb_cache_queue);
  185. if (ce->e_used >= MB_CACHE_WRITER)
  186. ce->e_used -= MB_CACHE_WRITER;
  187. /*
  188. * Make sure that all cache entries on lru_list have
  189. * both e_used and e_qued of 0s.
  190. */
  191. ce->e_used--;
  192. if (!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))) {
  193. if (!__mb_cache_entry_is_block_hashed(ce)) {
  194. __spin_unlock_mb_cache_entry(ce);
  195. goto forget;
  196. }
  197. /*
  198. * Need access to lru list, first drop entry lock,
  199. * then reacquire the lock in the proper order.
  200. */
  201. spin_lock(&mb_cache_spinlock);
  202. if (list_empty(&ce->e_lru_list))
  203. list_add_tail(&ce->e_lru_list, &mb_cache_lru_list);
  204. spin_unlock(&mb_cache_spinlock);
  205. }
  206. __spin_unlock_mb_cache_entry(ce);
  207. return;
  208. forget:
  209. mb_assert(list_empty(&ce->e_lru_list));
  210. __mb_cache_entry_forget(ce, GFP_KERNEL);
  211. }
  212. /*
  213. * mb_cache_shrink_scan() memory pressure callback
  214. *
  215. * This function is called by the kernel memory management when memory
  216. * gets low.
  217. *
  218. * @shrink: (ignored)
  219. * @sc: shrink_control passed from reclaim
  220. *
  221. * Returns the number of objects freed.
  222. */
  223. static unsigned long
  224. mb_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
  225. {
  226. LIST_HEAD(free_list);
  227. struct mb_cache_entry *entry, *tmp;
  228. int nr_to_scan = sc->nr_to_scan;
  229. gfp_t gfp_mask = sc->gfp_mask;
  230. unsigned long freed = 0;
  231. mb_debug("trying to free %d entries", nr_to_scan);
  232. spin_lock(&mb_cache_spinlock);
  233. while ((nr_to_scan-- > 0) && !list_empty(&mb_cache_lru_list)) {
  234. struct mb_cache_entry *ce =
  235. list_entry(mb_cache_lru_list.next,
  236. struct mb_cache_entry, e_lru_list);
  237. list_del_init(&ce->e_lru_list);
  238. if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))
  239. continue;
  240. spin_unlock(&mb_cache_spinlock);
  241. /* Prevent any find or get operation on the entry */
  242. hlist_bl_lock(ce->e_block_hash_p);
  243. hlist_bl_lock(ce->e_index_hash_p);
  244. /* Ignore if it is touched by a find/get */
  245. if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt) ||
  246. !list_empty(&ce->e_lru_list)) {
  247. hlist_bl_unlock(ce->e_index_hash_p);
  248. hlist_bl_unlock(ce->e_block_hash_p);
  249. spin_lock(&mb_cache_spinlock);
  250. continue;
  251. }
  252. __mb_cache_entry_unhash_unlock(ce);
  253. list_add_tail(&ce->e_lru_list, &free_list);
  254. spin_lock(&mb_cache_spinlock);
  255. }
  256. spin_unlock(&mb_cache_spinlock);
  257. list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) {
  258. __mb_cache_entry_forget(entry, gfp_mask);
  259. freed++;
  260. }
  261. return freed;
  262. }
  263. static unsigned long
  264. mb_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
  265. {
  266. struct mb_cache *cache;
  267. unsigned long count = 0;
  268. spin_lock(&mb_cache_spinlock);
  269. list_for_each_entry(cache, &mb_cache_list, c_cache_list) {
  270. mb_debug("cache %s (%d)", cache->c_name,
  271. atomic_read(&cache->c_entry_count));
  272. count += atomic_read(&cache->c_entry_count);
  273. }
  274. spin_unlock(&mb_cache_spinlock);
  275. return vfs_pressure_ratio(count);
  276. }
  277. static struct shrinker mb_cache_shrinker = {
  278. .count_objects = mb_cache_shrink_count,
  279. .scan_objects = mb_cache_shrink_scan,
  280. .seeks = DEFAULT_SEEKS,
  281. };
  282. /*
  283. * mb_cache_create() create a new cache
  284. *
  285. * All entries in one cache are equal size. Cache entries may be from
  286. * multiple devices. If this is the first mbcache created, registers
  287. * the cache with kernel memory management. Returns NULL if no more
  288. * memory was available.
  289. *
  290. * @name: name of the cache (informal)
  291. * @bucket_bits: log2(number of hash buckets)
  292. */
  293. struct mb_cache *
  294. mb_cache_create(const char *name, int bucket_bits)
  295. {
  296. int n, bucket_count = 1 << bucket_bits;
  297. struct mb_cache *cache = NULL;
  298. if (!mb_cache_bg_lock) {
  299. mb_cache_bg_lock = kmalloc(sizeof(struct blockgroup_lock),
  300. GFP_KERNEL);
  301. if (!mb_cache_bg_lock)
  302. return NULL;
  303. bgl_lock_init(mb_cache_bg_lock);
  304. }
  305. cache = kmalloc(sizeof(struct mb_cache), GFP_KERNEL);
  306. if (!cache)
  307. return NULL;
  308. cache->c_name = name;
  309. atomic_set(&cache->c_entry_count, 0);
  310. cache->c_bucket_bits = bucket_bits;
  311. cache->c_block_hash = kmalloc(bucket_count *
  312. sizeof(struct hlist_bl_head), GFP_KERNEL);
  313. if (!cache->c_block_hash)
  314. goto fail;
  315. for (n=0; n<bucket_count; n++)
  316. INIT_HLIST_BL_HEAD(&cache->c_block_hash[n]);
  317. cache->c_index_hash = kmalloc(bucket_count *
  318. sizeof(struct hlist_bl_head), GFP_KERNEL);
  319. if (!cache->c_index_hash)
  320. goto fail;
  321. for (n=0; n<bucket_count; n++)
  322. INIT_HLIST_BL_HEAD(&cache->c_index_hash[n]);
  323. if (!mb_cache_kmem_cache) {
  324. mb_cache_kmem_cache = kmem_cache_create(name,
  325. sizeof(struct mb_cache_entry), 0,
  326. SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
  327. if (!mb_cache_kmem_cache)
  328. goto fail2;
  329. }
  330. cache->c_entry_cache = mb_cache_kmem_cache;
  331. /*
  332. * Set an upper limit on the number of cache entries so that the hash
  333. * chains won't grow too long.
  334. */
  335. cache->c_max_entries = bucket_count << 4;
  336. spin_lock(&mb_cache_spinlock);
  337. list_add(&cache->c_cache_list, &mb_cache_list);
  338. spin_unlock(&mb_cache_spinlock);
  339. return cache;
  340. fail2:
  341. kfree(cache->c_index_hash);
  342. fail:
  343. kfree(cache->c_block_hash);
  344. kfree(cache);
  345. return NULL;
  346. }
  347. /*
  348. * mb_cache_shrink()
  349. *
  350. * Removes all cache entries of a device from the cache. All cache entries
  351. * currently in use cannot be freed, and thus remain in the cache. All others
  352. * are freed.
  353. *
  354. * @bdev: which device's cache entries to shrink
  355. */
  356. void
  357. mb_cache_shrink(struct block_device *bdev)
  358. {
  359. LIST_HEAD(free_list);
  360. struct list_head *l;
  361. struct mb_cache_entry *ce, *tmp;
  362. l = &mb_cache_lru_list;
  363. spin_lock(&mb_cache_spinlock);
  364. while (!list_is_last(l, &mb_cache_lru_list)) {
  365. l = l->next;
  366. ce = list_entry(l, struct mb_cache_entry, e_lru_list);
  367. if (ce->e_bdev == bdev) {
  368. list_del_init(&ce->e_lru_list);
  369. if (ce->e_used || ce->e_queued ||
  370. atomic_read(&ce->e_refcnt))
  371. continue;
  372. spin_unlock(&mb_cache_spinlock);
  373. /*
  374. * Prevent any find or get operation on the entry.
  375. */
  376. hlist_bl_lock(ce->e_block_hash_p);
  377. hlist_bl_lock(ce->e_index_hash_p);
  378. /* Ignore if it is touched by a find/get */
  379. if (ce->e_used || ce->e_queued ||
  380. atomic_read(&ce->e_refcnt) ||
  381. !list_empty(&ce->e_lru_list)) {
  382. hlist_bl_unlock(ce->e_index_hash_p);
  383. hlist_bl_unlock(ce->e_block_hash_p);
  384. l = &mb_cache_lru_list;
  385. spin_lock(&mb_cache_spinlock);
  386. continue;
  387. }
  388. __mb_cache_entry_unhash_unlock(ce);
  389. mb_assert(!(ce->e_used || ce->e_queued ||
  390. atomic_read(&ce->e_refcnt)));
  391. list_add_tail(&ce->e_lru_list, &free_list);
  392. l = &mb_cache_lru_list;
  393. spin_lock(&mb_cache_spinlock);
  394. }
  395. }
  396. spin_unlock(&mb_cache_spinlock);
  397. list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) {
  398. __mb_cache_entry_forget(ce, GFP_KERNEL);
  399. }
  400. }
  401. /*
  402. * mb_cache_destroy()
  403. *
  404. * Shrinks the cache to its minimum possible size (hopefully 0 entries),
  405. * and then destroys it. If this was the last mbcache, un-registers the
  406. * mbcache from kernel memory management.
  407. */
  408. void
  409. mb_cache_destroy(struct mb_cache *cache)
  410. {
  411. LIST_HEAD(free_list);
  412. struct mb_cache_entry *ce, *tmp;
  413. spin_lock(&mb_cache_spinlock);
  414. list_for_each_entry_safe(ce, tmp, &mb_cache_lru_list, e_lru_list) {
  415. if (ce->e_cache == cache)
  416. list_move_tail(&ce->e_lru_list, &free_list);
  417. }
  418. list_del(&cache->c_cache_list);
  419. spin_unlock(&mb_cache_spinlock);
  420. list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) {
  421. list_del_init(&ce->e_lru_list);
  422. /*
  423. * Prevent any find or get operation on the entry.
  424. */
  425. hlist_bl_lock(ce->e_block_hash_p);
  426. hlist_bl_lock(ce->e_index_hash_p);
  427. mb_assert(!(ce->e_used || ce->e_queued ||
  428. atomic_read(&ce->e_refcnt)));
  429. __mb_cache_entry_unhash_unlock(ce);
  430. __mb_cache_entry_forget(ce, GFP_KERNEL);
  431. }
  432. if (atomic_read(&cache->c_entry_count) > 0) {
  433. mb_error("cache %s: %d orphaned entries",
  434. cache->c_name,
  435. atomic_read(&cache->c_entry_count));
  436. }
  437. if (list_empty(&mb_cache_list)) {
  438. kmem_cache_destroy(mb_cache_kmem_cache);
  439. mb_cache_kmem_cache = NULL;
  440. }
  441. kfree(cache->c_index_hash);
  442. kfree(cache->c_block_hash);
  443. kfree(cache);
  444. }
  445. /*
  446. * mb_cache_entry_alloc()
  447. *
  448. * Allocates a new cache entry. The new entry will not be valid initially,
  449. * and thus cannot be looked up yet. It should be filled with data, and
  450. * then inserted into the cache using mb_cache_entry_insert(). Returns NULL
  451. * if no more memory was available.
  452. */
  453. struct mb_cache_entry *
  454. mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags)
  455. {
  456. struct mb_cache_entry *ce;
  457. if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) {
  458. struct list_head *l;
  459. l = &mb_cache_lru_list;
  460. spin_lock(&mb_cache_spinlock);
  461. while (!list_is_last(l, &mb_cache_lru_list)) {
  462. l = l->next;
  463. ce = list_entry(l, struct mb_cache_entry, e_lru_list);
  464. if (ce->e_cache == cache) {
  465. list_del_init(&ce->e_lru_list);
  466. if (ce->e_used || ce->e_queued ||
  467. atomic_read(&ce->e_refcnt))
  468. continue;
  469. spin_unlock(&mb_cache_spinlock);
  470. /*
  471. * Prevent any find or get operation on the
  472. * entry.
  473. */
  474. hlist_bl_lock(ce->e_block_hash_p);
  475. hlist_bl_lock(ce->e_index_hash_p);
  476. /* Ignore if it is touched by a find/get */
  477. if (ce->e_used || ce->e_queued ||
  478. atomic_read(&ce->e_refcnt) ||
  479. !list_empty(&ce->e_lru_list)) {
  480. hlist_bl_unlock(ce->e_index_hash_p);
  481. hlist_bl_unlock(ce->e_block_hash_p);
  482. l = &mb_cache_lru_list;
  483. spin_lock(&mb_cache_spinlock);
  484. continue;
  485. }
  486. mb_assert(list_empty(&ce->e_lru_list));
  487. mb_assert(!(ce->e_used || ce->e_queued ||
  488. atomic_read(&ce->e_refcnt)));
  489. __mb_cache_entry_unhash_unlock(ce);
  490. goto found;
  491. }
  492. }
  493. spin_unlock(&mb_cache_spinlock);
  494. }
  495. ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags);
  496. if (!ce)
  497. return NULL;
  498. atomic_inc(&cache->c_entry_count);
  499. INIT_LIST_HEAD(&ce->e_lru_list);
  500. INIT_HLIST_BL_NODE(&ce->e_block_list);
  501. INIT_HLIST_BL_NODE(&ce->e_index.o_list);
  502. ce->e_cache = cache;
  503. ce->e_queued = 0;
  504. atomic_set(&ce->e_refcnt, 0);
  505. found:
  506. ce->e_block_hash_p = &cache->c_block_hash[0];
  507. ce->e_index_hash_p = &cache->c_index_hash[0];
  508. ce->e_used = 1 + MB_CACHE_WRITER;
  509. return ce;
  510. }
  511. /*
  512. * mb_cache_entry_insert()
  513. *
  514. * Inserts an entry that was allocated using mb_cache_entry_alloc() into
  515. * the cache. After this, the cache entry can be looked up, but is not yet
  516. * in the lru list as the caller still holds a handle to it. Returns 0 on
  517. * success, or -EBUSY if a cache entry for that device + inode exists
  518. * already (this may happen after a failed lookup, but when another process
  519. * has inserted the same cache entry in the meantime).
  520. *
  521. * @bdev: device the cache entry belongs to
  522. * @block: block number
  523. * @key: lookup key
  524. */
  525. int
  526. mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev,
  527. sector_t block, unsigned int key)
  528. {
  529. struct mb_cache *cache = ce->e_cache;
  530. unsigned int bucket;
  531. struct hlist_bl_node *l;
  532. struct hlist_bl_head *block_hash_p;
  533. struct hlist_bl_head *index_hash_p;
  534. struct mb_cache_entry *lce;
  535. mb_assert(ce);
  536. bucket = hash_long((unsigned long)bdev + (block & 0xffffffff),
  537. cache->c_bucket_bits);
  538. block_hash_p = &cache->c_block_hash[bucket];
  539. hlist_bl_lock(block_hash_p);
  540. hlist_bl_for_each_entry(lce, l, block_hash_p, e_block_list) {
  541. if (lce->e_bdev == bdev && lce->e_block == block) {
  542. hlist_bl_unlock(block_hash_p);
  543. return -EBUSY;
  544. }
  545. }
  546. mb_assert(!__mb_cache_entry_is_block_hashed(ce));
  547. __mb_cache_entry_unhash_block(ce);
  548. __mb_cache_entry_unhash_index(ce);
  549. ce->e_bdev = bdev;
  550. ce->e_block = block;
  551. ce->e_block_hash_p = block_hash_p;
  552. ce->e_index.o_key = key;
  553. hlist_bl_add_head(&ce->e_block_list, block_hash_p);
  554. hlist_bl_unlock(block_hash_p);
  555. bucket = hash_long(key, cache->c_bucket_bits);
  556. index_hash_p = &cache->c_index_hash[bucket];
  557. hlist_bl_lock(index_hash_p);
  558. ce->e_index_hash_p = index_hash_p;
  559. hlist_bl_add_head(&ce->e_index.o_list, index_hash_p);
  560. hlist_bl_unlock(index_hash_p);
  561. return 0;
  562. }
  /*
   * mb_cache_entry_release()
   *
   * Gives up the caller's handle on @ce. Once the last handle is gone the
   * entry is freed if it is invalid (no longer on a block hash chain);
   * otherwise it is placed on the lru list.
   *
   * @ce: the entry whose handle is released
   */
  void mb_cache_entry_release(struct mb_cache_entry *ce)
  {
      /* Exported wrapper; all the work is done by the internal helper. */
      __mb_cache_entry_release(ce);
  }
  575. /*
  576. * mb_cache_entry_free()
  577. *
  578. */
  579. void
  580. mb_cache_entry_free(struct mb_cache_entry *ce)
  581. {
  582. mb_assert(ce);
  583. mb_assert(list_empty(&ce->e_lru_list));
  584. hlist_bl_lock(ce->e_index_hash_p);
  585. __mb_cache_entry_unhash_index(ce);
  586. hlist_bl_unlock(ce->e_index_hash_p);
  587. hlist_bl_lock(ce->e_block_hash_p);
  588. __mb_cache_entry_unhash_block(ce);
  589. hlist_bl_unlock(ce->e_block_hash_p);
  590. __mb_cache_entry_release(ce);
  591. }
  592. /*
  593. * mb_cache_entry_get()
  594. *
  595. * Get a cache entry by device / block number. (There can only be one entry
  596. * in the cache per device and block.) Returns NULL if no such cache entry
  597. * exists. The returned cache entry is locked for exclusive access ("single
  598. * writer").
  599. */
  600. struct mb_cache_entry *
  601. mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev,
  602. sector_t block)
  603. {
  604. unsigned int bucket;
  605. struct hlist_bl_node *l;
  606. struct mb_cache_entry *ce;
  607. struct hlist_bl_head *block_hash_p;
  608. bucket = hash_long((unsigned long)bdev + (block & 0xffffffff),
  609. cache->c_bucket_bits);
  610. block_hash_p = &cache->c_block_hash[bucket];
  611. /* First serialize access to the block corresponding hash chain. */
  612. hlist_bl_lock(block_hash_p);
  613. hlist_bl_for_each_entry(ce, l, block_hash_p, e_block_list) {
  614. mb_assert(ce->e_block_hash_p == block_hash_p);
  615. if (ce->e_bdev == bdev && ce->e_block == block) {
  616. /*
  617. * Prevent a free from removing the entry.
  618. */
  619. atomic_inc(&ce->e_refcnt);
  620. hlist_bl_unlock(block_hash_p);
  621. __spin_lock_mb_cache_entry(ce);
  622. atomic_dec(&ce->e_refcnt);
  623. if (ce->e_used > 0) {
  624. DEFINE_WAIT(wait);
  625. while (ce->e_used > 0) {
  626. ce->e_queued++;
  627. prepare_to_wait(&mb_cache_queue, &wait,
  628. TASK_UNINTERRUPTIBLE);
  629. __spin_unlock_mb_cache_entry(ce);
  630. schedule();
  631. __spin_lock_mb_cache_entry(ce);
  632. ce->e_queued--;
  633. }
  634. finish_wait(&mb_cache_queue, &wait);
  635. }
  636. ce->e_used += 1 + MB_CACHE_WRITER;
  637. __spin_unlock_mb_cache_entry(ce);
  638. if (!list_empty(&ce->e_lru_list)) {
  639. spin_lock(&mb_cache_spinlock);
  640. list_del_init(&ce->e_lru_list);
  641. spin_unlock(&mb_cache_spinlock);
  642. }
  643. if (!__mb_cache_entry_is_block_hashed(ce)) {
  644. __mb_cache_entry_release(ce);
  645. return NULL;
  646. }
  647. return ce;
  648. }
  649. }
  650. hlist_bl_unlock(block_hash_p);
  651. return NULL;
  652. }
  653. #if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0)
  654. static struct mb_cache_entry *
  655. __mb_cache_entry_find(struct hlist_bl_node *l, struct hlist_bl_head *head,
  656. struct block_device *bdev, unsigned int key)
  657. {
  658. /* The index hash chain is alredy acquire by caller. */
  659. while (l != NULL) {
  660. struct mb_cache_entry *ce =
  661. hlist_bl_entry(l, struct mb_cache_entry,
  662. e_index.o_list);
  663. mb_assert(ce->e_index_hash_p == head);
  664. if (ce->e_bdev == bdev && ce->e_index.o_key == key) {
  665. /*
  666. * Prevent a free from removing the entry.
  667. */
  668. atomic_inc(&ce->e_refcnt);
  669. hlist_bl_unlock(head);
  670. __spin_lock_mb_cache_entry(ce);
  671. atomic_dec(&ce->e_refcnt);
  672. ce->e_used++;
  673. /* Incrementing before holding the lock gives readers
  674. priority over writers. */
  675. if (ce->e_used >= MB_CACHE_WRITER) {
  676. DEFINE_WAIT(wait);
  677. while (ce->e_used >= MB_CACHE_WRITER) {
  678. ce->e_queued++;
  679. prepare_to_wait(&mb_cache_queue, &wait,
  680. TASK_UNINTERRUPTIBLE);
  681. __spin_unlock_mb_cache_entry(ce);
  682. schedule();
  683. __spin_lock_mb_cache_entry(ce);
  684. ce->e_queued--;
  685. }
  686. finish_wait(&mb_cache_queue, &wait);
  687. }
  688. __spin_unlock_mb_cache_entry(ce);
  689. if (!list_empty(&ce->e_lru_list)) {
  690. spin_lock(&mb_cache_spinlock);
  691. list_del_init(&ce->e_lru_list);
  692. spin_unlock(&mb_cache_spinlock);
  693. }
  694. if (!__mb_cache_entry_is_block_hashed(ce)) {
  695. __mb_cache_entry_release(ce);
  696. return ERR_PTR(-EAGAIN);
  697. }
  698. return ce;
  699. }
  700. l = l->next;
  701. }
  702. hlist_bl_unlock(head);
  703. return NULL;
  704. }
  705. /*
  706. * mb_cache_entry_find_first()
  707. *
  708. * Find the first cache entry on a given device with a certain key in
  709. * an additional index. Additional matches can be found with
  710. * mb_cache_entry_find_next(). Returns NULL if no match was found. The
  711. * returned cache entry is locked for shared access ("multiple readers").
  712. *
  713. * @cache: the cache to search
  714. * @bdev: the device the cache entry should belong to
  715. * @key: the key in the index
  716. */
  717. struct mb_cache_entry *
  718. mb_cache_entry_find_first(struct mb_cache *cache, struct block_device *bdev,
  719. unsigned int key)
  720. {
  721. unsigned int bucket = hash_long(key, cache->c_bucket_bits);
  722. struct hlist_bl_node *l;
  723. struct mb_cache_entry *ce = NULL;
  724. struct hlist_bl_head *index_hash_p;
  725. index_hash_p = &cache->c_index_hash[bucket];
  726. hlist_bl_lock(index_hash_p);
  727. if (!hlist_bl_empty(index_hash_p)) {
  728. l = hlist_bl_first(index_hash_p);
  729. ce = __mb_cache_entry_find(l, index_hash_p, bdev, key);
  730. } else
  731. hlist_bl_unlock(index_hash_p);
  732. return ce;
  733. }
  734. /*
  735. * mb_cache_entry_find_next()
  736. *
  737. * Find the next cache entry on a given device with a certain key in an
  738. * additional index. Returns NULL if no match could be found. The previous
  739. * entry is atomatically released, so that mb_cache_entry_find_next() can
  740. * be called like this:
  741. *
  742. * entry = mb_cache_entry_find_first();
  743. * while (entry) {
  744. * ...
  745. * entry = mb_cache_entry_find_next(entry, ...);
  746. * }
  747. *
  748. * @prev: The previous match
  749. * @bdev: the device the cache entry should belong to
  750. * @key: the key in the index
  751. */
  752. struct mb_cache_entry *
  753. mb_cache_entry_find_next(struct mb_cache_entry *prev,
  754. struct block_device *bdev, unsigned int key)
  755. {
  756. struct mb_cache *cache = prev->e_cache;
  757. unsigned int bucket = hash_long(key, cache->c_bucket_bits);
  758. struct hlist_bl_node *l;
  759. struct mb_cache_entry *ce;
  760. struct hlist_bl_head *index_hash_p;
  761. index_hash_p = &cache->c_index_hash[bucket];
  762. mb_assert(prev->e_index_hash_p == index_hash_p);
  763. hlist_bl_lock(index_hash_p);
  764. mb_assert(!hlist_bl_empty(index_hash_p));
  765. l = prev->e_index.o_list.next;
  766. ce = __mb_cache_entry_find(l, index_hash_p, bdev, key);
  767. __mb_cache_entry_release(prev);
  768. return ce;
  769. }
  770. #endif /* !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) */
  771. static int __init init_mbcache(void)
  772. {
  773. register_shrinker(&mb_cache_shrinker);
  774. return 0;
  775. }
  776. static void __exit exit_mbcache(void)
  777. {
  778. unregister_shrinker(&mb_cache_shrinker);
  779. }
  /* Hook the init and exit routines into the module loader. */
  module_init(init_mbcache);
  module_exit(exit_mbcache);