/*
 * Copyright (C) 2015 Red Hat. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm-cache-background-tracker.h"
#include "dm-cache-policy-internal.h"
#include "dm-cache-policy.h"
#include "dm.h"

#include <linux/hash.h>
#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/vmalloc.h>
#include <linux/math64.h>

#define DM_MSG_PREFIX "cache-policy-smq"

/*----------------------------------------------------------------*/

/*
 * Safe division functions that return zero on divide by zero.
 */
static unsigned safe_div(unsigned n, unsigned d)
{
	return d ? n / d : 0u;
}

static unsigned safe_mod(unsigned n, unsigned d)
{
	return d ? n % d : 0u;
}

/*----------------------------------------------------------------*/

struct entry {
	unsigned hash_next:28;
	unsigned prev:28;
	unsigned next:28;
	unsigned level:6;
	bool dirty:1;
	bool allocated:1;
	bool sentinel:1;
	bool pending_work:1;

	dm_oblock_t oblock;
};

/*----------------------------------------------------------------*/

#define INDEXER_NULL ((1u << 28u) - 1u)
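
/*
 * Entries are referenced by 28-bit indexes into an entry_space rather than
 * by pointer, which keeps the list and hash links in struct entry small.
 * INDEXER_NULL (all 28 bits set) is the "no entry" marker, so at most
 * (1 << 28) - 1 entries can be addressed.
 */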

/*
 * An entry_space manages a set of entries that we use for the queues.
 * The clean and dirty queues share entries, so this object is separate
 * from the queue itself.
 */
struct entry_space {
	struct entry *begin;
	struct entry *end;
};

static int space_init(struct entry_space *es, unsigned nr_entries)
{
	if (!nr_entries) {
		es->begin = es->end = NULL;
		return 0;
	}

	es->begin = vzalloc(array_size(nr_entries, sizeof(struct entry)));
	if (!es->begin)
		return -ENOMEM;

	es->end = es->begin + nr_entries;
	return 0;
}

static void space_exit(struct entry_space *es)
{
	vfree(es->begin);
}

static struct entry *__get_entry(struct entry_space *es, unsigned block)
{
	struct entry *e;

	e = es->begin + block;
	BUG_ON(e >= es->end);

	return e;
}

static unsigned to_index(struct entry_space *es, struct entry *e)
{
	BUG_ON(e < es->begin || e >= es->end);
	return e - es->begin;
}

static struct entry *to_entry(struct entry_space *es, unsigned block)
{
	if (block == INDEXER_NULL)
		return NULL;

	return __get_entry(es, block);
}

/*----------------------------------------------------------------*/

struct ilist {
	unsigned nr_elts;	/* excluding sentinel entries */
	unsigned head, tail;
};

static void l_init(struct ilist *l)
{
	l->nr_elts = 0;
	l->head = l->tail = INDEXER_NULL;
}

static struct entry *l_head(struct entry_space *es, struct ilist *l)
{
	return to_entry(es, l->head);
}

static struct entry *l_tail(struct entry_space *es, struct ilist *l)
{
	return to_entry(es, l->tail);
}

static struct entry *l_next(struct entry_space *es, struct entry *e)
{
	return to_entry(es, e->next);
}

static struct entry *l_prev(struct entry_space *es, struct entry *e)
{
	return to_entry(es, e->prev);
}

static bool l_empty(struct ilist *l)
{
	return l->head == INDEXER_NULL;
}

static void l_add_head(struct entry_space *es, struct ilist *l, struct entry *e)
{
	struct entry *head = l_head(es, l);

	e->next = l->head;
	e->prev = INDEXER_NULL;

	if (head)
		head->prev = l->head = to_index(es, e);
	else
		l->head = l->tail = to_index(es, e);

	if (!e->sentinel)
		l->nr_elts++;
}

static void l_add_tail(struct entry_space *es, struct ilist *l, struct entry *e)
{
	struct entry *tail = l_tail(es, l);

	e->next = INDEXER_NULL;
	e->prev = l->tail;

	if (tail)
		tail->next = l->tail = to_index(es, e);
	else
		l->head = l->tail = to_index(es, e);

	if (!e->sentinel)
		l->nr_elts++;
}

static void l_add_before(struct entry_space *es, struct ilist *l,
			 struct entry *old, struct entry *e)
{
	struct entry *prev = l_prev(es, old);

	if (!prev)
		l_add_head(es, l, e);

	else {
		e->prev = old->prev;
		e->next = to_index(es, old);
		prev->next = old->prev = to_index(es, e);

		if (!e->sentinel)
			l->nr_elts++;
	}
}

static void l_del(struct entry_space *es, struct ilist *l, struct entry *e)
{
	struct entry *prev = l_prev(es, e);
	struct entry *next = l_next(es, e);

	if (prev)
		prev->next = e->next;
	else
		l->head = e->next;

	if (next)
		next->prev = e->prev;
	else
		l->tail = e->prev;

	if (!e->sentinel)
		l->nr_elts--;
}

static struct entry *l_pop_head(struct entry_space *es, struct ilist *l)
{
	struct entry *e;

	for (e = l_head(es, l); e; e = l_next(es, e))
		if (!e->sentinel) {
			l_del(es, l, e);
			return e;
		}

	return NULL;
}

static struct entry *l_pop_tail(struct entry_space *es, struct ilist *l)
{
	struct entry *e;

	for (e = l_tail(es, l); e; e = l_prev(es, e))
		if (!e->sentinel) {
			l_del(es, l, e);
			return e;
		}

	return NULL;
}

/*----------------------------------------------------------------*/

/*
 * The stochastic-multi-queue is a set of lru lists stacked into levels.
 * Entries are moved up levels when they are used, which loosely orders the
 * most accessed entries in the top levels and least in the bottom.  This
 * structure is *much* better than a single lru list.
 */
#define MAX_LEVELS 64u
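
/*
 * Hits requeue an entry into a higher level (see q_requeue()), so over
 * time frequently used entries drift towards level MAX_LEVELS - 1 while
 * cold entries settle near level 0.  q_redistribute() periodically
 * rebalances how many entries each level holds.
 */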

struct queue {
	struct entry_space *es;

	unsigned nr_elts;
	unsigned nr_levels;
	struct ilist qs[MAX_LEVELS];

	/*
	 * We maintain a count of the number of entries we would like in each
	 * level.
	 */
	unsigned last_target_nr_elts;
	unsigned nr_top_levels;
	unsigned nr_in_top_levels;
	unsigned target_count[MAX_LEVELS];
};

static void q_init(struct queue *q, struct entry_space *es, unsigned nr_levels)
{
	unsigned i;

	q->es = es;
	q->nr_elts = 0;
	q->nr_levels = nr_levels;

	for (i = 0; i < q->nr_levels; i++) {
		l_init(q->qs + i);
		q->target_count[i] = 0u;
	}

	q->last_target_nr_elts = 0u;
	q->nr_top_levels = 0u;
	q->nr_in_top_levels = 0u;
}

static unsigned q_size(struct queue *q)
{
	return q->nr_elts;
}

/*
 * Insert an entry to the back of the given level.
 */
static void q_push(struct queue *q, struct entry *e)
{
	BUG_ON(e->pending_work);

	if (!e->sentinel)
		q->nr_elts++;

	l_add_tail(q->es, q->qs + e->level, e);
}

static void q_push_front(struct queue *q, struct entry *e)
{
	BUG_ON(e->pending_work);

	if (!e->sentinel)
		q->nr_elts++;

	l_add_head(q->es, q->qs + e->level, e);
}

static void q_push_before(struct queue *q, struct entry *old, struct entry *e)
{
	BUG_ON(e->pending_work);

	if (!e->sentinel)
		q->nr_elts++;

	l_add_before(q->es, q->qs + e->level, old, e);
}

static void q_del(struct queue *q, struct entry *e)
{
	l_del(q->es, q->qs + e->level, e);
	if (!e->sentinel)
		q->nr_elts--;
}

/*
 * Return the oldest entry of the lowest populated level.
 */
static struct entry *q_peek(struct queue *q, unsigned max_level, bool can_cross_sentinel)
{
	unsigned level;
	struct entry *e;

	max_level = min(max_level, q->nr_levels);

	for (level = 0; level < max_level; level++)
		for (e = l_head(q->es, q->qs + level); e; e = l_next(q->es, e)) {
			if (e->sentinel) {
				if (can_cross_sentinel)
					continue;
				else
					break;
			}

			return e;
		}

	return NULL;
}

static struct entry *q_pop(struct queue *q)
{
	struct entry *e = q_peek(q, q->nr_levels, true);

	if (e)
		q_del(q, e);

	return e;
}

/*
 * This function assumes there is a non-sentinel entry to pop.  It's only
 * used by redistribute, so we know this is true.  It also doesn't adjust
 * the q->nr_elts count.
 */
static struct entry *__redist_pop_from(struct queue *q, unsigned level)
{
	struct entry *e;

	for (; level < q->nr_levels; level++)
		for (e = l_head(q->es, q->qs + level); e; e = l_next(q->es, e))
			if (!e->sentinel) {
				l_del(q->es, q->qs + e->level, e);
				return e;
			}

	return NULL;
}

static void q_set_targets_subrange_(struct queue *q, unsigned nr_elts, unsigned lbegin, unsigned lend)
{
	unsigned level, nr_levels, entries_per_level, remainder;

	BUG_ON(lbegin > lend);
	BUG_ON(lend > q->nr_levels);
	nr_levels = lend - lbegin;
	entries_per_level = safe_div(nr_elts, nr_levels);
	remainder = safe_mod(nr_elts, nr_levels);

	for (level = lbegin; level < lend; level++)
		q->target_count[level] =
			(level < (lbegin + remainder)) ? entries_per_level + 1u : entries_per_level;
}

/*
 * Typically we have fewer elements in the top few levels which allows us
 * to adjust the promote threshold nicely.
 */
static void q_set_targets(struct queue *q)
{
	if (q->last_target_nr_elts == q->nr_elts)
		return;

	q->last_target_nr_elts = q->nr_elts;

	if (q->nr_top_levels > q->nr_levels)
		q_set_targets_subrange_(q, q->nr_elts, 0, q->nr_levels);

	else {
		q_set_targets_subrange_(q, q->nr_in_top_levels,
					q->nr_levels - q->nr_top_levels, q->nr_levels);

		if (q->nr_in_top_levels < q->nr_elts)
			q_set_targets_subrange_(q, q->nr_elts - q->nr_in_top_levels,
						0, q->nr_levels - q->nr_top_levels);
		else
			q_set_targets_subrange_(q, 0, 0, q->nr_levels - q->nr_top_levels);
	}
}

static void q_redistribute(struct queue *q)
{
	unsigned target, level;
	struct ilist *l, *l_above;
	struct entry *e;

	q_set_targets(q);

	for (level = 0u; level < q->nr_levels - 1u; level++) {
		l = q->qs + level;
		target = q->target_count[level];

		/*
		 * Pull down some entries from the level above.
		 */
		while (l->nr_elts < target) {
			e = __redist_pop_from(q, level + 1u);
			if (!e) {
				/* bug in nr_elts */
				break;
			}

			e->level = level;
			l_add_tail(q->es, l, e);
		}

		/*
		 * Push some entries up.
		 */
		l_above = q->qs + level + 1u;
		while (l->nr_elts > target) {
			e = l_pop_tail(q->es, l);

			if (!e)
				/* bug in nr_elts */
				break;

			e->level = level + 1u;
			l_add_tail(q->es, l_above, e);
		}
	}
}
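
/*
 * q_requeue() lifts a hit entry up to 'extra_levels' levels higher.  To
 * keep the per-level counts roughly stable it swaps the promoted entry
 * with one taken from the destination level; when sentinels s1/s2 are
 * supplied, the displaced entry is pushed back relative to them so its
 * position with respect to the sentinels it had already passed is kept.
 */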
static void q_requeue(struct queue *q, struct entry *e, unsigned extra_levels,
		      struct entry *s1, struct entry *s2)
{
	struct entry *de;
	unsigned sentinels_passed = 0;
	unsigned new_level = min(q->nr_levels - 1u, e->level + extra_levels);

	/* try and find an entry to swap with */
	if (extra_levels && (e->level < q->nr_levels - 1u)) {
		for (de = l_head(q->es, q->qs + new_level); de && de->sentinel; de = l_next(q->es, de))
			sentinels_passed++;

		if (de) {
			q_del(q, de);
			de->level = e->level;
			if (s1) {
				switch (sentinels_passed) {
				case 0:
					q_push_before(q, s1, de);
					break;

				case 1:
					q_push_before(q, s2, de);
					break;

				default:
					q_push(q, de);
				}
			} else
				q_push(q, de);
		}
	}

	q_del(q, e);
	e->level = new_level;
	q_push(q, e);
}

/*----------------------------------------------------------------*/

#define FP_SHIFT 8
#define SIXTEENTH (1u << (FP_SHIFT - 4u))
#define EIGHTH (1u << (FP_SHIFT - 3u))
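
/*
 * Fixed point arithmetic: with FP_SHIFT of 8, 1.0 is represented as 256,
 * so SIXTEENTH (16) is 1/16 = 6.25% and EIGHTH (32) is 1/8 = 12.5%.  These
 * are the thresholds stats_assess() compares the hit ratio against.
 */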

struct stats {
	unsigned hit_threshold;
	unsigned hits;
	unsigned misses;
};

enum performance {
	Q_POOR,
	Q_FAIR,
	Q_WELL
};

static void stats_init(struct stats *s, unsigned nr_levels)
{
	s->hit_threshold = (nr_levels * 3u) / 4u;
	s->hits = 0u;
	s->misses = 0u;
}

static void stats_reset(struct stats *s)
{
	s->hits = s->misses = 0u;
}

static void stats_level_accessed(struct stats *s, unsigned level)
{
	if (level >= s->hit_threshold)
		s->hits++;
	else
		s->misses++;
}

static void stats_miss(struct stats *s)
{
	s->misses++;
}

/*
 * There are times when we don't have any confidence in the hotspot queue.
 * Such as when a fresh cache is created and the blocks have been spread
 * out across the levels, or if an io load changes.  We detect this by
 * seeing how often a lookup is in the top levels of the hotspot queue.
 */
static enum performance stats_assess(struct stats *s)
{
	unsigned confidence = safe_div(s->hits << FP_SHIFT, s->hits + s->misses);

	if (confidence < SIXTEENTH)
		return Q_POOR;

	else if (confidence < EIGHTH)
		return Q_FAIR;

	else
		return Q_WELL;
}

/*----------------------------------------------------------------*/

struct smq_hash_table {
	struct entry_space *es;
	unsigned long long hash_bits;
	unsigned *buckets;
};

/*
 * All cache entries are stored in a chained hash table.  To save space we
 * use indexing again, and only store indexes to the next entry.
 */
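/*
 * The table is sized at roughly one bucket per four entries (minimum 16),
 * rounded up to a power of two so that hash_64() can be used with
 * hash_bits = log2(nr_buckets).
 */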
static int h_init(struct smq_hash_table *ht, struct entry_space *es, unsigned nr_entries)
{
	unsigned i, nr_buckets;

	ht->es = es;
	nr_buckets = roundup_pow_of_two(max(nr_entries / 4u, 16u));
	ht->hash_bits = __ffs(nr_buckets);

	ht->buckets = vmalloc(array_size(nr_buckets, sizeof(*ht->buckets)));
	if (!ht->buckets)
		return -ENOMEM;

	for (i = 0; i < nr_buckets; i++)
		ht->buckets[i] = INDEXER_NULL;

	return 0;
}

static void h_exit(struct smq_hash_table *ht)
{
	vfree(ht->buckets);
}

static struct entry *h_head(struct smq_hash_table *ht, unsigned bucket)
{
	return to_entry(ht->es, ht->buckets[bucket]);
}

static struct entry *h_next(struct smq_hash_table *ht, struct entry *e)
{
	return to_entry(ht->es, e->hash_next);
}

static void __h_insert(struct smq_hash_table *ht, unsigned bucket, struct entry *e)
{
	e->hash_next = ht->buckets[bucket];
	ht->buckets[bucket] = to_index(ht->es, e);
}

static void h_insert(struct smq_hash_table *ht, struct entry *e)
{
	unsigned h = hash_64(from_oblock(e->oblock), ht->hash_bits);

	__h_insert(ht, h, e);
}

static struct entry *__h_lookup(struct smq_hash_table *ht, unsigned h, dm_oblock_t oblock,
				struct entry **prev)
{
	struct entry *e;

	*prev = NULL;
	for (e = h_head(ht, h); e; e = h_next(ht, e)) {
		if (e->oblock == oblock)
			return e;

		*prev = e;
	}

	return NULL;
}

static void __h_unlink(struct smq_hash_table *ht, unsigned h,
		       struct entry *e, struct entry *prev)
{
	if (prev)
		prev->hash_next = e->hash_next;
	else
		ht->buckets[h] = e->hash_next;
}

/*
 * Also moves each entry to the front of the bucket.
 */
static struct entry *h_lookup(struct smq_hash_table *ht, dm_oblock_t oblock)
{
	struct entry *e, *prev;
	unsigned h = hash_64(from_oblock(oblock), ht->hash_bits);

	e = __h_lookup(ht, h, oblock, &prev);
	if (e && prev) {
		/*
		 * Move to the front because this entry is likely
		 * to be hit again.
		 */
		__h_unlink(ht, h, e, prev);
		__h_insert(ht, h, e);
	}

	return e;
}

static void h_remove(struct smq_hash_table *ht, struct entry *e)
{
	unsigned h = hash_64(from_oblock(e->oblock), ht->hash_bits);
	struct entry *prev;

	/*
	 * The down side of using a singly linked list is we have to
	 * iterate the bucket to remove an item.
	 */
	e = __h_lookup(ht, h, e->oblock, &prev);
	if (e)
		__h_unlink(ht, h, e, prev);
}

/*----------------------------------------------------------------*/

struct entry_alloc {
	struct entry_space *es;
	unsigned begin;

	unsigned nr_allocated;
	struct ilist free;
};

static void init_allocator(struct entry_alloc *ea, struct entry_space *es,
			   unsigned begin, unsigned end)
{
	unsigned i;

	ea->es = es;
	ea->nr_allocated = 0u;
	ea->begin = begin;

	l_init(&ea->free);
	for (i = begin; i != end; i++)
		l_add_tail(ea->es, &ea->free, __get_entry(ea->es, i));
}

static void init_entry(struct entry *e)
{
	/*
	 * We can't memset because that would clear the hotspot and
	 * sentinel bits which remain constant.
	 */
	e->hash_next = INDEXER_NULL;
	e->next = INDEXER_NULL;
	e->prev = INDEXER_NULL;
	e->level = 0u;
	e->dirty = true;	/* FIXME: audit */
	e->allocated = true;
	e->sentinel = false;
	e->pending_work = false;
}

static struct entry *alloc_entry(struct entry_alloc *ea)
{
	struct entry *e;

	if (l_empty(&ea->free))
		return NULL;

	e = l_pop_head(ea->es, &ea->free);
	init_entry(e);
	ea->nr_allocated++;

	return e;
}

/*
 * This assumes the cblock hasn't already been allocated.
 */
static struct entry *alloc_particular_entry(struct entry_alloc *ea, unsigned i)
{
	struct entry *e = __get_entry(ea->es, ea->begin + i);

	BUG_ON(e->allocated);

	l_del(ea->es, &ea->free, e);
	init_entry(e);
	ea->nr_allocated++;

	return e;
}

static void free_entry(struct entry_alloc *ea, struct entry *e)
{
	BUG_ON(!ea->nr_allocated);
	BUG_ON(!e->allocated);

	ea->nr_allocated--;
	e->allocated = false;
	l_add_tail(ea->es, &ea->free, e);
}

static bool allocator_empty(struct entry_alloc *ea)
{
	return l_empty(&ea->free);
}

static unsigned get_index(struct entry_alloc *ea, struct entry *e)
{
	return to_index(ea->es, e) - ea->begin;
}

static struct entry *get_entry(struct entry_alloc *ea, unsigned index)
{
	return __get_entry(ea->es, ea->begin + index);
}

/*----------------------------------------------------------------*/

#define NR_HOTSPOT_LEVELS 64u
#define NR_CACHE_LEVELS 64u

#define WRITEBACK_PERIOD (10ul * HZ)
#define DEMOTE_PERIOD (60ul * HZ)

#define HOTSPOT_UPDATE_PERIOD (HZ)
#define CACHE_UPDATE_PERIOD (60ul * HZ)
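
/*
 * With HZ jiffies per second this gives a 10 second writeback period, a
 * 60 second demote period, a hotspot queue rebalance every second and a
 * cache queue rebalance every 60 seconds.
 */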

struct smq_policy {
	struct dm_cache_policy policy;

	/* protects everything */
	spinlock_t lock;
	dm_cblock_t cache_size;
	sector_t cache_block_size;

	sector_t hotspot_block_size;
	unsigned nr_hotspot_blocks;
	unsigned cache_blocks_per_hotspot_block;
	unsigned hotspot_level_jump;

	struct entry_space es;
	struct entry_alloc writeback_sentinel_alloc;
	struct entry_alloc demote_sentinel_alloc;
	struct entry_alloc hotspot_alloc;
	struct entry_alloc cache_alloc;

	unsigned long *hotspot_hit_bits;
	unsigned long *cache_hit_bits;

	/*
	 * We maintain three queues of entries.  The cache proper,
	 * consisting of a clean and dirty queue, containing the currently
	 * active mappings.  The hotspot queue uses a larger block size to
	 * track blocks that are being hit frequently and potential
	 * candidates for promotion to the cache.
	 */
	struct queue hotspot;
	struct queue clean;
	struct queue dirty;

	struct stats hotspot_stats;
	struct stats cache_stats;

	/*
	 * Keeps track of time, incremented by the core.  We use this to
	 * avoid attributing multiple hits within the same tick.
	 */
	unsigned tick;

	/*
	 * The hash tables allows us to quickly find an entry by origin
	 * block.
	 */
	struct smq_hash_table table;
	struct smq_hash_table hotspot_table;

	bool current_writeback_sentinels;
	unsigned long next_writeback_period;

	bool current_demote_sentinels;
	unsigned long next_demote_period;

	unsigned write_promote_level;
	unsigned read_promote_level;

	unsigned long next_hotspot_period;
	unsigned long next_cache_period;

	struct background_tracker *bg_work;

	bool migrations_allowed;
};

/*----------------------------------------------------------------*/

static struct entry *get_sentinel(struct entry_alloc *ea, unsigned level, bool which)
{
	return get_entry(ea, which ? level : NR_CACHE_LEVELS + level);
}

static struct entry *writeback_sentinel(struct smq_policy *mq, unsigned level)
{
	return get_sentinel(&mq->writeback_sentinel_alloc, level, mq->current_writeback_sentinels);
}

static struct entry *demote_sentinel(struct smq_policy *mq, unsigned level)
{
	return get_sentinel(&mq->demote_sentinel_alloc, level, mq->current_demote_sentinels);
}

static void __update_writeback_sentinels(struct smq_policy *mq)
{
	unsigned level;
	struct queue *q = &mq->dirty;
	struct entry *sentinel;

	for (level = 0; level < q->nr_levels; level++) {
		sentinel = writeback_sentinel(mq, level);
		q_del(q, sentinel);
		q_push(q, sentinel);
	}
}

static void __update_demote_sentinels(struct smq_policy *mq)
{
	unsigned level;
	struct queue *q = &mq->clean;
	struct entry *sentinel;

	for (level = 0; level < q->nr_levels; level++) {
		sentinel = demote_sentinel(mq, level);
		q_del(q, sentinel);
		q_push(q, sentinel);
	}
}

static void update_sentinels(struct smq_policy *mq)
{
	if (time_after(jiffies, mq->next_writeback_period)) {
		mq->next_writeback_period = jiffies + WRITEBACK_PERIOD;
		mq->current_writeback_sentinels = !mq->current_writeback_sentinels;
		__update_writeback_sentinels(mq);
	}

	if (time_after(jiffies, mq->next_demote_period)) {
		mq->next_demote_period = jiffies + DEMOTE_PERIOD;
		mq->current_demote_sentinels = !mq->current_demote_sentinels;
		__update_demote_sentinels(mq);
	}
}

static void __sentinels_init(struct smq_policy *mq)
{
	unsigned level;
	struct entry *sentinel;

	for (level = 0; level < NR_CACHE_LEVELS; level++) {
		sentinel = writeback_sentinel(mq, level);
		sentinel->level = level;
		q_push(&mq->dirty, sentinel);

		sentinel = demote_sentinel(mq, level);
		sentinel->level = level;
		q_push(&mq->clean, sentinel);
	}
}

static void sentinels_init(struct smq_policy *mq)
{
	mq->next_writeback_period = jiffies + WRITEBACK_PERIOD;
	mq->next_demote_period = jiffies + DEMOTE_PERIOD;

	mq->current_writeback_sentinels = false;
	mq->current_demote_sentinels = false;
	__sentinels_init(mq);

	mq->current_writeback_sentinels = !mq->current_writeback_sentinels;
	mq->current_demote_sentinels = !mq->current_demote_sentinels;
	__sentinels_init(mq);
}

/*----------------------------------------------------------------*/

static void del_queue(struct smq_policy *mq, struct entry *e)
{
	q_del(e->dirty ? &mq->dirty : &mq->clean, e);
}

static void push_queue(struct smq_policy *mq, struct entry *e)
{
	if (e->dirty)
		q_push(&mq->dirty, e);
	else
		q_push(&mq->clean, e);
}

// !h, !q, a -> h, q, a
static void push(struct smq_policy *mq, struct entry *e)
{
	h_insert(&mq->table, e);
	if (!e->pending_work)
		push_queue(mq, e);
}

static void push_queue_front(struct smq_policy *mq, struct entry *e)
{
	if (e->dirty)
		q_push_front(&mq->dirty, e);
	else
		q_push_front(&mq->clean, e);
}

static void push_front(struct smq_policy *mq, struct entry *e)
{
	h_insert(&mq->table, e);
	if (!e->pending_work)
		push_queue_front(mq, e);
}

static dm_cblock_t infer_cblock(struct smq_policy *mq, struct entry *e)
{
	return to_cblock(get_index(&mq->cache_alloc, e));
}

static void requeue(struct smq_policy *mq, struct entry *e)
{
	/*
	 * Pending work has temporarily been taken out of the queues.
	 */
	if (e->pending_work)
		return;

	if (!test_and_set_bit(from_cblock(infer_cblock(mq, e)), mq->cache_hit_bits)) {
		if (!e->dirty) {
			q_requeue(&mq->clean, e, 1u, NULL, NULL);
			return;
		}

		q_requeue(&mq->dirty, e, 1u,
			  get_sentinel(&mq->writeback_sentinel_alloc, e->level, !mq->current_writeback_sentinels),
			  get_sentinel(&mq->writeback_sentinel_alloc, e->level, mq->current_writeback_sentinels));
	}
}

static unsigned default_promote_level(struct smq_policy *mq)
{
	/*
	 * The promote level depends on the current performance of the
	 * cache.
	 *
	 * If the cache is performing badly, then we can't afford
	 * to promote much without causing performance to drop below that
	 * of the origin device.
	 *
	 * If the cache is performing well, then we don't need to promote
	 * much.  If it isn't broken, don't fix it.
	 *
	 * If the cache is middling then we promote more.
	 *
	 * This scheme reminds me of a graph of entropy vs probability of a
	 * binary variable.
	 */
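	/*
	 * The table is indexed by hits * 16 / (hits + misses), i.e. a value
	 * in [0, 16], which is why it has 17 entries: index 0 means every
	 * recent lookup missed, 16 means every recent lookup hit.
	 */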
	static unsigned table[] = {1, 1, 1, 2, 4, 6, 7, 8, 7, 6, 4, 4, 3, 3, 2, 2, 1};
	unsigned hits = mq->cache_stats.hits;
	unsigned misses = mq->cache_stats.misses;
	unsigned index = safe_div(hits << 4u, hits + misses);
	return table[index];
}

static void update_promote_levels(struct smq_policy *mq)
{
	/*
	 * If there are unused cache entries then we want to be really
	 * eager to promote.
	 */
	unsigned threshold_level = allocator_empty(&mq->cache_alloc) ?
		default_promote_level(mq) : (NR_HOTSPOT_LEVELS / 2u);

	threshold_level = max(threshold_level, NR_HOTSPOT_LEVELS);

	/*
	 * If the hotspot queue is performing badly then we have little
	 * confidence that we know which blocks to promote.  So we cut down
	 * the amount of promotions.
	 */
	switch (stats_assess(&mq->hotspot_stats)) {
	case Q_POOR:
		threshold_level /= 4u;
		break;

	case Q_FAIR:
		threshold_level /= 2u;
		break;

	case Q_WELL:
		break;
	}

	mq->read_promote_level = NR_HOTSPOT_LEVELS - threshold_level;
	mq->write_promote_level = (NR_HOTSPOT_LEVELS - threshold_level);
}

/*
 * If the hotspot queue is performing badly, then we try and move entries
 * around more quickly.
 */
static void update_level_jump(struct smq_policy *mq)
{
	switch (stats_assess(&mq->hotspot_stats)) {
	case Q_POOR:
		mq->hotspot_level_jump = 4u;
		break;

	case Q_FAIR:
		mq->hotspot_level_jump = 2u;
		break;

	case Q_WELL:
		mq->hotspot_level_jump = 1u;
		break;
	}
}

static void end_hotspot_period(struct smq_policy *mq)
{
	clear_bitset(mq->hotspot_hit_bits, mq->nr_hotspot_blocks);
	update_promote_levels(mq);

	if (time_after(jiffies, mq->next_hotspot_period)) {
		update_level_jump(mq);
		q_redistribute(&mq->hotspot);
		stats_reset(&mq->hotspot_stats);
		mq->next_hotspot_period = jiffies + HOTSPOT_UPDATE_PERIOD;
	}
}

static void end_cache_period(struct smq_policy *mq)
{
	if (time_after(jiffies, mq->next_cache_period)) {
		clear_bitset(mq->cache_hit_bits, from_cblock(mq->cache_size));

		q_redistribute(&mq->dirty);
		q_redistribute(&mq->clean);
		stats_reset(&mq->cache_stats);

		mq->next_cache_period = jiffies + CACHE_UPDATE_PERIOD;
	}
}

/*----------------------------------------------------------------*/

/*
 * Targets are given as a percentage.
 */
#define CLEAN_TARGET 25u
#define FREE_TARGET 25u
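
/*
 * e.g. with a 1000 block cache, a FREE_TARGET of 25u means free_target_met()
 * is satisfied once at least 250 blocks are either unallocated or already
 * queued for demotion.
 */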

static unsigned percent_to_target(struct smq_policy *mq, unsigned p)
{
	return from_cblock(mq->cache_size) * p / 100u;
}

static bool clean_target_met(struct smq_policy *mq, bool idle)
{
	/*
	 * Cache entries may not be populated.  So we cannot rely on the
	 * size of the clean queue.
	 */
	if (idle) {
		/*
		 * We'd like to clean everything.
		 */
		return q_size(&mq->dirty) == 0u;
	}

	/*
	 * If we're busy we don't worry about cleaning at all.
	 */
	return true;
}

static bool free_target_met(struct smq_policy *mq)
{
	unsigned nr_free;

	nr_free = from_cblock(mq->cache_size) - mq->cache_alloc.nr_allocated;
	return (nr_free + btracker_nr_demotions_queued(mq->bg_work)) >=
		percent_to_target(mq, FREE_TARGET);
}

/*----------------------------------------------------------------*/

static void mark_pending(struct smq_policy *mq, struct entry *e)
{
	BUG_ON(e->sentinel);
	BUG_ON(!e->allocated);
	BUG_ON(e->pending_work);
	e->pending_work = true;
}

static void clear_pending(struct smq_policy *mq, struct entry *e)
{
	BUG_ON(!e->pending_work);
	e->pending_work = false;
}

static void queue_writeback(struct smq_policy *mq, bool idle)
{
	int r;
	struct policy_work work;
	struct entry *e;

	e = q_peek(&mq->dirty, mq->dirty.nr_levels, idle);
	if (e) {
		mark_pending(mq, e);
		q_del(&mq->dirty, e);

		work.op = POLICY_WRITEBACK;
		work.oblock = e->oblock;
		work.cblock = infer_cblock(mq, e);

		r = btracker_queue(mq->bg_work, &work, NULL);
		if (r) {
			clear_pending(mq, e);
			q_push_front(&mq->dirty, e);
		}
	}
}

static void queue_demotion(struct smq_policy *mq)
{
	int r;
	struct policy_work work;
	struct entry *e;

	if (WARN_ON_ONCE(!mq->migrations_allowed))
		return;

	e = q_peek(&mq->clean, mq->clean.nr_levels / 2, true);
	if (!e) {
		if (!clean_target_met(mq, true))
			queue_writeback(mq, false);
		return;
	}

	mark_pending(mq, e);
	q_del(&mq->clean, e);

	work.op = POLICY_DEMOTE;
	work.oblock = e->oblock;
	work.cblock = infer_cblock(mq, e);
	r = btracker_queue(mq->bg_work, &work, NULL);
	if (r) {
		clear_pending(mq, e);
		q_push_front(&mq->clean, e);
	}
}

static void queue_promotion(struct smq_policy *mq, dm_oblock_t oblock,
			    struct policy_work **workp)
{
	int r;
	struct entry *e;
	struct policy_work work;

	if (!mq->migrations_allowed)
		return;

	if (allocator_empty(&mq->cache_alloc)) {
		/*
		 * We always claim to be 'idle' to ensure some demotions happen
		 * with continuous loads.
		 */
		if (!free_target_met(mq))
			queue_demotion(mq);
		return;
	}

	if (btracker_promotion_already_present(mq->bg_work, oblock))
		return;

	/*
	 * We allocate the entry now to reserve the cblock.  If the
	 * background work is aborted we must remember to free it.
	 */
	e = alloc_entry(&mq->cache_alloc);
	BUG_ON(!e);
	e->pending_work = true;
	work.op = POLICY_PROMOTE;
	work.oblock = oblock;
	work.cblock = infer_cblock(mq, e);
	r = btracker_queue(mq->bg_work, &work, workp);
	if (r)
		free_entry(&mq->cache_alloc, e);
}

/*----------------------------------------------------------------*/

enum promote_result {
	PROMOTE_NOT,
	PROMOTE_TEMPORARY,
	PROMOTE_PERMANENT
};

/*
 * Converts a boolean into a promote result.
 */
static enum promote_result maybe_promote(bool promote)
{
	return promote ? PROMOTE_PERMANENT : PROMOTE_NOT;
}
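
/*
 * A write arriving with fast_promote set is promoted straight away
 * (PROMOTE_TEMPORARY) as long as free cache entries remain; otherwise a
 * block is only promoted once its hotspot entry has climbed to the
 * current read/write promote level.
 */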
static enum promote_result should_promote(struct smq_policy *mq, struct entry *hs_e,
					  int data_dir, bool fast_promote)
{
	if (data_dir == WRITE) {
		if (!allocator_empty(&mq->cache_alloc) && fast_promote)
			return PROMOTE_TEMPORARY;

		return maybe_promote(hs_e->level >= mq->write_promote_level);
	} else
		return maybe_promote(hs_e->level >= mq->read_promote_level);
}

static dm_oblock_t to_hblock(struct smq_policy *mq, dm_oblock_t b)
{
	sector_t r = from_oblock(b);
	(void) sector_div(r, mq->cache_blocks_per_hotspot_block);
	return to_oblock(r);
}

static struct entry *update_hotspot_queue(struct smq_policy *mq, dm_oblock_t b)
{
	unsigned hi;
	dm_oblock_t hb = to_hblock(mq, b);
	struct entry *e = h_lookup(&mq->hotspot_table, hb);

	if (e) {
		stats_level_accessed(&mq->hotspot_stats, e->level);

		hi = get_index(&mq->hotspot_alloc, e);
		q_requeue(&mq->hotspot, e,
			  test_and_set_bit(hi, mq->hotspot_hit_bits) ?
			  0u : mq->hotspot_level_jump,
			  NULL, NULL);

	} else {
		stats_miss(&mq->hotspot_stats);

		e = alloc_entry(&mq->hotspot_alloc);
		if (!e) {
			e = q_pop(&mq->hotspot);
			if (e) {
				h_remove(&mq->hotspot_table, e);
				hi = get_index(&mq->hotspot_alloc, e);
				clear_bit(hi, mq->hotspot_hit_bits);
			}
		}

		if (e) {
			e->oblock = hb;
			q_push(&mq->hotspot, e);
			h_insert(&mq->hotspot_table, e);
		}
	}

	return e;
}

/*----------------------------------------------------------------*/

/*
 * Public interface, via the policy struct.  See dm-cache-policy.h for a
 * description of these.
 */

static struct smq_policy *to_smq_policy(struct dm_cache_policy *p)
{
	return container_of(p, struct smq_policy, policy);
}

static void smq_destroy(struct dm_cache_policy *p)
{
	struct smq_policy *mq = to_smq_policy(p);

	btracker_destroy(mq->bg_work);
	h_exit(&mq->hotspot_table);
	h_exit(&mq->table);
	free_bitset(mq->hotspot_hit_bits);
	free_bitset(mq->cache_hit_bits);
	space_exit(&mq->es);
	kfree(mq);
}

/*----------------------------------------------------------------*/

static int __lookup(struct smq_policy *mq, dm_oblock_t oblock, dm_cblock_t *cblock,
		    int data_dir, bool fast_copy,
		    struct policy_work **work, bool *background_work)
{
	struct entry *e, *hs_e;
	enum promote_result pr;

	*background_work = false;

	e = h_lookup(&mq->table, oblock);
	if (e) {
		stats_level_accessed(&mq->cache_stats, e->level);

		requeue(mq, e);
		*cblock = infer_cblock(mq, e);
		return 0;

	} else {
		stats_miss(&mq->cache_stats);

		/*
		 * The hotspot queue only gets updated with misses.
		 */
		hs_e = update_hotspot_queue(mq, oblock);

		pr = should_promote(mq, hs_e, data_dir, fast_copy);
		if (pr != PROMOTE_NOT) {
			queue_promotion(mq, oblock, work);
			*background_work = true;
		}

		return -ENOENT;
	}
}

static int smq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock,
		      int data_dir, bool fast_copy,
		      bool *background_work)
{
	int r;
	unsigned long flags;
	struct smq_policy *mq = to_smq_policy(p);

	spin_lock_irqsave(&mq->lock, flags);
	r = __lookup(mq, oblock, cblock,
		     data_dir, fast_copy,
		     NULL, background_work);
	spin_unlock_irqrestore(&mq->lock, flags);

	return r;
}

static int smq_lookup_with_work(struct dm_cache_policy *p,
				dm_oblock_t oblock, dm_cblock_t *cblock,
				int data_dir, bool fast_copy,
				struct policy_work **work)
{
	int r;
	bool background_queued;
	unsigned long flags;
	struct smq_policy *mq = to_smq_policy(p);

	spin_lock_irqsave(&mq->lock, flags);
	r = __lookup(mq, oblock, cblock, data_dir, fast_copy, work, &background_queued);
	spin_unlock_irqrestore(&mq->lock, flags);

	return r;
}

static int smq_get_background_work(struct dm_cache_policy *p, bool idle,
				   struct policy_work **result)
{
	int r;
	unsigned long flags;
	struct smq_policy *mq = to_smq_policy(p);

	spin_lock_irqsave(&mq->lock, flags);
	r = btracker_issue(mq->bg_work, result);
	if (r == -ENODATA) {
		if (!clean_target_met(mq, idle)) {
			queue_writeback(mq, idle);
			r = btracker_issue(mq->bg_work, result);
		}
	}
	spin_unlock_irqrestore(&mq->lock, flags);

	return r;
}

/*
 * We need to clear any pending work flags that have been set, and in the
 * case of promotion free the entry for the destination cblock.
 */
static void __complete_background_work(struct smq_policy *mq,
				       struct policy_work *work,
				       bool success)
{
	struct entry *e = get_entry(&mq->cache_alloc,
				    from_cblock(work->cblock));

	switch (work->op) {
	case POLICY_PROMOTE:
		// !h, !q, a
		clear_pending(mq, e);
		if (success) {
			e->oblock = work->oblock;
			e->level = NR_CACHE_LEVELS - 1;
			push(mq, e);
			// h, q, a
		} else {
			free_entry(&mq->cache_alloc, e);
			// !h, !q, !a
		}
		break;

	case POLICY_DEMOTE:
		// h, !q, a
		if (success) {
			h_remove(&mq->table, e);
			free_entry(&mq->cache_alloc, e);
			// !h, !q, !a
		} else {
			clear_pending(mq, e);
			push_queue(mq, e);
			// h, q, a
		}
		break;

	case POLICY_WRITEBACK:
		// h, !q, a
		clear_pending(mq, e);
		push_queue(mq, e);
		// h, q, a
		break;
	}

	btracker_complete(mq->bg_work, work);
}

static void smq_complete_background_work(struct dm_cache_policy *p,
					 struct policy_work *work,
					 bool success)
{
	unsigned long flags;
	struct smq_policy *mq = to_smq_policy(p);

	spin_lock_irqsave(&mq->lock, flags);
	__complete_background_work(mq, work, success);
	spin_unlock_irqrestore(&mq->lock, flags);
}

// in_hash(oblock) -> in_hash(oblock)
static void __smq_set_clear_dirty(struct smq_policy *mq, dm_cblock_t cblock, bool set)
{
	struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock));

	if (e->pending_work)
		e->dirty = set;
	else {
		del_queue(mq, e);
		e->dirty = set;
		push_queue(mq, e);
	}
}

static void smq_set_dirty(struct dm_cache_policy *p, dm_cblock_t cblock)
{
	unsigned long flags;
	struct smq_policy *mq = to_smq_policy(p);

	spin_lock_irqsave(&mq->lock, flags);
	__smq_set_clear_dirty(mq, cblock, true);
	spin_unlock_irqrestore(&mq->lock, flags);
}

static void smq_clear_dirty(struct dm_cache_policy *p, dm_cblock_t cblock)
{
	struct smq_policy *mq = to_smq_policy(p);
	unsigned long flags;

	spin_lock_irqsave(&mq->lock, flags);
	__smq_set_clear_dirty(mq, cblock, false);
	spin_unlock_irqrestore(&mq->lock, flags);
}

static unsigned random_level(dm_cblock_t cblock)
{
	return hash_32(from_cblock(cblock), 9) & (NR_CACHE_LEVELS - 1);
}

static int smq_load_mapping(struct dm_cache_policy *p,
			    dm_oblock_t oblock, dm_cblock_t cblock,
			    bool dirty, uint32_t hint, bool hint_valid)
{
	struct smq_policy *mq = to_smq_policy(p);
	struct entry *e;

	e = alloc_particular_entry(&mq->cache_alloc, from_cblock(cblock));
	e->oblock = oblock;
	e->dirty = dirty;
	e->level = hint_valid ? min(hint, NR_CACHE_LEVELS - 1) : random_level(cblock);
	e->pending_work = false;

	/*
	 * When we load mappings we push ahead of both sentinels in order to
	 * allow demotions and cleaning to occur immediately.
	 */
	push_front(mq, e);

	return 0;
}

static int smq_invalidate_mapping(struct dm_cache_policy *p, dm_cblock_t cblock)
{
	struct smq_policy *mq = to_smq_policy(p);
	struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock));

	if (!e->allocated)
		return -ENODATA;

	// FIXME: what if this block has pending background work?
	del_queue(mq, e);
	h_remove(&mq->table, e);
	free_entry(&mq->cache_alloc, e);

	return 0;
}

static uint32_t smq_get_hint(struct dm_cache_policy *p, dm_cblock_t cblock)
{
	struct smq_policy *mq = to_smq_policy(p);
	struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock));

	if (!e->allocated)
		return 0;

	return e->level;
}

static dm_cblock_t smq_residency(struct dm_cache_policy *p)
{
	dm_cblock_t r;
	unsigned long flags;
	struct smq_policy *mq = to_smq_policy(p);

	spin_lock_irqsave(&mq->lock, flags);
	r = to_cblock(mq->cache_alloc.nr_allocated);
	spin_unlock_irqrestore(&mq->lock, flags);

	return r;
}

static void smq_tick(struct dm_cache_policy *p, bool can_block)
{
	struct smq_policy *mq = to_smq_policy(p);
	unsigned long flags;

	spin_lock_irqsave(&mq->lock, flags);
	mq->tick++;
	update_sentinels(mq);
	end_hotspot_period(mq);
	end_cache_period(mq);
	spin_unlock_irqrestore(&mq->lock, flags);
}

static void smq_allow_migrations(struct dm_cache_policy *p, bool allow)
{
	struct smq_policy *mq = to_smq_policy(p);
	mq->migrations_allowed = allow;
}

/*
 * smq has no config values, but the old mq policy did.  To avoid breaking
 * software we continue to accept these configurables for the mq policy,
 * but they have no effect.
 */
static int mq_set_config_value(struct dm_cache_policy *p,
			       const char *key, const char *value)
{
	unsigned long tmp;

	if (kstrtoul(value, 10, &tmp))
		return -EINVAL;

	if (!strcasecmp(key, "random_threshold") ||
	    !strcasecmp(key, "sequential_threshold") ||
	    !strcasecmp(key, "discard_promote_adjustment") ||
	    !strcasecmp(key, "read_promote_adjustment") ||
	    !strcasecmp(key, "write_promote_adjustment")) {
		DMWARN("tunable '%s' no longer has any effect, mq policy is now an alias for smq", key);
		return 0;
	}

	return -EINVAL;
}

static int mq_emit_config_values(struct dm_cache_policy *p, char *result,
				 unsigned maxlen, ssize_t *sz_ptr)
{
	ssize_t sz = *sz_ptr;

	DMEMIT("10 random_threshold 0 "
	       "sequential_threshold 0 "
	       "discard_promote_adjustment 0 "
	       "read_promote_adjustment 0 "
	       "write_promote_adjustment 0 ");

	*sz_ptr = sz;
	return 0;
}

/* Init the policy plugin interface function pointers. */
static void init_policy_functions(struct smq_policy *mq, bool mimic_mq)
{
	mq->policy.destroy = smq_destroy;
	mq->policy.lookup = smq_lookup;
	mq->policy.lookup_with_work = smq_lookup_with_work;
	mq->policy.get_background_work = smq_get_background_work;
	mq->policy.complete_background_work = smq_complete_background_work;
	mq->policy.set_dirty = smq_set_dirty;
	mq->policy.clear_dirty = smq_clear_dirty;
	mq->policy.load_mapping = smq_load_mapping;
	mq->policy.invalidate_mapping = smq_invalidate_mapping;
	mq->policy.get_hint = smq_get_hint;
	mq->policy.residency = smq_residency;
	mq->policy.tick = smq_tick;
	mq->policy.allow_migrations = smq_allow_migrations;

	if (mimic_mq) {
		mq->policy.set_config_value = mq_set_config_value;
		mq->policy.emit_config_values = mq_emit_config_values;
	}
}

static bool too_many_hotspot_blocks(sector_t origin_size,
				    sector_t hotspot_block_size,
				    unsigned nr_hotspot_blocks)
{
	return (hotspot_block_size * nr_hotspot_blocks) > origin_size;
}

static void calc_hotspot_params(sector_t origin_size,
				sector_t cache_block_size,
				unsigned nr_cache_blocks,
				sector_t *hotspot_block_size,
				unsigned *nr_hotspot_blocks)
{
	*hotspot_block_size = cache_block_size * 16u;
	*nr_hotspot_blocks = max(nr_cache_blocks / 4u, 1024u);

	while ((*hotspot_block_size > cache_block_size) &&
	       too_many_hotspot_blocks(origin_size, *hotspot_block_size, *nr_hotspot_blocks))
		*hotspot_block_size /= 2u;
}

static struct dm_cache_policy *__smq_create(dm_cblock_t cache_size,
					    sector_t origin_size,
					    sector_t cache_block_size,
					    bool mimic_mq,
					    bool migrations_allowed)
{
	unsigned i;
	unsigned nr_sentinels_per_queue = 2u * NR_CACHE_LEVELS;
	unsigned total_sentinels = 2u * nr_sentinels_per_queue;
	struct smq_policy *mq = kzalloc(sizeof(*mq), GFP_KERNEL);

	if (!mq)
		return NULL;

	init_policy_functions(mq, mimic_mq);
	mq->cache_size = cache_size;
	mq->cache_block_size = cache_block_size;

	calc_hotspot_params(origin_size, cache_block_size, from_cblock(cache_size),
			    &mq->hotspot_block_size, &mq->nr_hotspot_blocks);

	mq->cache_blocks_per_hotspot_block = div64_u64(mq->hotspot_block_size, mq->cache_block_size);
	mq->hotspot_level_jump = 1u;
	if (space_init(&mq->es, total_sentinels + mq->nr_hotspot_blocks + from_cblock(cache_size))) {
		DMERR("couldn't initialize entry space");
		goto bad_pool_init;
	}

	init_allocator(&mq->writeback_sentinel_alloc, &mq->es, 0, nr_sentinels_per_queue);
	for (i = 0; i < nr_sentinels_per_queue; i++)
		get_entry(&mq->writeback_sentinel_alloc, i)->sentinel = true;

	init_allocator(&mq->demote_sentinel_alloc, &mq->es, nr_sentinels_per_queue, total_sentinels);
	for (i = 0; i < nr_sentinels_per_queue; i++)
		get_entry(&mq->demote_sentinel_alloc, i)->sentinel = true;

	init_allocator(&mq->hotspot_alloc, &mq->es, total_sentinels,
		       total_sentinels + mq->nr_hotspot_blocks);

	init_allocator(&mq->cache_alloc, &mq->es,
		       total_sentinels + mq->nr_hotspot_blocks,
		       total_sentinels + mq->nr_hotspot_blocks + from_cblock(cache_size));

	mq->hotspot_hit_bits = alloc_bitset(mq->nr_hotspot_blocks);
	if (!mq->hotspot_hit_bits) {
		DMERR("couldn't allocate hotspot hit bitset");
		goto bad_hotspot_hit_bits;
	}
	clear_bitset(mq->hotspot_hit_bits, mq->nr_hotspot_blocks);

	if (from_cblock(cache_size)) {
		mq->cache_hit_bits = alloc_bitset(from_cblock(cache_size));
		if (!mq->cache_hit_bits) {
			DMERR("couldn't allocate cache hit bitset");
			goto bad_cache_hit_bits;
		}
		clear_bitset(mq->cache_hit_bits, from_cblock(mq->cache_size));
	} else
		mq->cache_hit_bits = NULL;

	mq->tick = 0;
	spin_lock_init(&mq->lock);

	q_init(&mq->hotspot, &mq->es, NR_HOTSPOT_LEVELS);
	mq->hotspot.nr_top_levels = 8;
	mq->hotspot.nr_in_top_levels = min(mq->nr_hotspot_blocks / NR_HOTSPOT_LEVELS,
					   from_cblock(mq->cache_size) / mq->cache_blocks_per_hotspot_block);

	q_init(&mq->clean, &mq->es, NR_CACHE_LEVELS);
	q_init(&mq->dirty, &mq->es, NR_CACHE_LEVELS);

	stats_init(&mq->hotspot_stats, NR_HOTSPOT_LEVELS);
	stats_init(&mq->cache_stats, NR_CACHE_LEVELS);

	if (h_init(&mq->table, &mq->es, from_cblock(cache_size)))
		goto bad_alloc_table;

	if (h_init(&mq->hotspot_table, &mq->es, mq->nr_hotspot_blocks))
		goto bad_alloc_hotspot_table;

	sentinels_init(mq);
	mq->write_promote_level = mq->read_promote_level = NR_HOTSPOT_LEVELS;

	mq->next_hotspot_period = jiffies;
	mq->next_cache_period = jiffies;

	mq->bg_work = btracker_create(4096); /* FIXME: hard coded value */
	if (!mq->bg_work)
		goto bad_btracker;

	mq->migrations_allowed = migrations_allowed;

	return &mq->policy;

bad_btracker:
	h_exit(&mq->hotspot_table);
bad_alloc_hotspot_table:
	h_exit(&mq->table);
bad_alloc_table:
	free_bitset(mq->cache_hit_bits);
bad_cache_hit_bits:
	free_bitset(mq->hotspot_hit_bits);
bad_hotspot_hit_bits:
	space_exit(&mq->es);
bad_pool_init:
	kfree(mq);
	return NULL;
}

static struct dm_cache_policy *smq_create(dm_cblock_t cache_size,
					  sector_t origin_size,
					  sector_t cache_block_size)
{
	return __smq_create(cache_size, origin_size, cache_block_size, false, true);
}

static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
					 sector_t origin_size,
					 sector_t cache_block_size)
{
	return __smq_create(cache_size, origin_size, cache_block_size, true, true);
}

static struct dm_cache_policy *cleaner_create(dm_cblock_t cache_size,
					      sector_t origin_size,
					      sector_t cache_block_size)
{
	return __smq_create(cache_size, origin_size, cache_block_size, false, false);
}

/*----------------------------------------------------------------*/

static struct dm_cache_policy_type smq_policy_type = {
	.name = "smq",
	.version = {2, 0, 0},
	.hint_size = 4,
	.owner = THIS_MODULE,
	.create = smq_create
};

static struct dm_cache_policy_type mq_policy_type = {
	.name = "mq",
	.version = {2, 0, 0},
	.hint_size = 4,
	.owner = THIS_MODULE,
	.create = mq_create,
};

static struct dm_cache_policy_type cleaner_policy_type = {
	.name = "cleaner",
	.version = {2, 0, 0},
	.hint_size = 4,
	.owner = THIS_MODULE,
	.create = cleaner_create,
};

static struct dm_cache_policy_type default_policy_type = {
	.name = "default",
	.version = {2, 0, 0},
	.hint_size = 4,
	.owner = THIS_MODULE,
	.create = smq_create,
	.real = &smq_policy_type
};

static int __init smq_init(void)
{
	int r;

	r = dm_cache_policy_register(&smq_policy_type);
	if (r) {
		DMERR("register failed %d", r);
		return -ENOMEM;
	}

	r = dm_cache_policy_register(&mq_policy_type);
	if (r) {
		DMERR("register failed (as mq) %d", r);
		goto out_mq;
	}

	r = dm_cache_policy_register(&cleaner_policy_type);
	if (r) {
		DMERR("register failed (as cleaner) %d", r);
		goto out_cleaner;
	}

	r = dm_cache_policy_register(&default_policy_type);
	if (r) {
		DMERR("register failed (as default) %d", r);
		goto out_default;
	}

	return 0;

out_default:
	dm_cache_policy_unregister(&cleaner_policy_type);
out_cleaner:
	dm_cache_policy_unregister(&mq_policy_type);
out_mq:
	dm_cache_policy_unregister(&smq_policy_type);
	return -ENOMEM;
}

static void __exit smq_exit(void)
{
	dm_cache_policy_unregister(&cleaner_policy_type);
	dm_cache_policy_unregister(&smq_policy_type);
	dm_cache_policy_unregister(&mq_policy_type);
	dm_cache_policy_unregister(&default_policy_type);
}

module_init(smq_init);
module_exit(smq_exit);

MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("smq cache policy");

MODULE_ALIAS("dm-cache-default");
MODULE_ALIAS("dm-cache-mq");
MODULE_ALIAS("dm-cache-cleaner");