sref.c

/*
 * Copyright (c) 2014-2019 Richard Braun.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 *
 * This implementation is based on the paper "RadixVM: Scalable address
 * spaces for multithreaded applications" by Austin T. Clements,
 * M. Frans Kaashoek, and Nickolai Zeldovich. Specifically, it implements
 * the Refcache component described in the paper, with a few differences
 * outlined below.
 *
 * Refcache flushes delta caches directly from an interrupt handler, and
 * disables interrupts and preemption on cache access. That behavior is
 * realtime-unfriendly because of the potentially large number of deltas
 * in a cache. This module uses dedicated manager threads to perform
 * cache flushes and queue reviews, and only disables preemption on
 * individual delta access.
 *
 * Locking protocol : cache -> counter -> global data
 */

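/*
 * Illustrative usage sketch (not part of this file): the "obj" type and
 * obj_destroy() below are hypothetical. An object embeds a counter, the
 * noref callback runs from a work queue once the counter reaches a true
 * zero, and increments/decrements only touch per-processor deltas.
 *
 *   struct obj {
 *       struct sref_counter ref;
 *   };
 *
 *   static void
 *   obj_noref(struct sref_counter *counter)
 *   {
 *       obj_destroy(structof(counter, struct obj, ref));
 *   }
 *
 *   sref_counter_init(&obj->ref, 1, NULL, obj_noref);
 *   sref_counter_inc(&obj->ref);
 *   sref_counter_dec(&obj->ref);
 *   sref_counter_dec(&obj->ref);
 */
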
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#include <kern/atomic.h>
#include <kern/clock.h>
#include <kern/cpumap.h>
#include <kern/init.h>
#include <kern/list.h>
#include <kern/log.h>
#include <kern/macros.h>
#include <kern/panic.h>
#include <kern/percpu.h>
#include <kern/slist.h>
#include <kern/spinlock.h>
#include <kern/sref.h>
#include <kern/sref_i.h>
#include <kern/syscnt.h>
#include <kern/thread.h>

#include <machine/cpu.h>

/*
 * Per-cache delta table size.
 */
#define SREF_CACHE_DELTA_TABLE_SIZE 4096

#if !ISP2(SREF_CACHE_DELTA_TABLE_SIZE)
#error "delta table size must be a power-of-two"
#endif

#ifdef __LP64__
#define SREF_HASH_SHIFT 3
#else
#define SREF_HASH_SHIFT 2
#endif

/*
 * Negative close to 0 so that an overflow occurs early.
 */
#define SREF_EPOCH_ID_INIT_VALUE ((unsigned int)-500)

/*
 * Since review queues are processor-local, at least two local epochs
 * must have passed before a zero is considered a true zero. As a result,
 * three queues are required, one for the current epoch, and two more.
 * The queues are stored in an array used as a ring buffer that moves
 * forward with each new local epoch. Indexing in this array is done
 * with a binary mask instead of a modulo, for performance reasons, and
 * consequently, the array size must be at least the nearest power-of-two
 * above three.
 */
#define SREF_NR_QUEUES P2ROUND(3, 2)

/*
 * Number of counters in review queue beyond which to issue a warning.
 */
#define SREF_NR_COUNTERS_WARN 10000

/*
 * Global data.
 *
 * Processors regularly check the global epoch ID against their own,
 * locally cached epoch ID. If they're the same, a processor flushes
 * its cached deltas, acknowledges its flush by decrementing the pending
 * acknowledgments counter, and increments its local epoch ID, preventing
 * additional flushes during the same epoch.
 *
 * The last processor to acknowledge starts the next epoch.
 *
 * The epoch ID and the pending acknowledgments counter fill an entire
 * cache line each in order to avoid false sharing on SMP. Whenever
 * multiple processors may access them, they must use atomic operations
 * to avoid data races.
 *
 * Atomic operations on the pending acknowledgments counter are done
 * with acquire-release ordering to enforce the memory ordering
 * guarantees required by both the implementation and the interface.
 */
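/*
 * A rough per-epoch walkthrough, in terms of the functions below (this is
 * a summary of this file's own logic, not additional behavior): the clock
 * tick calls sref_report_periodic_event() -> sref_cache_check(); when the
 * local and global epoch IDs match, the processor either acknowledges
 * directly (sref_cache_end_epoch()) if it has nothing to do, or wakes its
 * manager thread, which flushes deltas (sref_cache_flush()) and reviews
 * queued counters (sref_queue_review()). The last acknowledgment
 * (sref_data_ack_cpu()) starts the next epoch (sref_data_start_epoch()).
 */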
struct sref_data {
    struct {
        alignas(CPU_L1_SIZE) unsigned int epoch_id;
    };

    struct {
        alignas(CPU_L1_SIZE) unsigned int nr_pending_acks;
    };

    uint64_t start_ts;
    struct syscnt sc_epochs;
    struct syscnt sc_dirty_zeroes;
    struct syscnt sc_true_zeroes;
    struct syscnt sc_revives;
    struct syscnt sc_last_epoch_ms;
    struct syscnt sc_longest_epoch_ms;
};

/*
 * Temporary difference to apply on a reference counter.
 *
 * Deltas are stored in per-processor caches and added to their global
 * counter when flushed. A delta is valid if and only if the counter it
 * points to isn't NULL.
 *
 * On cache flush, if a delta is valid, it must be flushed whatever its
 * value because a delta can be a dirty zero too. By flushing all valid
 * deltas, and clearing them all after a flush, activity on a counter is
 * reliably reported.
 */
struct sref_delta {
    struct list node;
    struct sref_counter *counter;
    unsigned long value;
};

struct sref_queue {
    struct slist counters;
    unsigned long size;
};

/*
 * Per-processor cache of deltas.
 *
 * A cache is dirty if there is at least one delta that requires flushing.
 * It may only be flushed once per epoch.
 *
 * Delta caches are implemented with hash tables for quick ref count to
 * delta lookups. For now, a very simple replacement policy, similar to
 * that described in the RadixVM paper, is used. Improve with an LRU-like
 * algorithm if this turns out to be a problem.
 *
 * Periodic events (normally the system timer tick) trigger cache checks.
 * A cache check may wake up the manager thread if the cache needs management,
 * i.e. if it's dirty or if there are counters to review. Otherwise, the
 * flush acknowledgment is done directly to avoid the cost of a thread
 * wake-up.
 *
 * Interrupts and preemption must be disabled when accessing a delta cache.
 */
struct sref_cache {
    struct sref_data *data;
    bool dirty;
    bool flushed;
    unsigned int epoch_id;
    struct sref_delta deltas[SREF_CACHE_DELTA_TABLE_SIZE];
    struct list valid_deltas;
    struct sref_queue queues[SREF_NR_QUEUES];
    struct thread *manager;
    struct syscnt sc_collisions;
    struct syscnt sc_flushes;
};

static struct sref_data sref_data;

static struct sref_cache sref_cache __percpu;

static unsigned int
sref_data_get_epoch_id(const struct sref_data *data)
{
    return data->epoch_id;
}

static bool
sref_data_check_epoch_id(const struct sref_data *data, unsigned int epoch_id)
{
    unsigned int global_epoch_id;

    global_epoch_id = atomic_load(&data->epoch_id, ATOMIC_RELAXED);

    if (unlikely(global_epoch_id == epoch_id)) {
        atomic_fence(ATOMIC_ACQUIRE);
        return true;
    }

    return false;
}

static void
sref_data_start_epoch(struct sref_data *data)
{
    uint64_t now, duration;
    unsigned int epoch_id;

    now = clock_get_time();
    duration = clock_ticks_to_ms(now - data->start_ts);
    syscnt_set(&data->sc_last_epoch_ms, duration);

    if (duration > syscnt_read(&data->sc_longest_epoch_ms)) {
        syscnt_set(&data->sc_longest_epoch_ms, duration);
    }

    assert(data->nr_pending_acks == 0);
    data->nr_pending_acks = cpu_count();
    data->start_ts = now;

    epoch_id = atomic_load(&data->epoch_id, ATOMIC_RELAXED);
    atomic_store(&data->epoch_id, epoch_id + 1, ATOMIC_RELEASE);
}

static void
sref_data_ack_cpu(struct sref_data *data)
{
    unsigned int prev;

    prev = atomic_fetch_sub(&data->nr_pending_acks, 1, ATOMIC_ACQ_REL);

    if (prev != 1) {
        assert(prev != 0);
        return;
    }

    syscnt_inc(&data->sc_epochs);
    sref_data_start_epoch(data);
}

static void
sref_data_update_stats(struct sref_data *data, int64_t nr_dirty_zeroes,
                       int64_t nr_true_zeroes, int64_t nr_revives)
{
    syscnt_add(&data->sc_dirty_zeroes, nr_dirty_zeroes);
    syscnt_add(&data->sc_true_zeroes, nr_true_zeroes);
    syscnt_add(&data->sc_revives, nr_revives);
}

static bool
sref_counter_aligned(const struct sref_counter *counter)
{
    return ((uintptr_t)counter & (~SREF_WEAKREF_MASK)) == 0;
}

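/*
 * Weak reference helpers. A weak reference stores the address of its
 * counter; since counters are suitably aligned, the bits outside
 * SREF_WEAKREF_MASK are never part of a valid counter address, which
 * leaves room for the SREF_WEAKREF_DYING tag while the counter sits in a
 * review queue at zero.
 */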
static void
sref_weakref_init(struct sref_weakref *weakref, struct sref_counter *counter)
{
    assert(sref_counter_aligned(counter));
    weakref->addr = (uintptr_t)counter;
}

static void
sref_weakref_mark_dying(struct sref_weakref *weakref)
{
    atomic_or(&weakref->addr, SREF_WEAKREF_DYING, ATOMIC_RELAXED);
}

static void
sref_weakref_clear_dying(struct sref_weakref *weakref)
{
    atomic_and(&weakref->addr, SREF_WEAKREF_MASK, ATOMIC_RELAXED);
}

static int
sref_weakref_kill(struct sref_weakref *weakref)
{
    uintptr_t addr, oldval;

    addr = atomic_load(&weakref->addr, ATOMIC_RELAXED) | SREF_WEAKREF_DYING;
    oldval = atomic_cas(&weakref->addr, addr, (uintptr_t)NULL, ATOMIC_RELAXED);

    if (oldval != addr) {
        assert((oldval & SREF_WEAKREF_MASK) == (addr & SREF_WEAKREF_MASK));
        return EBUSY;
    }

    return 0;
}

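/*
 * Atomically clear the dying tag and return the counter address, or NULL
 * if the weak reference was already killed. Clearing the tag makes a
 * subsequent sref_weakref_kill() fail, which the review path counts as a
 * revival.
 */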
static struct sref_counter *
sref_weakref_tryget(struct sref_weakref *weakref)
{
    uintptr_t addr, oldval, newval;

    do {
        addr = atomic_load(&weakref->addr, ATOMIC_RELAXED);
        newval = addr & SREF_WEAKREF_MASK;
        oldval = atomic_cas(&weakref->addr, addr, newval, ATOMIC_RELAXED);
    } while (oldval != addr);

    return (struct sref_counter *)newval;
}

static uintptr_t
sref_counter_hash(const struct sref_counter *counter)
{
    uintptr_t va;

    va = (uintptr_t)counter;

    assert(P2ALIGNED(va, 1UL << SREF_HASH_SHIFT));
    return va >> SREF_HASH_SHIFT;
}

static bool
sref_counter_is_queued(const struct sref_counter *counter)
{
    return counter->flags & SREF_CNTF_QUEUED;
}

static void
sref_counter_mark_queued(struct sref_counter *counter)
{
    counter->flags |= SREF_CNTF_QUEUED;
}

static void
sref_counter_clear_queued(struct sref_counter *counter)
{
    counter->flags &= ~SREF_CNTF_QUEUED;
}

static bool
sref_counter_is_dirty(const struct sref_counter *counter)
{
    return counter->flags & SREF_CNTF_DIRTY;
}

static void
sref_counter_mark_dirty(struct sref_counter *counter)
{
    counter->flags |= SREF_CNTF_DIRTY;
}

static void
sref_counter_clear_dirty(struct sref_counter *counter)
{
    counter->flags &= ~SREF_CNTF_DIRTY;
}

#ifdef SREF_VERIFY

static bool
sref_counter_is_unreferenced(const struct sref_counter *counter)
{
    return counter->flags & SREF_CNTF_UNREF;
}

static void
sref_counter_mark_unreferenced(struct sref_counter *counter)
{
    counter->flags |= SREF_CNTF_UNREF;
}

#endif /* SREF_VERIFY */

static void
sref_counter_mark_dying(struct sref_counter *counter)
{
    if (counter->weakref == NULL) {
        return;
    }

    sref_weakref_mark_dying(counter->weakref);
}

static void
sref_counter_clear_dying(struct sref_counter *counter)
{
    if (counter->weakref == NULL) {
        return;
    }

    sref_weakref_clear_dying(counter->weakref);
}

static int
sref_counter_kill_weakref(struct sref_counter *counter)
{
    if (counter->weakref == NULL) {
        return 0;
    }

    return sref_weakref_kill(counter->weakref);
}

static void __init
sref_queue_init(struct sref_queue *queue)
{
    slist_init(&queue->counters);
    queue->size = 0;
}

static bool
sref_queue_empty(const struct sref_queue *queue)
{
    return queue->size == 0;
}

static void
sref_queue_push(struct sref_queue *queue, struct sref_counter *counter)
{
    slist_insert_tail(&queue->counters, &counter->node);
    queue->size++;
}

static struct sref_counter *
sref_queue_pop(struct sref_queue *queue)
{
    struct sref_counter *counter;

    counter = slist_first_entry(&queue->counters, typeof(*counter), node);
    slist_remove(&queue->counters, NULL);
    queue->size--;
    return counter;
}

static void
sref_queue_move(struct sref_queue *dest, const struct sref_queue *src)
{
    slist_set_head(&dest->counters, &src->counters);
    dest->size = src->size;
}

static struct sref_queue *
sref_cache_get_queue(struct sref_cache *cache, size_t index)
{
    assert(index < ARRAY_SIZE(cache->queues));
    return &cache->queues[index];
}

static struct sref_queue *
sref_cache_get_queue_by_epoch_id(struct sref_cache *cache,
                                 unsigned int epoch_id)
{
    size_t mask;

    mask = ARRAY_SIZE(cache->queues) - 1;
    return sref_cache_get_queue(cache, epoch_id & mask);
}

static void
sref_cache_schedule_review(struct sref_cache *cache,
                           struct sref_counter *counter)
{
    struct sref_queue *queue;

    assert(!sref_counter_is_queued(counter));
    assert(!sref_counter_is_dirty(counter));

    sref_counter_mark_queued(counter);
    sref_counter_mark_dying(counter);

    queue = sref_cache_get_queue_by_epoch_id(cache, cache->epoch_id);
    sref_queue_push(queue, counter);
}

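/*
 * Merge a flushed delta into the shared counter. If the counter drops to
 * zero, it is scheduled for review, or simply marked dirty if it is
 * already sitting in a review queue.
 */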
static void
sref_counter_add(struct sref_counter *counter, unsigned long delta,
                 struct sref_cache *cache)
{
    assert(!cpu_intr_enabled());

    spinlock_lock(&counter->lock);

    counter->value += delta;

    if (counter->value == 0) {
        if (sref_counter_is_queued(counter)) {
            sref_counter_mark_dirty(counter);
        } else {
            sref_cache_schedule_review(cache, counter);
        }
    }

    spinlock_unlock(&counter->lock);
}

static void
sref_counter_noref(struct work *work)
{
    struct sref_counter *counter;

    counter = structof(work, struct sref_counter, work);
    counter->noref_fn(counter);
}

static void __init
sref_delta_init(struct sref_delta *delta)
{
    delta->counter = NULL;
    delta->value = 0;
}

static struct sref_counter *
sref_delta_counter(struct sref_delta *delta)
{
    return delta->counter;
}

static void
sref_delta_set_counter(struct sref_delta *delta, struct sref_counter *counter)
{
    assert(delta->value == 0);
    delta->counter = counter;
}

static void
sref_delta_clear(struct sref_delta *delta)
{
    assert(delta->value == 0);
    delta->counter = NULL;
}

static void
sref_delta_inc(struct sref_delta *delta)
{
    delta->value++;
}

static void
sref_delta_dec(struct sref_delta *delta)
{
    delta->value--;
}

static bool
sref_delta_is_valid(const struct sref_delta *delta)
{
    return delta->counter;
}

static void
sref_delta_flush(struct sref_delta *delta, struct sref_cache *cache)
{
    sref_counter_add(delta->counter, delta->value, cache);
    delta->value = 0;
}

static void
sref_delta_evict(struct sref_delta *delta, struct sref_cache *cache)
{
    sref_delta_flush(delta, cache);
    sref_delta_clear(delta);
}

static struct sref_cache *
sref_get_local_cache(void)
{
    return cpu_local_ptr(sref_cache);
}

static uintptr_t
sref_cache_compute_counter_index(const struct sref_cache *cache,
                                 const struct sref_counter *counter)
{
    return sref_counter_hash(counter) & (ARRAY_SIZE(cache->deltas) - 1);
}

static struct sref_delta *
sref_cache_get_delta(struct sref_cache *cache, size_t index)
{
    assert(index < ARRAY_SIZE(cache->deltas));
    return &cache->deltas[index];
}

static struct sref_cache *
sref_cache_acquire(unsigned long *flags)
{
    thread_preempt_disable_intr_save(flags);
    return sref_get_local_cache();
}

static void
sref_cache_release(unsigned long flags)
{
    thread_preempt_enable_intr_restore(flags);
}

static bool
sref_cache_is_dirty(const struct sref_cache *cache)
{
    return cache->dirty;
}

static void
sref_cache_set_dirty(struct sref_cache *cache)
{
    cache->dirty = true;
}

static void
sref_cache_clear_dirty(struct sref_cache *cache)
{
    cache->dirty = false;
}

static bool
sref_cache_is_flushed(const struct sref_cache *cache)
{
    return cache->flushed;
}

static void
sref_cache_set_flushed(struct sref_cache *cache)
{
    cache->flushed = true;
}

static void
sref_cache_clear_flushed(struct sref_cache *cache)
{
    cache->flushed = false;
}

static void
sref_cache_add_delta(struct sref_cache *cache, struct sref_delta *delta,
                     struct sref_counter *counter)
{
    assert(!sref_delta_is_valid(delta));
    assert(counter);

    sref_delta_set_counter(delta, counter);
    list_insert_tail(&cache->valid_deltas, &delta->node);
}

static void
sref_cache_remove_delta(struct sref_cache *cache, struct sref_delta *delta)
{
    assert(sref_delta_is_valid(delta));

    sref_delta_evict(delta, cache);
    list_remove(&delta->node);
}

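/*
 * Look up the delta slot for a counter in the per-processor hash table.
 * If the slot currently holds a delta for another counter, that delta is
 * flushed and evicted first (the simple replacement policy mentioned
 * above), and the collision is accounted for.
 */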
static struct sref_delta *
sref_cache_take_delta(struct sref_cache *cache, struct sref_counter *counter)
{
    struct sref_delta *delta;
    size_t index;

    index = sref_cache_compute_counter_index(cache, counter);
    delta = sref_cache_get_delta(cache, index);

    if (!sref_delta_is_valid(delta)) {
        sref_cache_add_delta(cache, delta, counter);
    } else if (sref_delta_counter(delta) != counter) {
        sref_cache_remove_delta(cache, delta);
        sref_cache_add_delta(cache, delta, counter);
        syscnt_inc(&cache->sc_collisions);
    }

    return delta;
}

static bool
sref_cache_needs_management(struct sref_cache *cache)
{
    const struct sref_queue *queue;

    assert(!cpu_intr_enabled());
    assert(!thread_preempt_enabled());

    queue = sref_cache_get_queue_by_epoch_id(cache, cache->epoch_id - 2);
    return sref_cache_is_dirty(cache) || !sref_queue_empty(queue);
}

static void
sref_cache_end_epoch(struct sref_cache *cache)
{
    assert(!sref_cache_needs_management(cache));

    sref_data_ack_cpu(cache->data);
    cache->epoch_id++;
}

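/*
 * Flush the local cache: evict every valid delta, re-enabling preemption
 * and interrupts between individual deltas to keep latency low, then hand
 * the review queue from two local epochs ago to the caller and
 * acknowledge the current epoch.
 */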
static void
sref_cache_flush(struct sref_cache *cache, struct sref_queue *queue)
{
    struct sref_queue *prev_queue;
    unsigned long flags;

    for (;;) {
        struct sref_delta *delta;

        thread_preempt_disable_intr_save(&flags);

        if (list_empty(&cache->valid_deltas)) {
            break;
        }

        delta = list_first_entry(&cache->valid_deltas, typeof(*delta), node);
        sref_cache_remove_delta(cache, delta);

        thread_preempt_enable_intr_restore(flags);
    }

    sref_cache_clear_dirty(cache);
    sref_cache_set_flushed(cache);

    prev_queue = sref_cache_get_queue_by_epoch_id(cache, cache->epoch_id - 2);
    sref_queue_move(queue, prev_queue);
    sref_queue_init(prev_queue);

    sref_cache_end_epoch(cache);

    thread_preempt_enable_intr_restore(flags);

    syscnt_inc(&cache->sc_flushes);
}

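/*
 * Review counters that reached zero two local epochs ago. A counter that
 * is no longer at zero is simply dropped from the queue. A counter still
 * at zero is requeued if it was dirtied in the meantime or if killing its
 * weak reference fails (a revival); otherwise it is a true zero and its
 * noref work is scheduled.
 */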
static void
sref_queue_review(struct sref_queue *queue, struct sref_cache *cache)
{
    int64_t nr_dirty_zeroes, nr_true_zeroes, nr_revives;
    struct sref_counter *counter;
    struct work_queue works;
    unsigned long flags;
    bool requeue;
    int error;

    nr_dirty_zeroes = 0;
    nr_true_zeroes = 0;
    nr_revives = 0;
    work_queue_init(&works);

    while (!sref_queue_empty(queue)) {
        counter = sref_queue_pop(queue);

        spinlock_lock_intr_save(&counter->lock, &flags);

#ifdef SREF_VERIFY
        assert(!sref_counter_is_unreferenced(counter));
#endif /* SREF_VERIFY */

        assert(sref_counter_is_queued(counter));
        sref_counter_clear_queued(counter);

        if (counter->value != 0) {
            sref_counter_clear_dirty(counter);
            sref_counter_clear_dying(counter);
            spinlock_unlock_intr_restore(&counter->lock, flags);
            continue;
        }

        if (sref_counter_is_dirty(counter)) {
            requeue = true;
            nr_dirty_zeroes++;
            sref_counter_clear_dirty(counter);
        } else {
            error = sref_counter_kill_weakref(counter);

            if (!error) {
                requeue = false;
            } else {
                requeue = true;
                nr_revives++;
            }
        }

        if (requeue) {
            sref_cache_schedule_review(cache, counter);
            spinlock_unlock_intr_restore(&counter->lock, flags);
        } else {
            /*
             * Keep in mind that the work structure shares memory with
             * the counter data.
             */

#ifdef SREF_VERIFY
            sref_counter_mark_unreferenced(counter);
#endif /* SREF_VERIFY */

            /*
             * Unlocking isn't needed here, since this counter is now
             * really at 0, but do it for consistency.
             */
            spinlock_unlock_intr_restore(&counter->lock, flags);

            nr_true_zeroes++;
            work_init(&counter->work, sref_counter_noref);
            work_queue_push(&works, &counter->work);
        }
    }

    if (work_queue_nr_works(&works) != 0) {
        work_queue_schedule(&works, WORK_HIGHPRIO);
    }

    sref_data_update_stats(cache->data, nr_dirty_zeroes,
                           nr_true_zeroes, nr_revives);
}

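/*
 * Manager thread entry point. The thread sleeps until a periodic check
 * clears the flushed flag and wakes it up, then flushes the cache and
 * reviews the queue collected two local epochs ago, with preemption
 * enabled outside of short per-delta critical sections.
 */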
static void
sref_cache_manage(void *arg)
{
    struct sref_cache *cache;
    struct sref_queue queue;
    unsigned long flags;

    cache = arg;

    thread_preempt_disable_intr_save(&flags);

    for (;;) {
        while (sref_cache_is_flushed(cache)) {
            thread_sleep(NULL, cache, "sref");
        }

        thread_preempt_enable_intr_restore(flags);

        sref_cache_flush(cache, &queue);
        sref_queue_review(&queue, cache);

        thread_preempt_disable_intr_save(&flags);
    }

    /* Never reached */
}

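/*
 * Periodic check, called from sref_report_periodic_event(). Nothing is
 * done unless the global epoch ID has caught up with the local one; in
 * that case the epoch is either acknowledged immediately, or the manager
 * thread is woken up to flush and review.
 */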
static void
sref_cache_check(struct sref_cache *cache)
{
    bool same_epoch;

    same_epoch = sref_data_check_epoch_id(&sref_data, cache->epoch_id);

    if (!same_epoch) {
        return;
    }

    if (!sref_cache_needs_management(cache)) {
        sref_cache_end_epoch(cache);
        return;
    }

    sref_cache_clear_flushed(cache);
    thread_wakeup(cache->manager);
}

static void __init
sref_cache_init(struct sref_cache *cache, unsigned int cpu,
                struct sref_data *data)
{
    char name[SYSCNT_NAME_SIZE];

    cache->data = data;
    cache->dirty = false;
    cache->flushed = true;
    cache->epoch_id = sref_data_get_epoch_id(&sref_data) + 1;

    for (size_t i = 0; i < ARRAY_SIZE(cache->deltas); i++) {
        sref_delta_init(sref_cache_get_delta(cache, i));
    }

    list_init(&cache->valid_deltas);

    for (size_t i = 0; i < ARRAY_SIZE(cache->queues); i++) {
        sref_queue_init(sref_cache_get_queue(cache, i));
    }

    snprintf(name, sizeof(name), "sref_collisions/%u", cpu);
    syscnt_register(&cache->sc_collisions, name);
    snprintf(name, sizeof(name), "sref_flushes/%u", cpu);
    syscnt_register(&cache->sc_flushes, name);

    cache->manager = NULL;
}

static void __init
sref_cache_init_manager(struct sref_cache *cache, unsigned int cpu)
{
    char name[THREAD_NAME_SIZE];
    struct thread_attr attr;
    struct thread *manager;
    struct cpumap *cpumap;
    int error;

    error = cpumap_create(&cpumap);

    if (error) {
        panic("sref: unable to create manager thread CPU map");
    }

    cpumap_zero(cpumap);
    cpumap_set(cpumap, cpu);

    snprintf(name, sizeof(name), THREAD_KERNEL_PREFIX "sref_cache_manage/%u",
             cpu);
    thread_attr_init(&attr, name);
    thread_attr_set_cpumap(&attr, cpumap);
    thread_attr_set_priority(&attr, THREAD_SCHED_FS_PRIO_MAX);
    error = thread_create(&manager, &attr, sref_cache_manage, cache);
    cpumap_destroy(cpumap);

    if (error) {
        panic("sref: unable to create manager thread");
    }

    cache->manager = manager;
}

static void __init
sref_data_init(struct sref_data *data)
{
    data->epoch_id = SREF_EPOCH_ID_INIT_VALUE;
    data->nr_pending_acks = 0;
    data->start_ts = clock_get_time();

    syscnt_register(&data->sc_epochs, "sref_epochs");
    syscnt_register(&data->sc_dirty_zeroes, "sref_dirty_zeroes");
    syscnt_register(&data->sc_true_zeroes, "sref_true_zeroes");
    syscnt_register(&data->sc_revives, "sref_revives");
    syscnt_register(&data->sc_last_epoch_ms, "sref_last_epoch_ms");
    syscnt_register(&data->sc_longest_epoch_ms, "sref_longest_epoch_ms");
}

static int __init
sref_bootstrap(void)
{
    sref_data_init(&sref_data);
    sref_cache_init(sref_get_local_cache(), 0, &sref_data);
    return 0;
}

INIT_OP_DEFINE(sref_bootstrap,
               INIT_OP_DEP(cpu_setup, true),
               INIT_OP_DEP(spinlock_setup, true),
               INIT_OP_DEP(syscnt_setup, true),
               INIT_OP_DEP(thread_bootstrap, true));

static int __init
sref_setup(void)
{
    for (unsigned int i = 1; i < cpu_count(); i++) {
        sref_cache_init(percpu_ptr(sref_cache, i), i, &sref_data);
    }

    for (unsigned int i = 0; i < cpu_count(); i++) {
        sref_cache_init_manager(percpu_ptr(sref_cache, i), i);
    }

    sref_data_start_epoch(&sref_data);
    return 0;
}

INIT_OP_DEFINE(sref_setup,
               INIT_OP_DEP(cpu_mp_probe, true),
               INIT_OP_DEP(cpumap_setup, true),
               INIT_OP_DEP(log_setup, true),
               INIT_OP_DEP(sref_bootstrap, true),
               INIT_OP_DEP(thread_setup, true));

void
sref_report_periodic_event(void)
{
    sref_cache_check(sref_get_local_cache());
}

void
sref_counter_init(struct sref_counter *counter,
                  unsigned long init_value,
                  struct sref_weakref *weakref,
                  sref_noref_fn_t noref_fn)
{
    assert(init_value != 0);

    counter->noref_fn = noref_fn;
    spinlock_init(&counter->lock);
    counter->flags = 0;
    counter->value = init_value;
    counter->weakref = weakref;

    if (weakref) {
        sref_weakref_init(weakref, counter);
    }
}

static void
sref_counter_inc_common(struct sref_counter *counter, struct sref_cache *cache)
{
    struct sref_delta *delta;

    sref_cache_set_dirty(cache);
    delta = sref_cache_take_delta(cache, counter);
    sref_delta_inc(delta);
}

void
sref_counter_inc(struct sref_counter *counter)
{
    struct sref_cache *cache;
    unsigned long flags;

    cache = sref_cache_acquire(&flags);
    sref_counter_inc_common(counter, cache);
    sref_cache_release(flags);
}

void
sref_counter_dec(struct sref_counter *counter)
{
    struct sref_cache *cache;
    struct sref_delta *delta;
    unsigned long flags;

    cache = sref_cache_acquire(&flags);
    sref_cache_set_dirty(cache);
    delta = sref_cache_take_delta(cache, counter);
    sref_delta_dec(delta);
    sref_cache_release(flags);
}

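/*
 * Obtain a counted reference from a weak reference. The tryget clears the
 * dying tag, so a counter waiting in a review queue at zero is revived
 * rather than released. Returns NULL if the weak reference was already
 * killed.
 */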
struct sref_counter *
sref_weakref_get(struct sref_weakref *weakref)
{
    struct sref_counter *counter;
    struct sref_cache *cache;
    unsigned long flags;

    cache = sref_cache_acquire(&flags);

    counter = sref_weakref_tryget(weakref);

    if (counter) {
        sref_counter_inc_common(counter, cache);
    }

    sref_cache_release(flags);

    return counter;
}