rcu.c 21 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748
  1. /*
  2. * Copyright (c) 2018 Richard Braun.
  3. *
  4. * This program is free software: you can redistribute it and/or modify
  5. * it under the terms of the GNU General Public License as published by
  6. * the Free Software Foundation, either version 3 of the License, or
  7. * (at your option) any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. *
  17. *
  18. * This implementation is based on the paper "Extending RCU for Realtime
  19. * and Embedded Workloads" by Paul E. McKenney, Ingo Molnar, Dipankar Sarma,
  20. * and Suparna Bhattacharya. Besides the mechanisms not implemented yet,
  21. * such as priority boosting, the differences are described below.
  22. *
  23. * First, this implementation uses scalable reference counters provided
  24. * by the sref module instead of per-CPU counters as described in the paper.
  25. * The main benefit of this approach is the centralization of most scalability
  26. * improvements in the sref module, which should propagate to all sref users,
  27. * including RCU.
  28. *
  29. * In addition, this implementation introduces the concept of windows, where
  30. * a window is a range in time to which readers may be linked. Here, a
  31. * grace period is defined as the time range at the end of a window where
  32. * various synchronization steps are performed to enforce the RCU guarantees.
  33. * The minimum duration of a window acts as a knob allowing users to tune
  34. * the behavior of the RCU system.
  35. *
  36. * Finally, the state machine described in the paper is updated to
  37. * accommodate windows, since grace periods don't run back-to-back.
  38. * Windows are regularly checked and flipped if the previous one isn't
  39. * active any more. From that moment, processors may notice the global flip
  40. * and perform a local flip of their work window ID. Once all processors
  41. * have acknowledged the flip, it is certain that no new work may be queued
  42. * on the previous window. At this point, the same occurs for the
  43. * processor-local reader window ID, and once all processors have
  44. * acknowledged that flip, there can be no new reader linked to the previous
  45. * window. The RCU system then releases its own reference to the previous
  46. * window and waits for the window reference counter to drop to 0, indicating
  47. * that all readers linked to the previous window have left their read-side
  48. * critical section. When this global event occurs, processors are requested
  49. * to flush the works queued for the previous window, and once they all have
  50. * acknowledged their flush, the window ends and becomes inactive, allowing
  51. * a new grace period to occur later on.
  52. *
  53. * Here is an informal diagram describing this process :
  54. *
  55. * t ---->
  56. *
  57. *    reader window flip ---+     +--- no more readers
  58. * work window flip ------+ |     | +- works flushed
  59. * (grace period start)   | |     | | (grace period / window end)
  60. *                        v v     v v
  61. *         +--------------+-+-----+-+
  62. *         |              . .     . |
  63. *         | window 0     . . gp  . |
  64. *         |   removal    . .     . | reclamation
  65. *         +--------------+-+-----+-+-----+----+
  66. *                        |               .    |
  67. *                        | window 1      . gp |
  68. *                        |   removal     .    | reclamation
  69. *                        +---------------+----+--------
  70. *                                        |
  71. *                                        | window 2 ...
  72. *                                        |
  73. *                                        +-------------
  74. *
  75. * On each processor, work window flips are separate from reader window
  76. * flips in order to correctly handle situations such as this one, where
  77. * "wf" denotes a window flip for both works and readers :
  78. *
  79. * t ---->
  80. *
  81. * CPU0    wf load                               flush
  82. * CPU1             wf                                 flush
  83. * global               no-new-reader ... no-ref             loaded value now invalid
  84. *
  85. * After its window flip, CPU0 may load data from the previous window with
  86. * a reader linked to the current window, because it doesn't know that there
  87. * may still be new works queued on the previous window.
  88. *
  89. * TODO Improve atomic acknowledgment scalability.
  90. * TODO Handle large amounts of deferred works.
  91. * TODO Priority boosting of slow readers.
  92. * TODO CPU registration for dyntick-friendly behavior.
  93. */
  94. #include <assert.h>
  95. #include <stdalign.h>
  96. #include <stdbool.h>
  97. #include <stddef.h>
  98. #include <stdio.h>
  99. #include <kern/atomic.h>
  100. #include <kern/clock.h>
  101. #include <kern/init.h>
  102. #include <kern/macros.h>
  103. #include <kern/rcu.h>
  104. #include <kern/panic.h>
  105. #include <kern/percpu.h>
  106. #include <kern/spinlock.h>
  107. #include <kern/sref.h>
  108. #include <kern/syscnt.h>
  109. #include <kern/thread.h>
  110. #include <kern/timer.h>
  111. #include <kern/work.h>
  112. #include <machine/cpu.h>
  /*
   * Initial value of the window IDs.
   *
   * This is a negative number close to 0, converted to an unsigned 32-bit
   * integer, so that the ID overflows (wraps around) early.
   */
  #define RCU_WINDOW_ID_INIT_VALUE ((uint32_t)(-500))
  /*
   * Delay, in milliseconds, between two consecutive window checks.
   *
   * Checking the windows triggers a flip when the previous window is no
   * longer active. The value comes from the build configuration.
   */
  #define RCU_WINDOW_CHECK_INTERVAL CONFIG_RCU_WINDOW_CHECK_INTERVAL
  /*
   * States of a grace period.
   *
   * A state change merely triggers some per-CPU processing, which every
   * processor then acknowledges globally by decrementing a shared atomic
   * counter. These states are not a complete description of the actual
   * state of a grace period.
   */
  enum rcu_gp_state
  {
    RCU_GP_STATE_WORK_WINDOW_FLIP = 0,    // Processors flip their work window ID.
    RCU_GP_STATE_READER_WINDOW_FLIP = 1,  // Processors flip their reader window ID.
    RCU_GP_STATE_WORK_FLUSH = 2,          // Processors flush the previous window works.
  };
  135. /*
  136. * Per-CPU view of a window.
  137. *
  138. * Deferred works are scheduled when the window ends.
  139. */
  140. struct rcu_cpu_window
  141. {
  142. struct work_queue works;
  143. };
  144. /*
  145. * Per-CPU RCU data.
  146. *
  147. * Each processor maintains two local window IDs. One is used as the current
  148. * window ID when deferring work, the other when detecting a reader. A local
  149. * flip occurs when a processor notices that the global grace period state
  150. * no longer matches the local grace period state. These checks only occur
  151. * on periodic events.
  152. *
  153. * Interrupts and preemption must be disabled when accessing local CPU data.
  154. */
  155. struct rcu_cpu_data
  156. {
  157. enum rcu_gp_state gp_state;
  158. uint32_t work_wid;
  159. uint32_t reader_wid;
  160. struct rcu_cpu_window windows[2];
  161. struct syscnt sc_nr_detected_readers;
  162. };
  163. /*
  164. * Global window.
  165. *
  166. * A window is a time range that tracks read-side references. Conceptually,
  167. * each reader adds a reference to the current window. In practice, references
  168. * are only added when readers are detected, which occurs on a context switch
  169. * (to track preempted threads) or a reader window flip (to prevent currently
  170. * running readers to be linked to the next window).
  171. *
  172. * When a window is started, its scalable reference counter is initialized
  173. * with a reference owned by the RCU system. That reference guarantees that
  174. * the window remains active as long as new readers may add references,
  175. * since it prevents the counter from dropping to 0. After a reader window
  176. * flip, there may not be new references to the window, and the initial
  177. * reference is dropped, allowing the counter to reach 0 once all detected
  178. * readers leave their critical section and unreference the window they're
  179. * linked to.
  180. */
  181. struct rcu_window
  182. {
  183. struct sref_counter nr_refs;
  184. uint64_t start_ts;
  185. bool active;
  186. };
  187. /*
  188. * Global data.
  189. *
  190. * Processors regularly check the grace period state against their own,
  191. * locally cached grace period state, and take action whenever they differ.
  192. * False sharing is avoided by making the global grace period state fill an
  193. * entire cache line on SMP.
  194. *
  195. * After processors notice a grace period state change, they acknowledge
  196. * noticing this change by decrementing the atomic acknowledgment counter,
  197. * which also fills a complete cache line on SMP in order to restrict cache
  198. * line bouncing. Atomic operations on this counter are done with
  199. * acquire-release ordering to enforce the memory ordering guarantees
  200. * required by the implementation, as well as those provided by the public
  201. * interface.
  202. *
  203. * In addition to the global window ID and the windows themselves, the data
  204. * include a timer, used to trigger the end of windows, i.e. grace periods.
  205. * Since the timer function, atomic acknowledgments, and window no-reference
  206. * function chain each other, there is currently no need for a global lock.
  207. */
  208. struct rcu_data
  209. {
  210. __cacheline_aligned enum rcu_gp_state gp_state;
  211. __cacheline_aligned uint32_t nr_acks;
  212. uint32_t wid;
  213. struct rcu_window windows[2];
  214. struct timer timer;
  215. struct syscnt sc_nr_windows;
  216. struct syscnt sc_last_window_ms;
  217. struct syscnt sc_longest_window_ms;
  218. };
  219. // Structure used to implement rcu_wait().
  220. struct rcu_waiter
  221. {
  222. struct work work;
  223. struct spinlock lock;
  224. struct thread *thread;
  225. bool done;
  226. };
  227. static struct rcu_data rcu_data;
  228. static struct rcu_cpu_data rcu_cpu_data __percpu;
  229. static struct rcu_cpu_data*
  230. rcu_get_cpu_data (void)
  231. {
  232. assert (!cpu_intr_enabled ());
  233. assert (!thread_preempt_enabled ());
  234. return (cpu_local_ptr (rcu_cpu_data));
  235. }
  236. static enum rcu_gp_state
  237. rcu_data_get_gp_state (const struct rcu_data *data)
  238. {
  239. return (data->gp_state);
  240. }
  241. static uint32_t
  242. rcu_data_get_wid (const struct rcu_data *data)
  243. {
  244. return (data->wid);
  245. }
  246. static struct rcu_window*
  247. rcu_data_get_window_from_index (struct rcu_data *data, size_t index)
  248. {
  249. assert (index < ARRAY_SIZE (data->windows));
  250. return (&data->windows[index]);
  251. }
  /*
   * Return the global window matching a window ID.
   *
   * Only the parity of the ID matters, so IDs alternate between the two
   * windows.
   */
  static struct rcu_window*
  rcu_data_get_window (struct rcu_data *data, uint32_t wid)
  {
    size_t slot = wid % 2;
    return rcu_data_get_window_from_index (data, slot);
  }
  257. static void
  258. rcu_data_update_gp_state (struct rcu_data *data, enum rcu_gp_state gp_state)
  259. {
  260. assert (!data->nr_acks);
  261. switch (gp_state)
  262. {
  263. case RCU_GP_STATE_WORK_WINDOW_FLIP:
  264. assert (data->gp_state == RCU_GP_STATE_WORK_FLUSH);
  265. break;
  266. case RCU_GP_STATE_READER_WINDOW_FLIP:
  267. assert (data->gp_state == RCU_GP_STATE_WORK_WINDOW_FLIP);
  268. break;
  269. case RCU_GP_STATE_WORK_FLUSH:
  270. assert (data->gp_state == RCU_GP_STATE_READER_WINDOW_FLIP);
  271. break;
  272. default:
  273. panic ("rcu: invalid grace period state");
  274. }
  275. data->nr_acks = cpu_count ();
  276. atomic_store_rel (&data->gp_state, gp_state);
  277. }
  278. static bool
  279. rcu_data_check_gp_state (const struct rcu_data *data,
  280. enum rcu_gp_state local_gp_state,
  281. enum rcu_gp_state *global_gp_state)
  282. {
  283. *global_gp_state = atomic_load_rlx (&data->gp_state);
  284. if (likely (local_gp_state == *global_gp_state))
  285. return (false);
  286. atomic_fence_acq ();
  287. return (true);
  288. }
  289. static void
  290. rcu_window_end (struct rcu_window *window)
  291. {
  292. assert (window->active);
  293. window->active = false;
  294. }
  295. static void
  296. rcu_window_ref (struct rcu_window *window)
  297. {
  298. sref_counter_inc (&window->nr_refs);
  299. }
  300. static void
  301. rcu_window_unref (struct rcu_window *window)
  302. {
  303. sref_counter_dec (&window->nr_refs);
  304. }
  305. static uint64_t
  306. rcu_window_get_start_ts (const struct rcu_window *window)
  307. {
  308. return (window->start_ts);
  309. }
  310. static void
  311. rcu_window_flush (struct sref_counter *counter __unused)
  312. {
  313. rcu_data_update_gp_state (&rcu_data, RCU_GP_STATE_WORK_FLUSH);
  314. }
  315. static void __init
  316. rcu_window_init (struct rcu_window *window)
  317. {
  318. window->active = false;
  319. }
  320. static void
  321. rcu_window_start (struct rcu_window *window)
  322. {
  323. assert (!window->active);
  324. sref_counter_init (&window->nr_refs, 1, NULL, rcu_window_flush);
  325. window->start_ts = clock_get_time ();
  326. window->active = true;
  327. }
  328. static bool
  329. rcu_window_active (const struct rcu_window *window)
  330. {
  331. return (window->active);
  332. }
  333. static void
  334. rcu_data_end_prev_window (struct rcu_data *data, uint64_t now)
  335. {
  336. _Auto window = rcu_data_get_window (data, data->wid - 1);
  337. uint64_t duration = clock_ticks_to_ms (now -
  338. rcu_window_get_start_ts (window));
  339. syscnt_set (&data->sc_last_window_ms, duration);
  340. if (duration > syscnt_read (&data->sc_longest_window_ms))
  341. syscnt_set (&data->sc_longest_window_ms, duration);
  342. rcu_window_end (window);
  343. }
  344. static void
  345. rcu_data_schedule_timer (struct rcu_data *data, uint64_t now)
  346. {
  347. uint64_t ticks = clock_ticks_from_ms (RCU_WINDOW_CHECK_INTERVAL);
  348. timer_schedule (&data->timer, now + ticks);
  349. }
  350. static void
  351. rcu_data_ack_cpu (struct rcu_data *data)
  352. {
  353. uint32_t prev_nr_acks = atomic_sub_acq_rel (&data->nr_acks, 1);
  354. if (prev_nr_acks != 1)
  355. {
  356. assert (prev_nr_acks);
  357. return;
  358. }
  359. uint64_t now;
  360. switch (data->gp_state)
  361. {
  362. case RCU_GP_STATE_WORK_WINDOW_FLIP:
  363. rcu_data_update_gp_state (data, RCU_GP_STATE_READER_WINDOW_FLIP);
  364. break;
  365. case RCU_GP_STATE_READER_WINDOW_FLIP:
  366. rcu_window_unref (rcu_data_get_window (data, data->wid - 1));
  367. break;
  368. case RCU_GP_STATE_WORK_FLUSH:
  369. now = clock_get_time ();
  370. rcu_data_end_prev_window (data, now);
  371. rcu_data_schedule_timer (data, now);
  372. break;
  373. default:
  374. panic ("rcu: invalid grace period state");
  375. }
  376. }
  377. static bool
  378. rcu_data_flip_windows (struct rcu_data *data)
  379. {
  380. _Auto window = rcu_data_get_window (data, data->wid - 1);
  381. if (rcu_window_active (window))
  382. return (false);
  383. rcu_window_start (window);
  384. syscnt_inc (&data->sc_nr_windows);
  385. ++data->wid;
  386. rcu_data_update_gp_state (data, RCU_GP_STATE_WORK_WINDOW_FLIP);
  387. return (true);
  388. }
  389. static void
  390. rcu_data_check_windows (struct timer *timer)
  391. {
  392. struct rcu_data *data = &rcu_data;
  393. if (!rcu_data_flip_windows (data))
  394. rcu_data_schedule_timer (data, timer_get_time (timer));
  395. }
  396. static void __init
  397. rcu_data_init (struct rcu_data *data)
  398. {
  399. data->gp_state = RCU_GP_STATE_WORK_FLUSH;
  400. data->nr_acks = 0;
  401. data->wid = RCU_WINDOW_ID_INIT_VALUE;
  402. for (size_t i = 0; i < ARRAY_SIZE (data->windows); i++)
  403. rcu_window_init (rcu_data_get_window_from_index (data, i));
  404. rcu_window_start (rcu_data_get_window (data, data->wid));
  405. timer_init (&data->timer, rcu_data_check_windows, 0);
  406. rcu_data_schedule_timer (data, clock_get_time ());
  407. syscnt_register (&data->sc_nr_windows, "rcu_nr_windows");
  408. syscnt_register (&data->sc_last_window_ms, "rcu_last_window_ms");
  409. syscnt_register (&data->sc_longest_window_ms, "rcu_longest_window_ms");
  410. }
  411. static void __init
  412. rcu_cpu_window_init (struct rcu_cpu_window *cpu_window)
  413. {
  414. work_queue_init (&cpu_window->works);
  415. }
  416. static void
  417. rcu_cpu_window_queue (struct rcu_cpu_window *cpu_window, struct work *work)
  418. {
  419. work_queue_push (&cpu_window->works, work);
  420. }
  421. static void
  422. rcu_cpu_window_flush (struct rcu_cpu_window *cpu_window)
  423. {
  424. work_queue_schedule (&cpu_window->works, 0);
  425. work_queue_init (&cpu_window->works);
  426. }
  427. static uint32_t
  428. rcu_cpu_data_get_reader_wid (const struct rcu_cpu_data *cpu_data)
  429. {
  430. return (cpu_data->reader_wid);
  431. }
  432. static struct rcu_cpu_window*
  433. rcu_cpu_data_get_window_from_index (struct rcu_cpu_data *cpu_data, size_t index)
  434. {
  435. assert (index < ARRAY_SIZE (cpu_data->windows));
  436. return (&cpu_data->windows[index]);
  437. }
  /*
   * Return the per-CPU window matching a window ID.
   *
   * Only the parity of the ID matters, so IDs alternate between the two
   * windows.
   */
  static struct rcu_cpu_window*
  rcu_cpu_data_get_window (struct rcu_cpu_data *cpu_data, uint32_t wid)
  {
    size_t slot = wid % 2;
    return rcu_cpu_data_get_window_from_index (cpu_data, slot);
  }
  443. static void __init
  444. rcu_cpu_data_init (struct rcu_cpu_data *cpu_data, uint32_t cpu)
  445. {
  446. struct rcu_data *data = &rcu_data;
  447. cpu_data->gp_state = rcu_data_get_gp_state (data);
  448. cpu_data->work_wid = rcu_data_get_wid (data);
  449. cpu_data->reader_wid = cpu_data->work_wid;
  450. for (size_t i = 0; i < ARRAY_SIZE (cpu_data->windows); i++)
  451. rcu_cpu_window_init (rcu_cpu_data_get_window_from_index (cpu_data, i));
  452. char name[SYSCNT_NAME_SIZE];
  453. snprintf (name, sizeof (name), "rcu_nr_detected_readers/%u", cpu);
  454. syscnt_register (&cpu_data->sc_nr_detected_readers, name);
  455. }
  456. static void
  457. rcu_cpu_data_queue (struct rcu_cpu_data *cpu_data, struct work *work)
  458. {
  459. _Auto cpu_window = rcu_cpu_data_get_window (cpu_data, cpu_data->work_wid);
  460. rcu_cpu_window_queue (cpu_window, work);
  461. }
  462. static void
  463. rcu_cpu_data_flush (struct rcu_cpu_data *cpu_data)
  464. {
  465. assert (cpu_data->work_wid == cpu_data->reader_wid);
  466. rcu_cpu_window_flush (rcu_cpu_data_get_window (cpu_data,
  467. cpu_data->work_wid - 1));
  468. }
  469. void
  470. rcu_reader_init (struct rcu_reader *reader)
  471. {
  472. reader->level = 0;
  473. reader->linked = false;
  474. }
  475. static void
  476. rcu_reader_link (struct rcu_reader *reader, struct rcu_cpu_data *cpu_data)
  477. {
  478. assert (!cpu_intr_enabled ());
  479. assert (reader == thread_rcu_reader (thread_self ()));
  480. assert (!rcu_reader_linked (reader));
  481. reader->wid = rcu_cpu_data_get_reader_wid (cpu_data);
  482. reader->linked = true;
  483. }
  484. static void
  485. rcu_reader_unlink (struct rcu_reader *reader)
  486. {
  487. assert (reader->level == 0);
  488. reader->linked = false;
  489. }
  490. static void
  491. rcu_reader_enter (struct rcu_reader *reader, struct rcu_cpu_data *cpu_data)
  492. {
  493. if (rcu_reader_linked (reader))
  494. return;
  495. struct rcu_data *data = &rcu_data;
  496. uint32_t wid = rcu_cpu_data_get_reader_wid (cpu_data);
  497. _Auto window = rcu_data_get_window (data, wid);
  498. rcu_reader_link (reader, cpu_data);
  499. rcu_window_ref (window);
  500. syscnt_inc (&cpu_data->sc_nr_detected_readers);
  501. }
  502. void
  503. rcu_reader_leave (struct rcu_reader *reader)
  504. {
  505. struct rcu_data *data = &rcu_data;
  506. _Auto window = rcu_data_get_window (data, reader->wid);
  507. rcu_window_unref (window);
  508. rcu_reader_unlink (reader);
  509. }
  // Account for a reader, but only if it is inside a critical section.
  static void
  rcu_reader_account (struct rcu_reader *reader, struct rcu_cpu_data *cpu_data)
  {
    if (!rcu_reader_in_cs (reader))
      return;

    rcu_reader_enter (reader, cpu_data);
  }
  516. static void
  517. rcu_cpu_data_flip_work_wid (struct rcu_cpu_data *cpu_data)
  518. {
  519. assert (!cpu_intr_enabled ());
  520. assert (!thread_preempt_enabled ());
  521. ++cpu_data->work_wid;
  522. }
  523. static void
  524. rcu_cpu_data_flip_reader_wid (struct rcu_cpu_data *cpu_data)
  525. {
  526. assert (!cpu_intr_enabled ());
  527. assert (!thread_preempt_enabled ());
  528. rcu_reader_account (thread_rcu_reader (thread_self ()), cpu_data);
  529. ++cpu_data->reader_wid;
  530. }
  531. static void
  532. rcu_cpu_data_check_gp_state (struct rcu_cpu_data *cpu_data)
  533. {
  534. struct rcu_data *data = &rcu_data;
  535. /*
  536. * A loop is used to optimize the case where a processor is the last to
  537. * acknowledge a grace period state change, in which case the latter
  538. * also immediately changes and can be acknowleged right away. As a
  539. * result, this loop may never run more than twice.
  540. */
  541. for (size_t i = 0; /* no condition */; i++)
  542. {
  543. enum rcu_gp_state global_gp_state,
  544. local_gp_state = cpu_data->gp_state;
  545. bool diff = rcu_data_check_gp_state (data, local_gp_state,
  546. &global_gp_state);
  547. if (! diff)
  548. break;
  549. assert (i < 2);
  550. switch (global_gp_state)
  551. {
  552. case RCU_GP_STATE_WORK_WINDOW_FLIP:
  553. rcu_cpu_data_flip_work_wid (cpu_data);
  554. rcu_data_ack_cpu (data);
  555. break;
  556. case RCU_GP_STATE_READER_WINDOW_FLIP:
  557. rcu_cpu_data_flip_reader_wid (cpu_data);
  558. rcu_data_ack_cpu (data);
  559. break;
  560. case RCU_GP_STATE_WORK_FLUSH:
  561. rcu_cpu_data_flush (cpu_data);
  562. rcu_data_ack_cpu (data);
  563. break;
  564. default:
  565. panic ("rcu: invalid grace period state");
  566. }
  567. cpu_data->gp_state = global_gp_state;
  568. }
  569. }
  /*
   * Report a context switch involving the given reader.
   *
   * Interrupts and preemption must be disabled (asserted).
   */
  void
  rcu_report_context_switch (struct rcu_reader *reader)
  {
    assert (!cpu_intr_enabled ());
    assert (!thread_preempt_enabled ());

    /*
     * The execution of most readers doesn't overlap with a grace period,
     * so they need no accounting. A preempted reader is different : a
     * grace period may start while it is preempted, so it must be
     * accounted for here. The other place where accounting happens is the
     * start of a grace period or, more exactly, the flip of the reader
     * window ID of a processor.
     */
    struct rcu_cpu_data *cpu_data = rcu_get_cpu_data ();

    rcu_reader_account (reader, cpu_data);
  }
  /*
   * Report a periodic event on the local processor, which makes it check
   * the global grace period state against its own.
   *
   * Interrupts and preemption must be disabled (asserted).
   */
  void
  rcu_report_periodic_event (void)
  {
    assert (!cpu_intr_enabled ());
    assert (!thread_preempt_enabled ());

    struct rcu_cpu_data *cpu_data = rcu_get_cpu_data ();

    rcu_cpu_data_check_gp_state (cpu_data);
  }
  591. void
  592. rcu_defer (struct work *work)
  593. {
  594. assert (!rcu_reader_in_cs (thread_rcu_reader (thread_self ())));
  595. cpu_flags_t flags;
  596. thread_preempt_disable_intr_save (&flags);
  597. _Auto cpu_data = rcu_get_cpu_data ();
  598. rcu_cpu_data_queue (cpu_data, work);
  599. thread_preempt_enable_intr_restore (flags);
  600. }
  601. static void
  602. rcu_waiter_wakeup (struct work *work)
  603. {
  604. _Auto waiter = structof (work, struct rcu_waiter, work);
  605. SPINLOCK_GUARD (&waiter->lock);
  606. waiter->done = true;
  607. thread_wakeup (waiter->thread);
  608. }
  609. static void
  610. rcu_waiter_init (struct rcu_waiter *waiter, struct thread *thread)
  611. {
  612. work_init (&waiter->work, rcu_waiter_wakeup);
  613. spinlock_init (&waiter->lock);
  614. waiter->thread = thread;
  615. waiter->done = false;
  616. }
  617. static void
  618. rcu_waiter_wait (struct rcu_waiter *waiter)
  619. {
  620. rcu_defer (&waiter->work);
  621. SPINLOCK_GUARD (&waiter->lock);
  622. while (!waiter->done)
  623. thread_sleep (&waiter->lock, waiter, "rcu_wait");
  624. }
  625. void
  626. rcu_wait (void)
  627. {
  628. struct rcu_waiter waiter;
  629. rcu_waiter_init (&waiter, thread_self ());
  630. rcu_waiter_wait (&waiter);
  631. }
  632. static int __init
  633. rcu_bootstrap (void)
  634. {
  635. rcu_data_init (&rcu_data);
  636. rcu_cpu_data_init (cpu_local_ptr (rcu_cpu_data), 0);
  637. return (0);
  638. }
  639. INIT_OP_DEFINE (rcu_bootstrap,
  640. INIT_OP_DEP (spinlock_setup, true),
  641. INIT_OP_DEP (sref_bootstrap, true),
  642. INIT_OP_DEP (syscnt_setup, true),
  643. INIT_OP_DEP (thread_bootstrap, true),
  644. INIT_OP_DEP (timer_bootstrap, true));
  645. static int __init
  646. rcu_setup (void)
  647. {
  648. for (uint32_t i = 1; i < cpu_count (); i++)
  649. rcu_cpu_data_init (percpu_ptr (rcu_cpu_data, i), i);
  650. return (0);
  651. }
  652. INIT_OP_DEFINE (rcu_setup,
  653. INIT_OP_DEP (cpu_mp_probe, true),
  654. INIT_OP_DEP (rcu_bootstrap, true));