/*
 * Copyright (c) 2018 Richard Braun.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 *
 * This implementation is based on the paper "Extending RCU for Realtime
 * and Embedded Workloads" by Paul E. McKenney, Ingo Molnar, Dipankar Sarma,
 * and Suparna Bhattacharya. Aside from the mechanisms not yet implemented,
 * such as priority boosting, the differences are described below.
 *
 * First, this implementation uses scalable reference counters provided
 * by the sref module instead of the per-CPU counters described in the paper.
 * The main benefit of this approach is the centralization of most scalability
 * improvements in the sref module, which should propagate to all sref users,
 * including RCU.
 *
 * In addition, this implementation introduces the concept of windows, where
 * a window is a range in time to which readers may be linked. Here, a
 * grace period is defined as the time range at the end of a window where
 * various synchronization steps are performed to enforce the RCU guarantees.
 * The minimum duration of a window acts as a knob allowing users to tune
 * the behavior of the RCU system.
 *
 * Finally, the state machine described in the paper is updated to
 * accommodate windows, since grace periods don't run back-to-back.
 * Windows are regularly checked and flipped if the previous one isn't
 * active any more. From that moment, processors may notice the global flip
 * and perform a local flip of their work window ID. Once all processors
 * have acknowledged the flip, it is certain that no new work may be queued
 * on the previous window. At this point, the same occurs for the
 * processor-local reader window ID, and once all processors have
 * acknowledged that flip, there can be no new reader linked to the previous
 * window. The RCU system then releases its own reference to the previous
 * window and waits for the window reference counter to drop to 0, indicating
 * that all readers linked to the previous window have left their read-side
 * critical section. When this global event occurs, processors are requested
 * to flush the works queued for the previous window, and once they have all
 * acknowledged the flush, the window ends and becomes inactive, allowing
 * a new grace period to occur later on.
 *
 * Here is an informal diagram describing this process :
 *
 * t ---->
 *
 * reader window flip ----+     +--- no more readers
 * work window flip ----+ |     | +- works flushed
 * (grace period start) | |     | |  (grace period / window end)
 *                      v v     v v
 *       +--------------+-+-----+-+
 *       |              . .     . |
 *       |   window 0   . .  gp . |
 *       |   removal    . .     . | reclamation
 *       +--------------+-+-----+-+-----+----+
 *                      |               .    |
 *                      |    window 1   . gp |
 *                      |    removal    .    | reclamation
 *                      +---------------+----+--------
 *                                      |
 *                                      |  window 2 ...
 *                                      |
 *                                      +-------------
 *
 * On each processor, work window flips are separate from reader window
 * flips in order to correctly handle situations such as this one, where
 * "wf" denotes a window flip for both works and readers :
 *
 * t ---->
 *
 * CPU0     wf     load                                        flush
 * CPU1                  wf                           flush
 * global                    no-new-reader ... no-ref        loaded value now invalid
 *
 * After its window flip, CPU0 may load data from the previous window with
 * a reader linked to the current window, because it doesn't know that there
 * may still be new works queued on the previous window.
 *
 * TODO Improve atomic acknowledgment scalability.
 * TODO Handle large amounts of deferred works.
 * TODO Priority boosting of slow readers.
 * TODO CPU registration for dyntick-friendly behavior.
 */
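
/*
 * A minimal read-side usage sketch. The read-side entry points are
 * declared in kern/rcu.h; the rcu_read_enter()/rcu_read_leave() names
 * and the global_ptr/obj variables below are assumptions made for
 * illustration only :
 *
 *   rcu_read_enter();
 *   obj = atomic_load(&global_ptr, ATOMIC_ACQUIRE);
 *
 *   if (obj != NULL) {
 *       do_something(obj);      // safe until the matching rcu_read_leave()
 *   }
 *
 *   rcu_read_leave();
 *
 * An updater unpublishes the object with an atomic store, then either
 * blocks in rcu_wait() or queues reclamation with rcu_defer().
 */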

#include <assert.h>
#include <stdalign.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#include <kern/atomic.h>
#include <kern/clock.h>
#include <kern/init.h>
#include <kern/macros.h>
#include <kern/panic.h>
#include <kern/percpu.h>
#include <kern/rcu.h>
#include <kern/spinlock.h>
#include <kern/sref.h>
#include <kern/syscnt.h>
#include <kern/thread.h>
#include <kern/timer.h>
#include <kern/work.h>

#include <machine/cpu.h>

/*
 * A negative value close to 0 so that an overflow occurs early.
 */
#define RCU_WINDOW_ID_INIT_VALUE ((unsigned int)-500)
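
/*
 * Window IDs may freely overflow : they are only incremented and used
 * modulo 2, through (wid & 1), to index the two-entry window arrays,
 * and both operations remain consistent across the unsigned wrap, e.g. :
 *
 *   0xfffffe0c & 1 == 0      (the initial value above, i.e. -500)
 *   0xffffffff & 1 == 1      (-1)
 *   0x00000000 & 1 == 0      (wrap)
 */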

/*
 * Interval (in milliseconds) between window checks.
 *
 * When windows are checked, a flip occurs if the previous window isn't
 * active any more.
 */
#define RCU_WINDOW_CHECK_INTERVAL CONFIG_RCU_WINDOW_CHECK_INTERVAL

/*
 * Grace period states.
 *
 * These states are only used to trigger per-CPU processing that is
 * globally acknowledged by decrementing a global atomic counter. They
 * do not completely represent the actual state of a grace period.
 */
enum rcu_gp_state {
    RCU_GP_STATE_WORK_WINDOW_FLIP,
    RCU_GP_STATE_READER_WINDOW_FLIP,
    RCU_GP_STATE_WORK_FLUSH,
};
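
/*
 * The state machine cycles through these states in the order
 * WORK_WINDOW_FLIP -> READER_WINDOW_FLIP -> WORK_FLUSH, then back to
 * WORK_WINDOW_FLIP for the next window, as enforced by the assertions
 * in rcu_data_update_gp_state().
 */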

/*
 * Per-CPU view of a window.
 *
 * Deferred works are scheduled when the window ends.
 */
struct rcu_cpu_window {
    struct work_queue works;
};

/*
 * Per-CPU RCU data.
 *
 * Each processor maintains two local window IDs. One is used as the current
 * window ID when deferring work, the other when detecting a reader. A local
 * flip occurs when a processor notices that the global grace period state
 * no longer matches the local grace period state. These checks only occur
 * on periodic events.
 *
 * Interrupts and preemption must be disabled when accessing local CPU data.
 */
struct rcu_cpu_data {
    enum rcu_gp_state gp_state;
    unsigned int work_wid;
    unsigned int reader_wid;
    struct rcu_cpu_window windows[2];
    struct syscnt sc_nr_detected_readers;
};

/*
 * Global window.
 *
 * A window is a time range that tracks read-side references. Conceptually,
 * each reader adds a reference to the current window. In practice, references
 * are only added when readers are detected, which occurs on a context switch
 * (to track preempted threads) or a reader window flip (to prevent currently
 * running readers from being linked to the next window).
 *
 * When a window is started, its scalable reference counter is initialized
 * with a reference owned by the RCU system. That reference guarantees that
 * the window remains active as long as new readers may add references,
 * since it prevents the counter from dropping to 0. After a reader window
 * flip, there can be no new references to the window, and the initial
 * reference is dropped, allowing the counter to reach 0 once all detected
 * readers leave their critical section and unreference the window they're
 * linked to.
 */
struct rcu_window {
    struct sref_counter nr_refs;
    uint64_t start_ts;
    bool active;
};

/*
 * Global data.
 *
 * Processors regularly check the grace period state against their own,
 * locally cached grace period state, and take action whenever they differ.
 * False sharing is avoided by making the global grace period state fill an
 * entire cache line on SMP.
 *
 * After processors notice a grace period state change, they acknowledge
 * noticing this change by decrementing the atomic acknowledgment counter,
 * which also fills a complete cache line on SMP in order to restrict cache
 * line bouncing. Atomic operations on this counter are done with
 * acquire-release ordering to enforce the memory ordering guarantees
 * required by the implementation, as well as those provided by the public
 * interface.
 *
 * In addition to the global window ID and the windows themselves, the data
 * include a timer, used to trigger the end of windows, i.e. grace periods.
 * Since the timer function, the atomic acknowledgments, and the window
 * no-reference function chain into one another, there is currently no need
 * for a global lock.
 */
struct rcu_data {
    struct {
        alignas(CPU_L1_SIZE) enum rcu_gp_state gp_state;
    };

    struct {
        alignas(CPU_L1_SIZE) unsigned int nr_acks;
    };

    unsigned int wid;
    struct rcu_window windows[2];
    struct timer timer;
    struct syscnt sc_nr_windows;
    struct syscnt sc_last_window_ms;
    struct syscnt sc_longest_window_ms;
};

/*
 * Structure used to implement rcu_wait().
 */
struct rcu_waiter {
    struct work work;
    struct spinlock lock;
    struct thread *thread;
    bool done;
};

static struct rcu_data rcu_data;

static struct rcu_cpu_data rcu_cpu_data __percpu;

static struct rcu_cpu_data *
rcu_get_cpu_data(void)
{
    assert(!cpu_intr_enabled());
    assert(!thread_preempt_enabled());

    return cpu_local_ptr(rcu_cpu_data);
}

static enum rcu_gp_state
rcu_data_get_gp_state(const struct rcu_data *data)
{
    return data->gp_state;
}

static unsigned int
rcu_data_get_wid(const struct rcu_data *data)
{
    return data->wid;
}

static struct rcu_window *
rcu_data_get_window_from_index(struct rcu_data *data, size_t index)
{
    assert(index < ARRAY_SIZE(data->windows));
    return &data->windows[index];
}

static struct rcu_window *
rcu_data_get_window(struct rcu_data *data, unsigned int wid)
{
    return rcu_data_get_window_from_index(data, wid & 1);
}

static void
rcu_data_update_gp_state(struct rcu_data *data, enum rcu_gp_state gp_state)
{
    assert(data->nr_acks == 0);

    switch (gp_state) {
    case RCU_GP_STATE_WORK_WINDOW_FLIP:
        assert(data->gp_state == RCU_GP_STATE_WORK_FLUSH);
        break;
    case RCU_GP_STATE_READER_WINDOW_FLIP:
        assert(data->gp_state == RCU_GP_STATE_WORK_WINDOW_FLIP);
        break;
    case RCU_GP_STATE_WORK_FLUSH:
        assert(data->gp_state == RCU_GP_STATE_READER_WINDOW_FLIP);
        break;
    default:
        panic("rcu: invalid grace period state");
    }

    data->nr_acks = cpu_count();
    atomic_store(&data->gp_state, gp_state, ATOMIC_RELEASE);
}
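
/*
 * Check whether the global grace period state differs from the given,
 * locally cached state.
 *
 * The relaxed load pairs with the release store in
 * rcu_data_update_gp_state() : an acquire fence is only emitted when a
 * change is actually detected, keeping the common, unchanged case cheap.
 */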
static bool
rcu_data_check_gp_state(const struct rcu_data *data,
                        enum rcu_gp_state local_gp_state,
                        enum rcu_gp_state *global_gp_state)
{
    *global_gp_state = atomic_load(&data->gp_state, ATOMIC_RELAXED);

    if (unlikely(local_gp_state != *global_gp_state)) {
        atomic_fence(ATOMIC_ACQUIRE);
        return true;
    }

    return false;
}

static void
rcu_window_end(struct rcu_window *window)
{
    assert(window->active);
    window->active = false;
}

static void
rcu_window_ref(struct rcu_window *window)
{
    sref_counter_inc(&window->nr_refs);
}

static void
rcu_window_unref(struct rcu_window *window)
{
    sref_counter_dec(&window->nr_refs);
}

static uint64_t
rcu_window_get_start_ts(const struct rcu_window *window)
{
    return window->start_ts;
}

static void
rcu_window_flush(struct sref_counter *counter)
{
    (void)counter;

    rcu_data_update_gp_state(&rcu_data, RCU_GP_STATE_WORK_FLUSH);
}

static void __init
rcu_window_init(struct rcu_window *window)
{
    window->active = false;
}

static void
rcu_window_start(struct rcu_window *window)
{
    assert(!window->active);

    sref_counter_init(&window->nr_refs, 1, NULL, rcu_window_flush);
    window->start_ts = clock_get_time();
    window->active = true;
}

static bool
rcu_window_active(const struct rcu_window *window)
{
    return window->active;
}

static void
rcu_data_end_prev_window(struct rcu_data *data, uint64_t now)
{
    struct rcu_window *window;
    uint64_t duration;

    window = rcu_data_get_window(data, data->wid - 1);
    duration = clock_ticks_to_ms(now - rcu_window_get_start_ts(window));
    syscnt_set(&data->sc_last_window_ms, duration);

    if (duration > syscnt_read(&data->sc_longest_window_ms)) {
        syscnt_set(&data->sc_longest_window_ms, duration);
    }

    rcu_window_end(window);
}

static void
rcu_data_schedule_timer(struct rcu_data *data, uint64_t now)
{
    uint64_t ticks;

    ticks = clock_ticks_from_ms(RCU_WINDOW_CHECK_INTERVAL);
    timer_schedule(&data->timer, now + ticks);
}
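
/*
 * Acknowledge the current grace period state on behalf of a processor.
 *
 * The last processor to acknowledge drives the state machine forward :
 * it triggers the reader window flip after the work window flip, drops
 * the initial reference of the previous window after the reader window
 * flip, and ends the previous window after the work flush.
 */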
static void
rcu_data_ack_cpu(struct rcu_data *data)
{
    struct rcu_window *window;
    unsigned int prev_nr_acks;
    uint64_t now;

    prev_nr_acks = atomic_fetch_sub(&data->nr_acks, 1, ATOMIC_ACQ_REL);

    if (prev_nr_acks != 1) {
        assert(prev_nr_acks != 0);
        return;
    }

    switch (data->gp_state) {
    case RCU_GP_STATE_WORK_WINDOW_FLIP:
        rcu_data_update_gp_state(data, RCU_GP_STATE_READER_WINDOW_FLIP);
        break;
    case RCU_GP_STATE_READER_WINDOW_FLIP:
        window = rcu_data_get_window(data, data->wid - 1);
        rcu_window_unref(window);
        break;
    case RCU_GP_STATE_WORK_FLUSH:
        now = clock_get_time();
        rcu_data_end_prev_window(data, now);
        rcu_data_schedule_timer(data, now);
        break;
    default:
        panic("rcu: invalid grace period state");
    }
}
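
/*
 * Attempt to start a new grace period by flipping the global window ID.
 *
 * The flip may only occur once the previous window is inactive, i.e. once
 * the previous grace period has completely ended. Return true if a new
 * grace period was started.
 */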
static bool
rcu_data_flip_windows(struct rcu_data *data)
{
    struct rcu_window *window;

    window = rcu_data_get_window(data, data->wid - 1);

    if (rcu_window_active(window)) {
        return false;
    }

    rcu_window_start(window);
    syscnt_inc(&data->sc_nr_windows);
    data->wid++;
    rcu_data_update_gp_state(data, RCU_GP_STATE_WORK_WINDOW_FLIP);
    return true;
}

static void
rcu_data_check_windows(struct timer *timer)
{
    struct rcu_data *data;
    bool flipped;

    data = &rcu_data;
    flipped = rcu_data_flip_windows(data);

    if (!flipped) {
        rcu_data_schedule_timer(data, timer_get_time(timer));
    }
}

static void __init
rcu_data_init(struct rcu_data *data)
{
    data->gp_state = RCU_GP_STATE_WORK_FLUSH;
    data->nr_acks = 0;
    data->wid = RCU_WINDOW_ID_INIT_VALUE;

    for (size_t i = 0; i < ARRAY_SIZE(data->windows); i++) {
        rcu_window_init(rcu_data_get_window_from_index(data, i));
    }

    rcu_window_start(rcu_data_get_window(data, data->wid));
    timer_init(&data->timer, rcu_data_check_windows, 0);
    rcu_data_schedule_timer(data, clock_get_time());
    syscnt_register(&data->sc_nr_windows, "rcu_nr_windows");
    syscnt_register(&data->sc_last_window_ms, "rcu_last_window_ms");
    syscnt_register(&data->sc_longest_window_ms, "rcu_longest_window_ms");
}

static void __init
rcu_cpu_window_init(struct rcu_cpu_window *cpu_window)
{
    work_queue_init(&cpu_window->works);
}

static void
rcu_cpu_window_queue(struct rcu_cpu_window *cpu_window, struct work *work)
{
    work_queue_push(&cpu_window->works, work);
}

static void
rcu_cpu_window_flush(struct rcu_cpu_window *cpu_window)
{
    work_queue_schedule(&cpu_window->works, 0);
    work_queue_init(&cpu_window->works);
}

static unsigned int
rcu_cpu_data_get_reader_wid(const struct rcu_cpu_data *cpu_data)
{
    return cpu_data->reader_wid;
}

static struct rcu_cpu_window *
rcu_cpu_data_get_window_from_index(struct rcu_cpu_data *cpu_data, size_t index)
{
    assert(index < ARRAY_SIZE(cpu_data->windows));
    return &cpu_data->windows[index];
}

static struct rcu_cpu_window *
rcu_cpu_data_get_window(struct rcu_cpu_data *cpu_data, unsigned int wid)
{
    return rcu_cpu_data_get_window_from_index(cpu_data, wid & 1);
}

static void __init
rcu_cpu_data_init(struct rcu_cpu_data *cpu_data, unsigned int cpu)
{
    struct rcu_data *data;
    char name[SYSCNT_NAME_SIZE];

    data = &rcu_data;

    cpu_data->gp_state = rcu_data_get_gp_state(data);
    cpu_data->work_wid = rcu_data_get_wid(data);
    cpu_data->reader_wid = cpu_data->work_wid;

    for (size_t i = 0; i < ARRAY_SIZE(cpu_data->windows); i++) {
        rcu_cpu_window_init(rcu_cpu_data_get_window_from_index(cpu_data, i));
    }

    snprintf(name, sizeof(name), "rcu_nr_detected_readers/%u", cpu);
    syscnt_register(&cpu_data->sc_nr_detected_readers, name);
}

static void
rcu_cpu_data_queue(struct rcu_cpu_data *cpu_data, struct work *work)
{
    struct rcu_cpu_window *cpu_window;

    cpu_window = rcu_cpu_data_get_window(cpu_data, cpu_data->work_wid);
    rcu_cpu_window_queue(cpu_window, work);
}

static void
rcu_cpu_data_flush(struct rcu_cpu_data *cpu_data)
{
    struct rcu_cpu_window *cpu_window;

    assert(cpu_data->work_wid == cpu_data->reader_wid);

    cpu_window = rcu_cpu_data_get_window(cpu_data, cpu_data->work_wid - 1);
    rcu_cpu_window_flush(cpu_window);
}

void
rcu_reader_init(struct rcu_reader *reader)
{
    reader->level = 0;
    reader->linked = false;
}

static void
rcu_reader_link(struct rcu_reader *reader, struct rcu_cpu_data *cpu_data)
{
    assert(!cpu_intr_enabled());
    assert(reader == thread_rcu_reader(thread_self()));
    assert(!rcu_reader_linked(reader));

    reader->wid = rcu_cpu_data_get_reader_wid(cpu_data);
    reader->linked = true;
}

static void
rcu_reader_unlink(struct rcu_reader *reader)
{
    assert(reader->level == 0);
    reader->linked = false;
}
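
/*
 * Detect a reader by linking it to the current reader window and adding
 * a reference to that window. Calling this function multiple times on
 * the same running reader is safe, since an already linked reader is
 * left untouched.
 */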
static void
rcu_reader_enter(struct rcu_reader *reader, struct rcu_cpu_data *cpu_data)
{
    struct rcu_window *window;
    struct rcu_data *data;
    unsigned int wid;

    if (rcu_reader_linked(reader)) {
        return;
    }

    data = &rcu_data;
    wid = rcu_cpu_data_get_reader_wid(cpu_data);
    window = rcu_data_get_window(data, wid);

    rcu_reader_link(reader, cpu_data);
    rcu_window_ref(window);
    syscnt_inc(&cpu_data->sc_nr_detected_readers);
}

void
rcu_reader_leave(struct rcu_reader *reader)
{
    struct rcu_window *window;
    struct rcu_data *data;

    data = &rcu_data;

    window = rcu_data_get_window(data, reader->wid);
    rcu_window_unref(window);
    rcu_reader_unlink(reader);
}

static void
rcu_reader_account(struct rcu_reader *reader, struct rcu_cpu_data *cpu_data)
{
    if (rcu_reader_in_cs(reader)) {
        rcu_reader_enter(reader, cpu_data);
    }
}

static void
rcu_cpu_data_flip_work_wid(struct rcu_cpu_data *cpu_data)
{
    assert(!cpu_intr_enabled());
    assert(!thread_preempt_enabled());

    cpu_data->work_wid++;
}
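
/*
 * The current reader, if any, must be accounted for before the flip, so
 * that it gets linked to the window it actually started in instead of
 * the next one.
 */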
static void
rcu_cpu_data_flip_reader_wid(struct rcu_cpu_data *cpu_data)
{
    assert(!cpu_intr_enabled());
    assert(!thread_preempt_enabled());

    rcu_reader_account(thread_rcu_reader(thread_self()), cpu_data);
    cpu_data->reader_wid++;
}

static void
rcu_cpu_data_check_gp_state(struct rcu_cpu_data *cpu_data)
{
    enum rcu_gp_state local_gp_state, global_gp_state;
    struct rcu_data *data;
    bool diff;

    data = &rcu_data;

    /*
     * A loop is used to optimize the case where a processor is the last to
     * acknowledge a grace period state change, in which case the latter
     * also immediately changes and can be acknowledged right away. As a
     * result, this loop never runs more than twice.
     */
    for (unsigned int i = 0; /* no condition */; i++) {
        local_gp_state = cpu_data->gp_state;
        diff = rcu_data_check_gp_state(data, local_gp_state, &global_gp_state);

        if (!diff) {
            break;
        }

        assert(i < 2);

        switch (global_gp_state) {
        case RCU_GP_STATE_WORK_WINDOW_FLIP:
            rcu_cpu_data_flip_work_wid(cpu_data);
            rcu_data_ack_cpu(data);
            break;
        case RCU_GP_STATE_READER_WINDOW_FLIP:
            rcu_cpu_data_flip_reader_wid(cpu_data);
            rcu_data_ack_cpu(data);
            break;
        case RCU_GP_STATE_WORK_FLUSH:
            rcu_cpu_data_flush(cpu_data);
            rcu_data_ack_cpu(data);
            break;
        default:
            panic("rcu: invalid grace period state");
        }

        cpu_data->gp_state = global_gp_state;
    }
}

void
rcu_report_context_switch(struct rcu_reader *reader)
{
    assert(!cpu_intr_enabled());
    assert(!thread_preempt_enabled());

    /*
     * Most readers don't need to be accounted for because their execution
     * doesn't overlap with a grace period. If a reader is preempted however,
     * it must be accounted for in case a grace period starts while the
     * reader is preempted. Accounting also occurs when a grace period
     * starts, more precisely when the reader window ID of a processor is
     * flipped.
     */
    rcu_reader_account(reader, rcu_get_cpu_data());
}

void
rcu_report_periodic_event(void)
{
    assert(!cpu_intr_enabled());
    assert(!thread_preempt_enabled());

    rcu_cpu_data_check_gp_state(rcu_get_cpu_data());
}

void
rcu_defer(struct work *work)
{
    struct rcu_cpu_data *cpu_data;
    unsigned long flags;

    assert(!rcu_reader_in_cs(thread_rcu_reader(thread_self())));

    thread_preempt_disable_intr_save(&flags);
    cpu_data = rcu_get_cpu_data();
    rcu_cpu_data_queue(cpu_data, work);
    thread_preempt_enable_intr_restore(flags);
}
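
/*
 * Typical deferred reclamation sketch. The embedding of a struct work
 * and the structof() retrieval match how rcu_waiter is implemented below;
 * the struct obj type and the obj_free() callback are hypothetical :
 *
 *   struct obj {
 *       struct work work;
 *       ...
 *   };
 *
 *   static void
 *   obj_free(struct work *work)
 *   {
 *       struct obj *obj;
 *
 *       obj = structof(work, struct obj, work);
 *       // Release obj here. This runs once a full grace period has
 *       // elapsed, i.e. once no reader may still access the object.
 *   }
 *
 *   work_init(&obj->work, obj_free);
 *   rcu_defer(&obj->work);
 */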

static void
rcu_waiter_wakeup(struct work *work)
{
    struct rcu_waiter *waiter;

    waiter = structof(work, struct rcu_waiter, work);

    spinlock_lock(&waiter->lock);
    waiter->done = true;
    thread_wakeup(waiter->thread);
    spinlock_unlock(&waiter->lock);
}

static void
rcu_waiter_init(struct rcu_waiter *waiter, struct thread *thread)
{
    work_init(&waiter->work, rcu_waiter_wakeup);
    spinlock_init(&waiter->lock);
    waiter->thread = thread;
    waiter->done = false;
}

static void
rcu_waiter_wait(struct rcu_waiter *waiter)
{
    rcu_defer(&waiter->work);

    spinlock_lock(&waiter->lock);

    while (!waiter->done) {
        thread_sleep(&waiter->lock, waiter, "rcu_wait");
    }

    spinlock_unlock(&waiter->lock);
}
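
/*
 * Waiting is implemented by deferring a work that wakes up the calling
 * thread. Since works queued on the current window only run once that
 * window ends, the caller is guaranteed that a full grace period has
 * elapsed by the time this function returns.
 */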
void
rcu_wait(void)
{
    struct rcu_waiter waiter;

    rcu_waiter_init(&waiter, thread_self());
    rcu_waiter_wait(&waiter);
}

static int __init
rcu_bootstrap(void)
{
    rcu_data_init(&rcu_data);
    rcu_cpu_data_init(cpu_local_ptr(rcu_cpu_data), 0);
    return 0;
}

INIT_OP_DEFINE(rcu_bootstrap,
               INIT_OP_DEP(spinlock_setup, true),
               INIT_OP_DEP(sref_bootstrap, true),
               INIT_OP_DEP(syscnt_setup, true),
               INIT_OP_DEP(thread_bootstrap, true),
               INIT_OP_DEP(timer_bootstrap, true));

static int __init
rcu_setup(void)
{
    for (unsigned int i = 1; i < cpu_count(); i++) {
        rcu_cpu_data_init(percpu_ptr(rcu_cpu_data, i), i);
    }

    return 0;
}

INIT_OP_DEFINE(rcu_setup,
               INIT_OP_DEP(cpu_mp_probe, true),
               INIT_OP_DEP(rcu_bootstrap, true));