tree_exp.h 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812
  1. /*
  2. * RCU expedited grace periods
  3. *
  4. * This program is free software; you can redistribute it and/or modify
  5. * it under the terms of the GNU General Public License as published by
  6. * the Free Software Foundation; either version 2 of the License, or
  7. * (at your option) any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, you can access it online at
  16. * http://www.gnu.org/licenses/gpl-2.0.html.
  17. *
  18. * Copyright IBM Corporation, 2016
  19. *
  20. * Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
  21. */
  22. #include <linux/lockdep.h>
  23. /*
  24. * Record the start of an expedited grace period.
  25. */
  26. static void rcu_exp_gp_seq_start(struct rcu_state *rsp)
  27. {
  28. rcu_seq_start(&rsp->expedited_sequence);
  29. }
  30. /*
  31. * Return then value that expedited-grace-period counter will have
  32. * at the end of the current grace period.
  33. */
  34. static __maybe_unused unsigned long rcu_exp_gp_seq_endval(struct rcu_state *rsp)
  35. {
  36. return rcu_seq_endval(&rsp->expedited_sequence);
  37. }
  38. /*
  39. * Record the end of an expedited grace period.
  40. */
  41. static void rcu_exp_gp_seq_end(struct rcu_state *rsp)
  42. {
  43. rcu_seq_end(&rsp->expedited_sequence);
  44. smp_mb(); /* Ensure that consecutive grace periods serialize. */
  45. }
  46. /*
  47. * Take a snapshot of the expedited-grace-period counter.
  48. */
  49. static unsigned long rcu_exp_gp_seq_snap(struct rcu_state *rsp)
  50. {
  51. unsigned long s;
  52. smp_mb(); /* Caller's modifications seen first by other CPUs. */
  53. s = rcu_seq_snap(&rsp->expedited_sequence);
  54. trace_rcu_exp_grace_period(rsp->name, s, TPS("snap"));
  55. return s;
  56. }
  57. /*
  58. * Given a counter snapshot from rcu_exp_gp_seq_snap(), return true
  59. * if a full expedited grace period has elapsed since that snapshot
  60. * was taken.
  61. */
  62. static bool rcu_exp_gp_seq_done(struct rcu_state *rsp, unsigned long s)
  63. {
  64. return rcu_seq_done(&rsp->expedited_sequence, s);
  65. }
  66. /*
  67. * Reset the ->expmaskinit values in the rcu_node tree to reflect any
  68. * recent CPU-online activity. Note that these masks are not cleared
  69. * when CPUs go offline, so they reflect the union of all CPUs that have
  70. * ever been online. This means that this function normally takes its
  71. * no-work-to-do fastpath.
  72. */
  73. static void sync_exp_reset_tree_hotplug(struct rcu_state *rsp)
  74. {
  75. bool done;
  76. unsigned long flags;
  77. unsigned long mask;
  78. unsigned long oldmask;
  79. int ncpus = smp_load_acquire(&rsp->ncpus); /* Order against locking. */
  80. struct rcu_node *rnp;
  81. struct rcu_node *rnp_up;
  82. /* If no new CPUs onlined since last time, nothing to do. */
  83. if (likely(ncpus == rsp->ncpus_snap))
  84. return;
  85. rsp->ncpus_snap = ncpus;
  86. /*
  87. * Each pass through the following loop propagates newly onlined
  88. * CPUs for the current rcu_node structure up the rcu_node tree.
  89. */
  90. rcu_for_each_leaf_node(rsp, rnp) {
  91. raw_spin_lock_irqsave_rcu_node(rnp, flags);
  92. if (rnp->expmaskinit == rnp->expmaskinitnext) {
  93. raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
  94. continue; /* No new CPUs, nothing to do. */
  95. }
  96. /* Update this node's mask, track old value for propagation. */
  97. oldmask = rnp->expmaskinit;
  98. rnp->expmaskinit = rnp->expmaskinitnext;
  99. raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
  100. /* If was already nonzero, nothing to propagate. */
  101. if (oldmask)
  102. continue;
  103. /* Propagate the new CPU up the tree. */
  104. mask = rnp->grpmask;
  105. rnp_up = rnp->parent;
  106. done = false;
  107. while (rnp_up) {
  108. raw_spin_lock_irqsave_rcu_node(rnp_up, flags);
  109. if (rnp_up->expmaskinit)
  110. done = true;
  111. rnp_up->expmaskinit |= mask;
  112. raw_spin_unlock_irqrestore_rcu_node(rnp_up, flags);
  113. if (done)
  114. break;
  115. mask = rnp_up->grpmask;
  116. rnp_up = rnp_up->parent;
  117. }
  118. }
  119. }
  120. /*
  121. * Reset the ->expmask values in the rcu_node tree in preparation for
  122. * a new expedited grace period.
  123. */
  124. static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp)
  125. {
  126. unsigned long flags;
  127. struct rcu_node *rnp;
  128. sync_exp_reset_tree_hotplug(rsp);
  129. rcu_for_each_node_breadth_first(rsp, rnp) {
  130. raw_spin_lock_irqsave_rcu_node(rnp, flags);
  131. WARN_ON_ONCE(rnp->expmask);
  132. rnp->expmask = rnp->expmaskinit;
  133. raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
  134. }
  135. }
  136. /*
  137. * Return non-zero if there is no RCU expedited grace period in progress
  138. * for the specified rcu_node structure, in other words, if all CPUs and
  139. * tasks covered by the specified rcu_node structure have done their bit
  140. * for the current expedited grace period. Works only for preemptible
  141. * RCU -- other RCU implementation use other means.
  142. *
  143. * Caller must hold the specificed rcu_node structure's ->lock
  144. */
  145. static bool sync_rcu_preempt_exp_done(struct rcu_node *rnp)
  146. {
  147. raw_lockdep_assert_held_rcu_node(rnp);
  148. return rnp->exp_tasks == NULL &&
  149. READ_ONCE(rnp->expmask) == 0;
  150. }
  151. /*
  152. * Like sync_rcu_preempt_exp_done(), but this function assumes the caller
  153. * doesn't hold the rcu_node's ->lock, and will acquire and release the lock
  154. * itself
  155. */
  156. static bool sync_rcu_preempt_exp_done_unlocked(struct rcu_node *rnp)
  157. {
  158. unsigned long flags;
  159. bool ret;
  160. raw_spin_lock_irqsave_rcu_node(rnp, flags);
  161. ret = sync_rcu_preempt_exp_done(rnp);
  162. raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
  163. return ret;
  164. }
  165. /*
  166. * Report the exit from RCU read-side critical section for the last task
  167. * that queued itself during or before the current expedited preemptible-RCU
  168. * grace period. This event is reported either to the rcu_node structure on
  169. * which the task was queued or to one of that rcu_node structure's ancestors,
  170. * recursively up the tree. (Calm down, calm down, we do the recursion
  171. * iteratively!)
  172. *
  173. * Caller must hold the specified rcu_node structure's ->lock.
  174. */
  175. static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
  176. bool wake, unsigned long flags)
  177. __releases(rnp->lock)
  178. {
  179. unsigned long mask;
  180. for (;;) {
  181. if (!sync_rcu_preempt_exp_done(rnp)) {
  182. if (!rnp->expmask)
  183. rcu_initiate_boost(rnp, flags);
  184. else
  185. raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
  186. break;
  187. }
  188. if (rnp->parent == NULL) {
  189. raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
  190. if (wake) {
  191. smp_mb(); /* EGP done before wake_up(). */
  192. swake_up_one(&rsp->expedited_wq);
  193. }
  194. break;
  195. }
  196. mask = rnp->grpmask;
  197. raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled */
  198. rnp = rnp->parent;
  199. raw_spin_lock_rcu_node(rnp); /* irqs already disabled */
  200. WARN_ON_ONCE(!(rnp->expmask & mask));
  201. rnp->expmask &= ~mask;
  202. }
  203. }
  204. /*
  205. * Report expedited quiescent state for specified node. This is a
  206. * lock-acquisition wrapper function for __rcu_report_exp_rnp().
  207. */
  208. static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp,
  209. struct rcu_node *rnp, bool wake)
  210. {
  211. unsigned long flags;
  212. raw_spin_lock_irqsave_rcu_node(rnp, flags);
  213. __rcu_report_exp_rnp(rsp, rnp, wake, flags);
  214. }
  215. /*
  216. * Report expedited quiescent state for multiple CPUs, all covered by the
  217. * specified leaf rcu_node structure.
  218. */
  219. static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp,
  220. unsigned long mask, bool wake)
  221. {
  222. unsigned long flags;
  223. raw_spin_lock_irqsave_rcu_node(rnp, flags);
  224. if (!(rnp->expmask & mask)) {
  225. raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
  226. return;
  227. }
  228. rnp->expmask &= ~mask;
  229. __rcu_report_exp_rnp(rsp, rnp, wake, flags); /* Releases rnp->lock. */
  230. }
  231. /*
  232. * Report expedited quiescent state for specified rcu_data (CPU).
  233. */
  234. static void rcu_report_exp_rdp(struct rcu_state *rsp, struct rcu_data *rdp,
  235. bool wake)
  236. {
  237. rcu_report_exp_cpu_mult(rsp, rdp->mynode, rdp->grpmask, wake);
  238. }
  239. /* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */
  240. static bool sync_exp_work_done(struct rcu_state *rsp, unsigned long s)
  241. {
  242. if (rcu_exp_gp_seq_done(rsp, s)) {
  243. trace_rcu_exp_grace_period(rsp->name, s, TPS("done"));
  244. /* Ensure test happens before caller kfree(). */
  245. smp_mb__before_atomic(); /* ^^^ */
  246. return true;
  247. }
  248. return false;
  249. }
  250. /*
  251. * Funnel-lock acquisition for expedited grace periods. Returns true
  252. * if some other task completed an expedited grace period that this task
  253. * can piggy-back on, and with no mutex held. Otherwise, returns false
  254. * with the mutex held, indicating that the caller must actually do the
  255. * expedited grace period.
  256. */
  257. static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
  258. {
  259. struct rcu_data *rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
  260. struct rcu_node *rnp = rdp->mynode;
  261. struct rcu_node *rnp_root = rcu_get_root(rsp);
  262. /* Low-contention fastpath. */
  263. if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s) &&
  264. (rnp == rnp_root ||
  265. ULONG_CMP_LT(READ_ONCE(rnp_root->exp_seq_rq), s)) &&
  266. mutex_trylock(&rsp->exp_mutex))
  267. goto fastpath;
  268. /*
  269. * Each pass through the following loop works its way up
  270. * the rcu_node tree, returning if others have done the work or
  271. * otherwise falls through to acquire rsp->exp_mutex. The mapping
  272. * from CPU to rcu_node structure can be inexact, as it is just
  273. * promoting locality and is not strictly needed for correctness.
  274. */
  275. for (; rnp != NULL; rnp = rnp->parent) {
  276. if (sync_exp_work_done(rsp, s))
  277. return true;
  278. /* Work not done, either wait here or go up. */
  279. spin_lock(&rnp->exp_lock);
  280. if (ULONG_CMP_GE(rnp->exp_seq_rq, s)) {
  281. /* Someone else doing GP, so wait for them. */
  282. spin_unlock(&rnp->exp_lock);
  283. trace_rcu_exp_funnel_lock(rsp->name, rnp->level,
  284. rnp->grplo, rnp->grphi,
  285. TPS("wait"));
  286. wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
  287. sync_exp_work_done(rsp, s));
  288. return true;
  289. }
  290. rnp->exp_seq_rq = s; /* Followers can wait on us. */
  291. spin_unlock(&rnp->exp_lock);
  292. trace_rcu_exp_funnel_lock(rsp->name, rnp->level, rnp->grplo,
  293. rnp->grphi, TPS("nxtlvl"));
  294. }
  295. mutex_lock(&rsp->exp_mutex);
  296. fastpath:
  297. if (sync_exp_work_done(rsp, s)) {
  298. mutex_unlock(&rsp->exp_mutex);
  299. return true;
  300. }
  301. rcu_exp_gp_seq_start(rsp);
  302. trace_rcu_exp_grace_period(rsp->name, s, TPS("start"));
  303. return false;
  304. }
  305. /* Invoked on each online non-idle CPU for expedited quiescent state. */
  306. static void sync_sched_exp_handler(void *data)
  307. {
  308. struct rcu_data *rdp;
  309. struct rcu_node *rnp;
  310. struct rcu_state *rsp = data;
  311. rdp = this_cpu_ptr(rsp->rda);
  312. rnp = rdp->mynode;
  313. if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
  314. __this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))
  315. return;
  316. if (rcu_is_cpu_rrupt_from_idle()) {
  317. rcu_report_exp_rdp(&rcu_sched_state,
  318. this_cpu_ptr(&rcu_sched_data), true);
  319. return;
  320. }
  321. __this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, true);
  322. /* Store .exp before .rcu_urgent_qs. */
  323. smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true);
  324. resched_cpu(smp_processor_id());
  325. }
  326. /* Send IPI for expedited cleanup if needed at end of CPU-hotplug operation. */
  327. static void sync_sched_exp_online_cleanup(int cpu)
  328. {
  329. struct rcu_data *rdp;
  330. int ret;
  331. struct rcu_node *rnp;
  332. struct rcu_state *rsp = &rcu_sched_state;
  333. rdp = per_cpu_ptr(rsp->rda, cpu);
  334. rnp = rdp->mynode;
  335. if (!(READ_ONCE(rnp->expmask) & rdp->grpmask))
  336. return;
  337. ret = smp_call_function_single(cpu, sync_sched_exp_handler, rsp, 0);
  338. WARN_ON_ONCE(ret);
  339. }
  340. /*
  341. * Select the CPUs within the specified rcu_node that the upcoming
  342. * expedited grace period needs to wait for.
  343. */
  344. static void sync_rcu_exp_select_node_cpus(struct work_struct *wp)
  345. {
  346. int cpu;
  347. unsigned long flags;
  348. smp_call_func_t func;
  349. unsigned long mask_ofl_test;
  350. unsigned long mask_ofl_ipi;
  351. int ret;
  352. struct rcu_exp_work *rewp =
  353. container_of(wp, struct rcu_exp_work, rew_work);
  354. struct rcu_node *rnp = container_of(rewp, struct rcu_node, rew);
  355. struct rcu_state *rsp = rewp->rew_rsp;
  356. func = rewp->rew_func;
  357. raw_spin_lock_irqsave_rcu_node(rnp, flags);
  358. /* Each pass checks a CPU for identity, offline, and idle. */
  359. mask_ofl_test = 0;
  360. for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
  361. unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
  362. struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
  363. struct rcu_dynticks *rdtp = per_cpu_ptr(&rcu_dynticks, cpu);
  364. int snap;
  365. if (raw_smp_processor_id() == cpu ||
  366. !(rnp->qsmaskinitnext & mask)) {
  367. mask_ofl_test |= mask;
  368. } else {
  369. snap = rcu_dynticks_snap(rdtp);
  370. if (rcu_dynticks_in_eqs(snap))
  371. mask_ofl_test |= mask;
  372. else
  373. rdp->exp_dynticks_snap = snap;
  374. }
  375. }
  376. mask_ofl_ipi = rnp->expmask & ~mask_ofl_test;
  377. /*
  378. * Need to wait for any blocked tasks as well. Note that
  379. * additional blocking tasks will also block the expedited GP
  380. * until such time as the ->expmask bits are cleared.
  381. */
  382. if (rcu_preempt_has_tasks(rnp))
  383. rnp->exp_tasks = rnp->blkd_tasks.next;
  384. raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
  385. /* IPI the remaining CPUs for expedited quiescent state. */
  386. for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
  387. unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
  388. struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
  389. if (!(mask_ofl_ipi & mask))
  390. continue;
  391. retry_ipi:
  392. if (rcu_dynticks_in_eqs_since(rdp->dynticks,
  393. rdp->exp_dynticks_snap)) {
  394. mask_ofl_test |= mask;
  395. continue;
  396. }
  397. ret = smp_call_function_single(cpu, func, rsp, 0);
  398. if (!ret) {
  399. mask_ofl_ipi &= ~mask;
  400. continue;
  401. }
  402. /* Failed, raced with CPU hotplug operation. */
  403. raw_spin_lock_irqsave_rcu_node(rnp, flags);
  404. if ((rnp->qsmaskinitnext & mask) &&
  405. (rnp->expmask & mask)) {
  406. /* Online, so delay for a bit and try again. */
  407. raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
  408. trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("selectofl"));
  409. schedule_timeout_uninterruptible(1);
  410. goto retry_ipi;
  411. }
  412. /* CPU really is offline, so we can ignore it. */
  413. if (!(rnp->expmask & mask))
  414. mask_ofl_ipi &= ~mask;
  415. raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
  416. }
  417. /* Report quiescent states for those that went offline. */
  418. mask_ofl_test |= mask_ofl_ipi;
  419. if (mask_ofl_test)
  420. rcu_report_exp_cpu_mult(rsp, rnp, mask_ofl_test, false);
  421. }
  422. /*
  423. * Select the nodes that the upcoming expedited grace period needs
  424. * to wait for.
  425. */
  426. static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
  427. smp_call_func_t func)
  428. {
  429. int cpu;
  430. struct rcu_node *rnp;
  431. trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("reset"));
  432. sync_exp_reset_tree(rsp);
  433. trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("select"));
  434. /* Schedule work for each leaf rcu_node structure. */
  435. rcu_for_each_leaf_node(rsp, rnp) {
  436. rnp->exp_need_flush = false;
  437. if (!READ_ONCE(rnp->expmask))
  438. continue; /* Avoid early boot non-existent wq. */
  439. rnp->rew.rew_func = func;
  440. rnp->rew.rew_rsp = rsp;
  441. if (!READ_ONCE(rcu_par_gp_wq) ||
  442. rcu_scheduler_active != RCU_SCHEDULER_RUNNING ||
  443. rcu_is_last_leaf_node(rsp, rnp)) {
  444. /* No workqueues yet or last leaf, do direct call. */
  445. sync_rcu_exp_select_node_cpus(&rnp->rew.rew_work);
  446. continue;
  447. }
  448. INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus);
  449. preempt_disable();
  450. cpu = cpumask_next(rnp->grplo - 1, cpu_online_mask);
  451. /* If all offline, queue the work on an unbound CPU. */
  452. if (unlikely(cpu > rnp->grphi))
  453. cpu = WORK_CPU_UNBOUND;
  454. queue_work_on(cpu, rcu_par_gp_wq, &rnp->rew.rew_work);
  455. preempt_enable();
  456. rnp->exp_need_flush = true;
  457. }
  458. /* Wait for workqueue jobs (if any) to complete. */
  459. rcu_for_each_leaf_node(rsp, rnp)
  460. if (rnp->exp_need_flush)
  461. flush_work(&rnp->rew.rew_work);
  462. }
  463. static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
  464. {
  465. int cpu;
  466. unsigned long jiffies_stall;
  467. unsigned long jiffies_start;
  468. unsigned long mask;
  469. int ndetected;
  470. struct rcu_node *rnp;
  471. struct rcu_node *rnp_root = rcu_get_root(rsp);
  472. int ret;
  473. trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("startwait"));
  474. jiffies_stall = rcu_jiffies_till_stall_check();
  475. jiffies_start = jiffies;
  476. for (;;) {
  477. ret = swait_event_timeout_exclusive(
  478. rsp->expedited_wq,
  479. sync_rcu_preempt_exp_done_unlocked(rnp_root),
  480. jiffies_stall);
  481. if (ret > 0 || sync_rcu_preempt_exp_done_unlocked(rnp_root))
  482. return;
  483. WARN_ON(ret < 0); /* workqueues should not be signaled. */
  484. if (rcu_cpu_stall_suppress)
  485. continue;
  486. panic_on_rcu_stall();
  487. pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {",
  488. rsp->name);
  489. ndetected = 0;
  490. rcu_for_each_leaf_node(rsp, rnp) {
  491. ndetected += rcu_print_task_exp_stall(rnp);
  492. for_each_leaf_node_possible_cpu(rnp, cpu) {
  493. struct rcu_data *rdp;
  494. mask = leaf_node_cpu_bit(rnp, cpu);
  495. if (!(rnp->expmask & mask))
  496. continue;
  497. ndetected++;
  498. rdp = per_cpu_ptr(rsp->rda, cpu);
  499. pr_cont(" %d-%c%c%c", cpu,
  500. "O."[!!cpu_online(cpu)],
  501. "o."[!!(rdp->grpmask & rnp->expmaskinit)],
  502. "N."[!!(rdp->grpmask & rnp->expmaskinitnext)]);
  503. }
  504. }
  505. pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
  506. jiffies - jiffies_start, rsp->expedited_sequence,
  507. rnp_root->expmask, ".T"[!!rnp_root->exp_tasks]);
  508. if (ndetected) {
  509. pr_err("blocking rcu_node structures:");
  510. rcu_for_each_node_breadth_first(rsp, rnp) {
  511. if (rnp == rnp_root)
  512. continue; /* printed unconditionally */
  513. if (sync_rcu_preempt_exp_done_unlocked(rnp))
  514. continue;
  515. pr_cont(" l=%u:%d-%d:%#lx/%c",
  516. rnp->level, rnp->grplo, rnp->grphi,
  517. rnp->expmask,
  518. ".T"[!!rnp->exp_tasks]);
  519. }
  520. pr_cont("\n");
  521. }
  522. rcu_for_each_leaf_node(rsp, rnp) {
  523. for_each_leaf_node_possible_cpu(rnp, cpu) {
  524. mask = leaf_node_cpu_bit(rnp, cpu);
  525. if (!(rnp->expmask & mask))
  526. continue;
  527. dump_cpu_task(cpu);
  528. }
  529. }
  530. jiffies_stall = 3 * rcu_jiffies_till_stall_check() + 3;
  531. }
  532. }
  533. /*
  534. * Wait for the current expedited grace period to complete, and then
  535. * wake up everyone who piggybacked on the just-completed expedited
  536. * grace period. Also update all the ->exp_seq_rq counters as needed
  537. * in order to avoid counter-wrap problems.
  538. */
  539. static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s)
  540. {
  541. struct rcu_node *rnp;
  542. synchronize_sched_expedited_wait(rsp);
  543. rcu_exp_gp_seq_end(rsp);
  544. trace_rcu_exp_grace_period(rsp->name, s, TPS("end"));
  545. /*
  546. * Switch over to wakeup mode, allowing the next GP, but -only- the
  547. * next GP, to proceed.
  548. */
  549. mutex_lock(&rsp->exp_wake_mutex);
  550. rcu_for_each_node_breadth_first(rsp, rnp) {
  551. if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s)) {
  552. spin_lock(&rnp->exp_lock);
  553. /* Recheck, avoid hang in case someone just arrived. */
  554. if (ULONG_CMP_LT(rnp->exp_seq_rq, s))
  555. rnp->exp_seq_rq = s;
  556. spin_unlock(&rnp->exp_lock);
  557. }
  558. smp_mb(); /* All above changes before wakeup. */
  559. wake_up_all(&rnp->exp_wq[rcu_seq_ctr(rsp->expedited_sequence) & 0x3]);
  560. }
  561. trace_rcu_exp_grace_period(rsp->name, s, TPS("endwake"));
  562. mutex_unlock(&rsp->exp_wake_mutex);
  563. }
  564. /*
  565. * Common code to drive an expedited grace period forward, used by
  566. * workqueues and mid-boot-time tasks.
  567. */
  568. static void rcu_exp_sel_wait_wake(struct rcu_state *rsp,
  569. smp_call_func_t func, unsigned long s)
  570. {
  571. /* Initialize the rcu_node tree in preparation for the wait. */
  572. sync_rcu_exp_select_cpus(rsp, func);
  573. /* Wait and clean up, including waking everyone. */
  574. rcu_exp_wait_wake(rsp, s);
  575. }
  576. /*
  577. * Work-queue handler to drive an expedited grace period forward.
  578. */
  579. static void wait_rcu_exp_gp(struct work_struct *wp)
  580. {
  581. struct rcu_exp_work *rewp;
  582. rewp = container_of(wp, struct rcu_exp_work, rew_work);
  583. rcu_exp_sel_wait_wake(rewp->rew_rsp, rewp->rew_func, rewp->rew_s);
  584. }
  585. /*
  586. * Given an rcu_state pointer and a smp_call_function() handler, kick
  587. * off the specified flavor of expedited grace period.
  588. */
  589. static void _synchronize_rcu_expedited(struct rcu_state *rsp,
  590. smp_call_func_t func)
  591. {
  592. struct rcu_data *rdp;
  593. struct rcu_exp_work rew;
  594. struct rcu_node *rnp;
  595. unsigned long s;
  596. /* If expedited grace periods are prohibited, fall back to normal. */
  597. if (rcu_gp_is_normal()) {
  598. wait_rcu_gp(rsp->call);
  599. return;
  600. }
  601. /* Take a snapshot of the sequence number. */
  602. s = rcu_exp_gp_seq_snap(rsp);
  603. if (exp_funnel_lock(rsp, s))
  604. return; /* Someone else did our work for us. */
  605. /* Ensure that load happens before action based on it. */
  606. if (unlikely(rcu_scheduler_active == RCU_SCHEDULER_INIT)) {
  607. /* Direct call during scheduler init and early_initcalls(). */
  608. rcu_exp_sel_wait_wake(rsp, func, s);
  609. } else {
  610. /* Marshall arguments & schedule the expedited grace period. */
  611. rew.rew_func = func;
  612. rew.rew_rsp = rsp;
  613. rew.rew_s = s;
  614. INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp);
  615. queue_work(rcu_gp_wq, &rew.rew_work);
  616. }
  617. /* Wait for expedited grace period to complete. */
  618. rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
  619. rnp = rcu_get_root(rsp);
  620. wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
  621. sync_exp_work_done(rsp, s));
  622. smp_mb(); /* Workqueue actions happen before return. */
  623. /* Let the next expedited grace period start. */
  624. mutex_unlock(&rsp->exp_mutex);
  625. }
  626. /**
  627. * synchronize_sched_expedited - Brute-force RCU-sched grace period
  628. *
  629. * Wait for an RCU-sched grace period to elapse, but use a "big hammer"
  630. * approach to force the grace period to end quickly. This consumes
  631. * significant time on all CPUs and is unfriendly to real-time workloads,
  632. * so is thus not recommended for any sort of common-case code. In fact,
  633. * if you are using synchronize_sched_expedited() in a loop, please
  634. * restructure your code to batch your updates, and then use a single
  635. * synchronize_sched() instead.
  636. *
  637. * This implementation can be thought of as an application of sequence
  638. * locking to expedited grace periods, but using the sequence counter to
  639. * determine when someone else has already done the work instead of for
  640. * retrying readers.
  641. */
  642. void synchronize_sched_expedited(void)
  643. {
  644. struct rcu_state *rsp = &rcu_sched_state;
  645. RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
  646. lock_is_held(&rcu_lock_map) ||
  647. lock_is_held(&rcu_sched_lock_map),
  648. "Illegal synchronize_sched_expedited() in RCU read-side critical section");
  649. /* If only one CPU, this is automatically a grace period. */
  650. if (rcu_blocking_is_gp())
  651. return;
  652. _synchronize_rcu_expedited(rsp, sync_sched_exp_handler);
  653. }
  654. EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
  655. #ifdef CONFIG_PREEMPT_RCU
  656. /*
  657. * Remote handler for smp_call_function_single(). If there is an
  658. * RCU read-side critical section in effect, request that the
  659. * next rcu_read_unlock() record the quiescent state up the
  660. * ->expmask fields in the rcu_node tree. Otherwise, immediately
  661. * report the quiescent state.
  662. */
  663. static void sync_rcu_exp_handler(void *info)
  664. {
  665. struct rcu_data *rdp;
  666. struct rcu_state *rsp = info;
  667. struct task_struct *t = current;
  668. /*
  669. * Within an RCU read-side critical section, request that the next
  670. * rcu_read_unlock() report. Unless this RCU read-side critical
  671. * section has already blocked, in which case it is already set
  672. * up for the expedited grace period to wait on it.
  673. */
  674. if (t->rcu_read_lock_nesting > 0 &&
  675. !t->rcu_read_unlock_special.b.blocked) {
  676. t->rcu_read_unlock_special.b.exp_need_qs = true;
  677. return;
  678. }
  679. /*
  680. * We are either exiting an RCU read-side critical section (negative
  681. * values of t->rcu_read_lock_nesting) or are not in one at all
  682. * (zero value of t->rcu_read_lock_nesting). Or we are in an RCU
  683. * read-side critical section that blocked before this expedited
  684. * grace period started. Either way, we can immediately report
  685. * the quiescent state.
  686. */
  687. rdp = this_cpu_ptr(rsp->rda);
  688. rcu_report_exp_rdp(rsp, rdp, true);
  689. }
  690. /**
  691. * synchronize_rcu_expedited - Brute-force RCU grace period
  692. *
  693. * Wait for an RCU-preempt grace period, but expedite it. The basic
  694. * idea is to IPI all non-idle non-nohz online CPUs. The IPI handler
  695. * checks whether the CPU is in an RCU-preempt critical section, and
  696. * if so, it sets a flag that causes the outermost rcu_read_unlock()
  697. * to report the quiescent state. On the other hand, if the CPU is
  698. * not in an RCU read-side critical section, the IPI handler reports
  699. * the quiescent state immediately.
  700. *
  701. * Although this is a greate improvement over previous expedited
  702. * implementations, it is still unfriendly to real-time workloads, so is
  703. * thus not recommended for any sort of common-case code. In fact, if
  704. * you are using synchronize_rcu_expedited() in a loop, please restructure
  705. * your code to batch your updates, and then Use a single synchronize_rcu()
  706. * instead.
  707. */
  708. void synchronize_rcu_expedited(void)
  709. {
  710. struct rcu_state *rsp = rcu_state_p;
  711. RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
  712. lock_is_held(&rcu_lock_map) ||
  713. lock_is_held(&rcu_sched_lock_map),
  714. "Illegal synchronize_rcu_expedited() in RCU read-side critical section");
  715. if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
  716. return;
  717. _synchronize_rcu_expedited(rsp, sync_rcu_exp_handler);
  718. }
  719. EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
  720. #else /* #ifdef CONFIG_PREEMPT_RCU */
  721. /*
  722. * Wait for an rcu-preempt grace period, but make it happen quickly.
  723. * But because preemptible RCU does not exist, map to rcu-sched.
  724. */
  725. void synchronize_rcu_expedited(void)
  726. {
  727. synchronize_sched_expedited();
  728. }
  729. EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
  730. #endif /* #else #ifdef CONFIG_PREEMPT_RCU */