rstat.c

#include "cgroup-internal.h"

#include <linux/sched/cputime.h>

static DEFINE_SPINLOCK(cgroup_rstat_lock);
static DEFINE_PER_CPU(raw_spinlock_t, cgroup_rstat_cpu_lock);

static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu);

static struct cgroup_rstat_cpu *cgroup_rstat_cpu(struct cgroup *cgrp, int cpu)
{
	return per_cpu_ptr(cgrp->rstat_cpu, cpu);
}

/**
 * cgroup_rstat_updated - keep track of updated rstat_cpu
 * @cgrp: target cgroup
 * @cpu: cpu on which rstat_cpu was updated
 *
 * @cgrp's rstat_cpu on @cpu was updated. Put it on the parent's matching
 * rstat_cpu->updated_children list. See the comment on top of
 * cgroup_rstat_cpu definition for details.
 */
void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
{
	raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu);
	struct cgroup *parent;
	unsigned long flags;

	/* nothing to do for root */
	if (!cgroup_parent(cgrp))
		return;

	/*
	 * Paired with the one in cgroup_rstat_cpu_pop_updated(). Either we
	 * see NULL updated_next or they see our updated stat.
	 */
	smp_mb();

	/*
	 * Because @parent's updated_children is terminated with @parent
	 * instead of NULL, we can tell whether @cgrp is on the list by
	 * testing the next pointer for NULL.
	 */
	if (cgroup_rstat_cpu(cgrp, cpu)->updated_next)
		return;

	raw_spin_lock_irqsave(cpu_lock, flags);

	/* put @cgrp and all ancestors on the corresponding updated lists */
	for (parent = cgroup_parent(cgrp); parent;
	     cgrp = parent, parent = cgroup_parent(cgrp)) {
		struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
		struct cgroup_rstat_cpu *prstatc = cgroup_rstat_cpu(parent, cpu);

		/*
		 * Both additions and removals are bottom-up. If a cgroup
		 * is already in the tree, all ancestors are.
		 */
		if (rstatc->updated_next)
			break;

		rstatc->updated_next = prstatc->updated_children;
		prstatc->updated_children = cgrp;
	}

	raw_spin_unlock_irqrestore(cpu_lock, flags);
}
EXPORT_SYMBOL_GPL(cgroup_rstat_updated);
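
/*
 * Illustrative sketch (not part of this file): how a hypothetical controller
 * would pair the two halves of the rstat API. The hot path bumps a per-cpu
 * counter and reports the cgroup as having updated stats via
 * cgroup_rstat_updated(); the per-cpu value is only folded into a global
 * total from the controller's css_rstat_flush() callback, which
 * cgroup_rstat_flush_locked() below invokes for every updated cgroup.
 * The "example_*" names are assumptions made up for this sketch.
 *
 *	struct example_css_pcpu {
 *		u64 nr_events;
 *	};
 *
 *	// called with preemption disabled, e.g. from a scheduler hook
 *	static void example_count_event(struct cgroup_subsys_state *css,
 *					struct example_css *ec)
 *	{
 *		// cheap per-cpu update, no shared locks taken
 *		this_cpu_inc(ec->pcpu->nr_events);
 *		cgroup_rstat_updated(css->cgroup, smp_processor_id());
 *	}
 *
 *	static void example_css_rstat_flush(struct cgroup_subsys_state *css,
 *					    int cpu)
 *	{
 *		struct example_css *ec = container_of(css, struct example_css, css);
 *		struct example_css_pcpu *pcpu = per_cpu_ptr(ec->pcpu, cpu);
 *
 *		// fold the per-cpu delta into the global counter
 *		ec->nr_events_total += pcpu->nr_events;
 *		pcpu->nr_events = 0;
 *	}
 */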

/**
 * cgroup_rstat_cpu_pop_updated - iterate and dismantle rstat_cpu updated tree
 * @pos: current position
 * @root: root of the tree to traverse
 * @cpu: target cpu
 *
 * Walks the updated rstat_cpu tree on @cpu from @root. %NULL @pos starts
 * the traversal and %NULL return indicates the end. During traversal,
 * each returned cgroup is unlinked from the tree. Must be called with the
 * matching cgroup_rstat_cpu_lock held.
 *
 * The only ordering guarantee is that, for a parent and a child pair
 * covered by a given traversal, if a child is visited, its parent is
 * guaranteed to be visited afterwards.
 */
static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos,
						   struct cgroup *root, int cpu)
{
	struct cgroup_rstat_cpu *rstatc;

	if (pos == root)
		return NULL;

	/*
	 * We're going to walk down to the first leaf and visit/remove it.
	 * We can pick any unvisited node as the starting point.
	 */
	if (!pos)
		pos = root;
	else
		pos = cgroup_parent(pos);

	/* walk down to the first leaf */
	while (true) {
		rstatc = cgroup_rstat_cpu(pos, cpu);
		if (rstatc->updated_children == pos)
			break;
		pos = rstatc->updated_children;
	}

	/*
	 * Unlink @pos from the tree. As the updated_children list is
	 * singly linked, we have to walk it to find the removal point.
	 * However, due to the way we traverse, @pos will be the first
	 * child in most cases. The only exception is @root.
	 */
	if (rstatc->updated_next) {
		struct cgroup *parent = cgroup_parent(pos);
		struct cgroup_rstat_cpu *prstatc = cgroup_rstat_cpu(parent, cpu);
		struct cgroup_rstat_cpu *nrstatc;
		struct cgroup **nextp;

		nextp = &prstatc->updated_children;
		while (true) {
			nrstatc = cgroup_rstat_cpu(*nextp, cpu);
			if (*nextp == pos)
				break;

			WARN_ON_ONCE(*nextp == parent);
			nextp = &nrstatc->updated_next;
		}

		*nextp = rstatc->updated_next;
		rstatc->updated_next = NULL;

		/*
		 * Paired with the one in cgroup_rstat_updated(). Either
		 * they see NULL updated_next or we see their updated stat.
		 */
		smp_mb();

		return pos;
	}

	/* only happens for @root */
	return NULL;
}
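
/*
 * Worked example (illustrative): suppose the updated tree on a CPU is
 * @root -> A -> B, i.e. B was updated, which also linked A under @root.
 * Successive calls return B first (the deepest updated leaf), then A,
 * then @root itself if it sits on its own parent's updated list, and
 * finally NULL. A child is always popped, and therefore flushed, before
 * its parent, so a parent's flush can pick up the deltas its children
 * propagated to it earlier in the same pass.
 */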

/* see cgroup_rstat_flush() */
static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep)
	__releases(&cgroup_rstat_lock) __acquires(&cgroup_rstat_lock)
{
	int cpu;

	lockdep_assert_held(&cgroup_rstat_lock);

	for_each_possible_cpu(cpu) {
		raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock,
						       cpu);
		struct cgroup *pos = NULL;

		raw_spin_lock(cpu_lock);
		while ((pos = cgroup_rstat_cpu_pop_updated(pos, cgrp, cpu))) {
			struct cgroup_subsys_state *css;

			cgroup_base_stat_flush(pos, cpu);

			rcu_read_lock();
			list_for_each_entry_rcu(css, &pos->rstat_css_list,
						rstat_css_node)
				css->ss->css_rstat_flush(css, cpu);
			rcu_read_unlock();
		}
		raw_spin_unlock(cpu_lock);

		/* if @may_sleep, play nice and yield if necessary */
		if (may_sleep && (need_resched() ||
				  spin_needbreak(&cgroup_rstat_lock))) {
			spin_unlock_irq(&cgroup_rstat_lock);
			if (!cond_resched())
				cpu_relax();
			spin_lock_irq(&cgroup_rstat_lock);
		}
	}
}

/**
 * cgroup_rstat_flush - flush stats in @cgrp's subtree
 * @cgrp: target cgroup
 *
 * Collect all per-cpu stats in @cgrp's subtree into the global counters
 * and propagate them upwards. After this function returns, all cgroups in
 * the subtree have up-to-date ->stat.
 *
 * This also gets all cgroups in the subtree including @cgrp off the
 * ->updated_children lists.
 *
 * This function may block.
 */
void cgroup_rstat_flush(struct cgroup *cgrp)
{
	might_sleep();

	spin_lock_irq(&cgroup_rstat_lock);
	cgroup_rstat_flush_locked(cgrp, true);
	spin_unlock_irq(&cgroup_rstat_lock);
}

/**
 * cgroup_rstat_flush_irqsafe - irqsafe version of cgroup_rstat_flush()
 * @cgrp: target cgroup
 *
 * This function can be called from any context.
 */
void cgroup_rstat_flush_irqsafe(struct cgroup *cgrp)
{
	unsigned long flags;

	spin_lock_irqsave(&cgroup_rstat_lock, flags);
	cgroup_rstat_flush_locked(cgrp, false);
	spin_unlock_irqrestore(&cgroup_rstat_lock, flags);
}

/**
 * cgroup_rstat_flush_hold - flush stats in @cgrp's subtree and hold
 * @cgrp: target cgroup
 *
 * Flush stats in @cgrp's subtree and prevent further flushes. Must be
 * paired with cgroup_rstat_flush_release().
 *
 * This function may block.
 */
void cgroup_rstat_flush_hold(struct cgroup *cgrp)
	__acquires(&cgroup_rstat_lock)
{
	might_sleep();
	spin_lock_irq(&cgroup_rstat_lock);
	cgroup_rstat_flush_locked(cgrp, true);
}

/**
 * cgroup_rstat_flush_release - release cgroup_rstat_flush_hold()
 */
void cgroup_rstat_flush_release(void)
	__releases(&cgroup_rstat_lock)
{
	spin_unlock_irq(&cgroup_rstat_lock);
}
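
/*
 * Illustrative sketch (not part of this file): the hold/release pair is
 * meant for readers that need the freshly flushed counters to stay
 * consistent while several of them are sampled, as
 * cgroup_base_stat_cputime_show() does further down. A hypothetical
 * controller interface file would follow the same pattern; the
 * example_read_total() helper and the "example_*" fields are assumptions
 * made up for this sketch.
 *
 *	static int example_read_total(struct seq_file *seq, void *v)
 *	{
 *		struct cgroup *cgrp = seq_css(seq)->cgroup;
 *		struct example_css *ec = example_css_of(cgrp);
 *		u64 total;
 *
 *		// flush the subtree and hold off concurrent flushers
 *		cgroup_rstat_flush_hold(cgrp);
 *		total = ec->nr_events_total;
 *		cgroup_rstat_flush_release();
 *
 *		seq_printf(seq, "%llu\n", total);
 *		return 0;
 *	}
 */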

int cgroup_rstat_init(struct cgroup *cgrp)
{
	int cpu;

	/* the root cgrp has rstat_cpu preallocated */
	if (!cgrp->rstat_cpu) {
		cgrp->rstat_cpu = alloc_percpu(struct cgroup_rstat_cpu);
		if (!cgrp->rstat_cpu)
			return -ENOMEM;
	}

	/* ->updated_children list is self terminated */
	for_each_possible_cpu(cpu) {
		struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);

		rstatc->updated_children = cgrp;
		u64_stats_init(&rstatc->bsync);
	}

	return 0;
}

void cgroup_rstat_exit(struct cgroup *cgrp)
{
	int cpu;

	cgroup_rstat_flush(cgrp);

	/* sanity check */
	for_each_possible_cpu(cpu) {
		struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);

		if (WARN_ON_ONCE(rstatc->updated_children != cgrp) ||
		    WARN_ON_ONCE(rstatc->updated_next))
			return;
	}

	free_percpu(cgrp->rstat_cpu);
	cgrp->rstat_cpu = NULL;
}

void __init cgroup_rstat_boot(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		raw_spin_lock_init(per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu));

	BUG_ON(cgroup_rstat_init(&cgrp_dfl_root.cgrp));
}

/*
 * Functions for cgroup basic resource statistics implemented on top of
 * rstat.
 */

static void cgroup_base_stat_accumulate(struct cgroup_base_stat *dst_bstat,
					struct cgroup_base_stat *src_bstat)
{
	dst_bstat->cputime.utime += src_bstat->cputime.utime;
	dst_bstat->cputime.stime += src_bstat->cputime.stime;
	dst_bstat->cputime.sum_exec_runtime += src_bstat->cputime.sum_exec_runtime;
}

static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu)
{
	struct cgroup *parent = cgroup_parent(cgrp);
	struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
	struct task_cputime *last_cputime = &rstatc->last_bstat.cputime;
	struct task_cputime cputime;
	struct cgroup_base_stat delta;
	unsigned seq;

	/* fetch the current per-cpu values */
	do {
		seq = __u64_stats_fetch_begin(&rstatc->bsync);
		cputime = rstatc->bstat.cputime;
	} while (__u64_stats_fetch_retry(&rstatc->bsync, seq));

	/* calculate the delta to propagate */
	delta.cputime.utime = cputime.utime - last_cputime->utime;
	delta.cputime.stime = cputime.stime - last_cputime->stime;
	delta.cputime.sum_exec_runtime = cputime.sum_exec_runtime -
					 last_cputime->sum_exec_runtime;
	*last_cputime = cputime;

	/* transfer the pending stat into delta */
	cgroup_base_stat_accumulate(&delta, &cgrp->pending_bstat);
	memset(&cgrp->pending_bstat, 0, sizeof(cgrp->pending_bstat));

	/* propagate delta into the global stat and the parent's pending */
	cgroup_base_stat_accumulate(&cgrp->bstat, &delta);
	if (parent)
		cgroup_base_stat_accumulate(&parent->pending_bstat, &delta);
}
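
/*
 * Worked example (illustrative, numbers made up): if a child cgroup's
 * per-cpu utime advanced by 3ms since its last flush, that 3ms delta is
 * added both to the child's own ->bstat and to its parent's
 * ->pending_bstat. Because cgroup_rstat_cpu_pop_updated() visits children
 * before their parent, the parent's flush in the same pass picks up the
 * pending 3ms, folds it into the parent's ->bstat, and forwards it one
 * more level up, keeping the totals hierarchically consistent without
 * walking the whole subtree on every update.
 */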

static struct cgroup_rstat_cpu *
cgroup_base_stat_cputime_account_begin(struct cgroup *cgrp)
{
	struct cgroup_rstat_cpu *rstatc;

	rstatc = get_cpu_ptr(cgrp->rstat_cpu);
	u64_stats_update_begin(&rstatc->bsync);
	return rstatc;
}

static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp,
						 struct cgroup_rstat_cpu *rstatc)
{
	u64_stats_update_end(&rstatc->bsync);
	cgroup_rstat_updated(cgrp, smp_processor_id());
	put_cpu_ptr(rstatc);
}

void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec)
{
	struct cgroup_rstat_cpu *rstatc;

	rstatc = cgroup_base_stat_cputime_account_begin(cgrp);
	rstatc->bstat.cputime.sum_exec_runtime += delta_exec;
	cgroup_base_stat_cputime_account_end(cgrp, rstatc);
}

void __cgroup_account_cputime_field(struct cgroup *cgrp,
				    enum cpu_usage_stat index, u64 delta_exec)
{
	struct cgroup_rstat_cpu *rstatc;

	rstatc = cgroup_base_stat_cputime_account_begin(cgrp);

	switch (index) {
	case CPUTIME_USER:
	case CPUTIME_NICE:
		rstatc->bstat.cputime.utime += delta_exec;
		break;
	case CPUTIME_SYSTEM:
	case CPUTIME_IRQ:
	case CPUTIME_SOFTIRQ:
		rstatc->bstat.cputime.stime += delta_exec;
		break;
	default:
		break;
	}

	cgroup_base_stat_cputime_account_end(cgrp, rstatc);
}

void cgroup_base_stat_cputime_show(struct seq_file *seq)
{
	struct cgroup *cgrp = seq_css(seq)->cgroup;
	u64 usage, utime, stime;

	if (!cgroup_parent(cgrp))
		return;

	cgroup_rstat_flush_hold(cgrp);
	usage = cgrp->bstat.cputime.sum_exec_runtime;
	cputime_adjust(&cgrp->bstat.cputime, &cgrp->prev_cputime, &utime, &stime);
	cgroup_rstat_flush_release();

	do_div(usage, NSEC_PER_USEC);
	do_div(utime, NSEC_PER_USEC);
	do_div(stime, NSEC_PER_USEC);

	seq_printf(seq, "usage_usec %llu\n"
		   "user_usec %llu\n"
		   "system_usec %llu\n",
		   usage, utime, stime);
}