// SPDX-License-Identifier: GPL-2.0
/*
 * Lockless hierarchical page accounting & limiting
 *
 * Copyright (C) 2014 Red Hat, Inc., Johannes Weiner
 */
#include <linux/page_counter.h>
#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/bug.h>
#include <asm/page.h>

  14. static void propagate_protected_usage(struct page_counter *c,
  15. unsigned long usage)
  16. {
  17. unsigned long protected, old_protected;
  18. long delta;
  19. if (!c->parent)
  20. return;
  21. if (c->min || atomic_long_read(&c->min_usage)) {
  22. if (usage <= c->min)
  23. protected = usage;
  24. else
  25. protected = 0;
  26. old_protected = atomic_long_xchg(&c->min_usage, protected);
  27. delta = protected - old_protected;
  28. if (delta)
  29. atomic_long_add(delta, &c->parent->children_min_usage);
  30. }
  31. if (c->low || atomic_long_read(&c->low_usage)) {
  32. if (usage <= c->low)
  33. protected = usage;
  34. else
  35. protected = 0;
  36. old_protected = atomic_long_xchg(&c->low_usage, protected);
  37. delta = protected - old_protected;
  38. if (delta)
  39. atomic_long_add(delta, &c->parent->children_low_usage);
  40. }
  41. }
  42. /**
  43. * page_counter_cancel - take pages out of the local counter
  44. * @counter: counter
  45. * @nr_pages: number of pages to cancel
  46. */
  47. void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages)
  48. {
  49. long new;
  50. new = atomic_long_sub_return(nr_pages, &counter->usage);
  51. propagate_protected_usage(counter, new);
  52. /* More uncharges than charges? */
  53. WARN_ON_ONCE(new < 0);
  54. }
  55. /**
  56. * page_counter_charge - hierarchically charge pages
  57. * @counter: counter
  58. * @nr_pages: number of pages to charge
  59. *
  60. * NOTE: This does not consider any configured counter limits.
  61. */
  62. void page_counter_charge(struct page_counter *counter, unsigned long nr_pages)
  63. {
  64. struct page_counter *c;
  65. for (c = counter; c; c = c->parent) {
  66. long new;
  67. new = atomic_long_add_return(nr_pages, &c->usage);
  68. propagate_protected_usage(counter, new);
  69. /*
  70. * This is indeed racy, but we can live with some
  71. * inaccuracy in the watermark.
  72. */
  73. if (new > c->watermark)
  74. c->watermark = new;
  75. }
  76. }
  77. /**
  78. * page_counter_try_charge - try to hierarchically charge pages
  79. * @counter: counter
  80. * @nr_pages: number of pages to charge
  81. * @fail: points first counter to hit its limit, if any
  82. *
  83. * Returns %true on success, or %false and @fail if the counter or one
  84. * of its ancestors has hit its configured limit.
  85. */
  86. bool page_counter_try_charge(struct page_counter *counter,
  87. unsigned long nr_pages,
  88. struct page_counter **fail)
  89. {
  90. struct page_counter *c;
  91. for (c = counter; c; c = c->parent) {
  92. long new;
  93. /*
  94. * Charge speculatively to avoid an expensive CAS. If
  95. * a bigger charge fails, it might falsely lock out a
  96. * racing smaller charge and send it into reclaim
  97. * early, but the error is limited to the difference
  98. * between the two sizes, which is less than 2M/4M in
  99. * case of a THP locking out a regular page charge.
  100. *
  101. * The atomic_long_add_return() implies a full memory
  102. * barrier between incrementing the count and reading
  103. * the limit. When racing with page_counter_limit(),
  104. * we either see the new limit or the setter sees the
  105. * counter has changed and retries.
  106. */
  107. new = atomic_long_add_return(nr_pages, &c->usage);
  108. if (new > c->max) {
  109. atomic_long_sub(nr_pages, &c->usage);
  110. propagate_protected_usage(counter, new);
  111. /*
  112. * This is racy, but we can live with some
  113. * inaccuracy in the failcnt.
  114. */
  115. c->failcnt++;
  116. *fail = c;
  117. goto failed;
  118. }
  119. propagate_protected_usage(counter, new);
  120. /*
  121. * Just like with failcnt, we can live with some
  122. * inaccuracy in the watermark.
  123. */
  124. if (new > c->watermark)
  125. c->watermark = new;
  126. }
  127. return true;
  128. failed:
  129. for (c = counter; c != *fail; c = c->parent)
  130. page_counter_cancel(c, nr_pages);
  131. return false;
  132. }
  133. /**
  134. * page_counter_uncharge - hierarchically uncharge pages
  135. * @counter: counter
  136. * @nr_pages: number of pages to uncharge
  137. */
  138. void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages)
  139. {
  140. struct page_counter *c;
  141. for (c = counter; c; c = c->parent)
  142. page_counter_cancel(c, nr_pages);
  143. }
  144. /**
  145. * page_counter_set_max - set the maximum number of pages allowed
  146. * @counter: counter
  147. * @nr_pages: limit to set
  148. *
  149. * Returns 0 on success, -EBUSY if the current number of pages on the
  150. * counter already exceeds the specified limit.
  151. *
  152. * The caller must serialize invocations on the same counter.
  153. */
  154. int page_counter_set_max(struct page_counter *counter, unsigned long nr_pages)
  155. {
  156. for (;;) {
  157. unsigned long old;
  158. long usage;
  159. /*
  160. * Update the limit while making sure that it's not
  161. * below the concurrently-changing counter value.
  162. *
  163. * The xchg implies two full memory barriers before
  164. * and after, so the read-swap-read is ordered and
  165. * ensures coherency with page_counter_try_charge():
  166. * that function modifies the count before checking
  167. * the limit, so if it sees the old limit, we see the
  168. * modified counter and retry.
  169. */
  170. usage = atomic_long_read(&counter->usage);
  171. if (usage > nr_pages)
  172. return -EBUSY;
  173. old = xchg(&counter->max, nr_pages);
  174. if (atomic_long_read(&counter->usage) <= usage)
  175. return 0;
  176. counter->max = old;
  177. cond_resched();
  178. }
  179. }
  180. /**
  181. * page_counter_set_min - set the amount of protected memory
  182. * @counter: counter
  183. * @nr_pages: value to set
  184. *
  185. * The caller must serialize invocations on the same counter.
  186. */
  187. void page_counter_set_min(struct page_counter *counter, unsigned long nr_pages)
  188. {
  189. struct page_counter *c;
  190. counter->min = nr_pages;
  191. for (c = counter; c; c = c->parent)
  192. propagate_protected_usage(c, atomic_long_read(&c->usage));
  193. }
  194. /**
  195. * page_counter_set_low - set the amount of protected memory
  196. * @counter: counter
  197. * @nr_pages: value to set
  198. *
  199. * The caller must serialize invocations on the same counter.
  200. */
  201. void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages)
  202. {
  203. struct page_counter *c;
  204. counter->low = nr_pages;
  205. for (c = counter; c; c = c->parent)
  206. propagate_protected_usage(c, atomic_long_read(&c->usage));
  207. }
  208. /**
  209. * page_counter_memparse - memparse() for page counter limits
  210. * @buf: string to parse
  211. * @max: string meaning maximum possible value
  212. * @nr_pages: returns the result in number of pages
  213. *
  214. * Returns -EINVAL, or 0 and @nr_pages on success. @nr_pages will be
  215. * limited to %PAGE_COUNTER_MAX.
  216. */
  217. int page_counter_memparse(const char *buf, const char *max,
  218. unsigned long *nr_pages)
  219. {
  220. char *end;
  221. u64 bytes;
  222. if (!strcmp(buf, max)) {
  223. *nr_pages = PAGE_COUNTER_MAX;
  224. return 0;
  225. }
  226. bytes = memparse(buf, &end);
  227. if (*end != '\0')
  228. return -EINVAL;
  229. *nr_pages = min(bytes / PAGE_SIZE, (u64)PAGE_COUNTER_MAX);
  230. return 0;
  231. }