slab_common.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Slab allocator functions that are independent of the allocator strategy
 *
 * (C) 2012 Christoph Lameter <cl@linux.com>
 */
#include <linux/slab.h>

#include <linux/mm.h>
#include <linux/poison.h>
#include <linux/interrupt.h>
#include <linux/memory.h>
#include <linux/cache.h>
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/uaccess.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/page.h>
#include <linux/memcontrol.h>

#define CREATE_TRACE_POINTS
#include <trace/events/kmem.h>

#include "slab.h"

enum slab_state slab_state;
LIST_HEAD(slab_caches);
DEFINE_MUTEX(slab_mutex);
struct kmem_cache *kmem_cache;

#ifdef CONFIG_HARDENED_USERCOPY
bool usercopy_fallback __ro_after_init =
        IS_ENABLED(CONFIG_HARDENED_USERCOPY_FALLBACK);
module_param(usercopy_fallback, bool, 0400);
MODULE_PARM_DESC(usercopy_fallback,
        "WARN instead of reject usercopy whitelist violations");
#endif

static LIST_HEAD(slab_caches_to_rcu_destroy);
static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work);
static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
                    slab_caches_to_rcu_destroy_workfn);

/*
 * Set of flags that will prevent slab merging
 */
#define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
        SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \
        SLAB_FAILSLAB | SLAB_KASAN)

#define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
        SLAB_CACHE_DMA32 | SLAB_ACCOUNT)
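
/*
 * Unlike SLAB_NEVER_MERGE above, the flags in SLAB_MERGE_SAME do not forbid
 * merging by themselves; they merely have to be identical on both caches for
 * find_mergeable() below to treat the caches as compatible.
 */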

/*
 * Merge control. If this is set then no merging of slab caches will occur.
 */
static bool slab_nomerge = !IS_ENABLED(CONFIG_SLAB_MERGE_DEFAULT);

static int __init setup_slab_nomerge(char *str)
{
        slab_nomerge = true;
        return 1;
}

#ifdef CONFIG_SLUB
__setup_param("slub_nomerge", slub_nomerge, setup_slab_nomerge, 0);
#endif

__setup("slab_nomerge", setup_slab_nomerge);

/*
 * Determine the size of a slab object
 */
unsigned int kmem_cache_size(struct kmem_cache *s)
{
        return s->object_size;
}
EXPORT_SYMBOL(kmem_cache_size);

#ifdef CONFIG_DEBUG_VM
static int kmem_cache_sanity_check(const char *name, unsigned int size)
{
        if (!name || in_interrupt() || size < sizeof(void *) ||
            size > KMALLOC_MAX_SIZE) {
                pr_err("kmem_cache_create(%s) integrity check failed\n", name);
                return -EINVAL;
        }

        WARN_ON(strchr(name, ' '));     /* It confuses parsers */
        return 0;
}
#else
static inline int kmem_cache_sanity_check(const char *name, unsigned int size)
{
        return 0;
}
#endif

void __kmem_cache_free_bulk(struct kmem_cache *s, size_t nr, void **p)
{
        size_t i;

        for (i = 0; i < nr; i++) {
                if (s)
                        kmem_cache_free(s, p[i]);
                else
                        kfree(p[i]);
        }
}

int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
                            void **p)
{
        size_t i;

        for (i = 0; i < nr; i++) {
                void *x = p[i] = kmem_cache_alloc(s, flags);
                if (!x) {
                        __kmem_cache_free_bulk(s, i, p);
                        return 0;
                }
        }
        return i;
}
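
/*
 * The two helpers above are the generic fallbacks behind the bulk API: on an
 * allocation failure the objects obtained so far are released again and 0 is
 * returned, otherwise the full count nr is returned. Illustrative use through
 * the public wrappers (the cache and array names are made up):
 *
 *      void *objs[16];
 *
 *      if (kmem_cache_alloc_bulk(my_cache, GFP_KERNEL, 16, objs))
 *              kmem_cache_free_bulk(my_cache, 16, objs);
 */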

#ifdef CONFIG_MEMCG_KMEM

LIST_HEAD(slab_root_caches);

void slab_init_memcg_params(struct kmem_cache *s)
{
        s->memcg_params.root_cache = NULL;
        RCU_INIT_POINTER(s->memcg_params.memcg_caches, NULL);
        INIT_LIST_HEAD(&s->memcg_params.children);
        s->memcg_params.dying = false;
}

static int init_memcg_params(struct kmem_cache *s,
                struct mem_cgroup *memcg, struct kmem_cache *root_cache)
{
        struct memcg_cache_array *arr;

        if (root_cache) {
                s->memcg_params.root_cache = root_cache;
                s->memcg_params.memcg = memcg;
                INIT_LIST_HEAD(&s->memcg_params.children_node);
                INIT_LIST_HEAD(&s->memcg_params.kmem_caches_node);
                return 0;
        }

        slab_init_memcg_params(s);

        if (!memcg_nr_cache_ids)
                return 0;

        arr = kvzalloc(sizeof(struct memcg_cache_array) +
                       memcg_nr_cache_ids * sizeof(void *),
                       GFP_KERNEL);
        if (!arr)
                return -ENOMEM;

        RCU_INIT_POINTER(s->memcg_params.memcg_caches, arr);
        return 0;
}

static void destroy_memcg_params(struct kmem_cache *s)
{
        if (is_root_cache(s))
                kvfree(rcu_access_pointer(s->memcg_params.memcg_caches));
}

static void free_memcg_params(struct rcu_head *rcu)
{
        struct memcg_cache_array *old;

        old = container_of(rcu, struct memcg_cache_array, rcu);
        kvfree(old);
}

static int update_memcg_params(struct kmem_cache *s, int new_array_size)
{
        struct memcg_cache_array *old, *new;

        new = kvzalloc(sizeof(struct memcg_cache_array) +
                       new_array_size * sizeof(void *), GFP_KERNEL);
        if (!new)
                return -ENOMEM;

        old = rcu_dereference_protected(s->memcg_params.memcg_caches,
                                        lockdep_is_held(&slab_mutex));
        if (old)
                memcpy(new->entries, old->entries,
                       memcg_nr_cache_ids * sizeof(void *));

        rcu_assign_pointer(s->memcg_params.memcg_caches, new);
        if (old)
                call_rcu(&old->rcu, free_memcg_params);
        return 0;
}

int memcg_update_all_caches(int num_memcgs)
{
        struct kmem_cache *s;
        int ret = 0;

        mutex_lock(&slab_mutex);
        list_for_each_entry(s, &slab_root_caches, root_caches_node) {
                ret = update_memcg_params(s, num_memcgs);
                /*
                 * Instead of freeing the memory, we'll just leave the caches
                 * up to this point in an updated state.
                 */
                if (ret)
                        break;
        }
        mutex_unlock(&slab_mutex);
        return ret;
}

void memcg_link_cache(struct kmem_cache *s)
{
        if (is_root_cache(s)) {
                list_add(&s->root_caches_node, &slab_root_caches);
        } else {
                list_add(&s->memcg_params.children_node,
                         &s->memcg_params.root_cache->memcg_params.children);
                list_add(&s->memcg_params.kmem_caches_node,
                         &s->memcg_params.memcg->kmem_caches);
        }
}

static void memcg_unlink_cache(struct kmem_cache *s)
{
        if (is_root_cache(s)) {
                list_del(&s->root_caches_node);
        } else {
                list_del(&s->memcg_params.children_node);
                list_del(&s->memcg_params.kmem_caches_node);
        }
}
#else
static inline int init_memcg_params(struct kmem_cache *s,
                struct mem_cgroup *memcg, struct kmem_cache *root_cache)
{
        return 0;
}

static inline void destroy_memcg_params(struct kmem_cache *s)
{
}

static inline void memcg_unlink_cache(struct kmem_cache *s)
{
}
#endif /* CONFIG_MEMCG_KMEM */

/*
 * Figure out what the alignment of the objects will be given a set of
 * flags, a user specified alignment and the size of the objects.
 */
static unsigned int calculate_alignment(slab_flags_t flags,
                unsigned int align, unsigned int size)
{
        /*
         * If the user wants hardware cache aligned objects then follow that
         * suggestion if the object is sufficiently large.
         *
         * The hardware cache alignment cannot override the specified
         * alignment though. If that is greater then use it.
         */
        if (flags & SLAB_HWCACHE_ALIGN) {
                unsigned int ralign;

                ralign = cache_line_size();
                while (size <= ralign / 2)
                        ralign /= 2;
                align = max(align, ralign);
        }

        if (align < ARCH_SLAB_MINALIGN)
                align = ARCH_SLAB_MINALIGN;

        return ALIGN(align, sizeof(void *));
}
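
/*
 * Worked example for calculate_alignment(), assuming a 64-byte cache line,
 * an 8-byte pointer and align == 0: for a 20-byte object created with
 * SLAB_HWCACHE_ALIGN, ralign is halved from 64 to 32 (20 <= 32) and stops
 * there (20 > 16), so the result is ALIGN(32, 8) == 32 (provided
 * ARCH_SLAB_MINALIGN is not larger). A 100-byte object keeps the full
 * 64-byte cache-line alignment.
 */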

/*
 * Find a mergeable slab cache
 */
int slab_unmergeable(struct kmem_cache *s)
{
        if (slab_nomerge || (s->flags & SLAB_NEVER_MERGE))
                return 1;

        if (!is_root_cache(s))
                return 1;

        if (s->ctor)
                return 1;

        if (s->usersize)
                return 1;

        /*
         * We may have set a slab to be unmergeable during bootstrap.
         */
        if (s->refcount < 0)
                return 1;

        return 0;
}

struct kmem_cache *find_mergeable(unsigned int size, unsigned int align,
                slab_flags_t flags, const char *name, void (*ctor)(void *))
{
        struct kmem_cache *s;

        if (slab_nomerge)
                return NULL;

        if (ctor)
                return NULL;

        size = ALIGN(size, sizeof(void *));
        align = calculate_alignment(flags, align, size);
        size = ALIGN(size, align);
        flags = kmem_cache_flags(size, flags, name, NULL);

        if (flags & SLAB_NEVER_MERGE)
                return NULL;

        list_for_each_entry_reverse(s, &slab_root_caches, root_caches_node) {
                if (slab_unmergeable(s))
                        continue;

                if (size > s->size)
                        continue;

                if ((flags & SLAB_MERGE_SAME) != (s->flags & SLAB_MERGE_SAME))
                        continue;
                /*
                 * Check if alignment is compatible.
                 * Courtesy of Adrian Drzewiecki
                 */
                if ((s->size & ~(align - 1)) != s->size)
                        continue;

                if (s->size - size >= sizeof(void *))
                        continue;

                if (IS_ENABLED(CONFIG_SLAB) && align &&
                        (align > s->align || s->align % align))
                        continue;

                return s;
        }
        return NULL;
}
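
/*
 * create_cache() below does the common part of cache creation: it allocates
 * a struct kmem_cache, fills in the geometry and the usercopy window, wires
 * up the memcg parameters and hands the cache to the allocator-specific
 * __kmem_cache_create(). On any failure the partially set up cache is freed
 * again and an ERR_PTR() is returned to the caller.
 */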

static struct kmem_cache *create_cache(const char *name,
                unsigned int object_size, unsigned int align,
                slab_flags_t flags, unsigned int useroffset,
                unsigned int usersize, void (*ctor)(void *),
                struct mem_cgroup *memcg, struct kmem_cache *root_cache)
{
        struct kmem_cache *s;
        int err;

        if (WARN_ON(useroffset + usersize > object_size))
                useroffset = usersize = 0;

        err = -ENOMEM;
        s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
        if (!s)
                goto out;

        s->name = name;
        s->size = s->object_size = object_size;
        s->align = align;
        s->ctor = ctor;
        s->useroffset = useroffset;
        s->usersize = usersize;

        err = init_memcg_params(s, memcg, root_cache);
        if (err)
                goto out_free_cache;

        err = __kmem_cache_create(s, flags);
        if (err)
                goto out_free_cache;

        s->refcount = 1;
        list_add(&s->list, &slab_caches);
        memcg_link_cache(s);
out:
        if (err)
                return ERR_PTR(err);
        return s;

out_free_cache:
        destroy_memcg_params(s);
        kmem_cache_free(kmem_cache, s);
        goto out;
}

/*
 * kmem_cache_create_usercopy - Create a cache.
 * @name: A string which is used in /proc/slabinfo to identify this cache.
 * @size: The size of objects to be created in this cache.
 * @align: The required alignment for the objects.
 * @flags: SLAB flags
 * @useroffset: Usercopy region offset
 * @usersize: Usercopy region size
 * @ctor: A constructor for the objects.
 *
 * Returns a ptr to the cache on success, NULL on failure.
 * Cannot be called within an interrupt, but can be interrupted.
 * The @ctor is run when new pages are allocated by the cache.
 *
 * The flags are
 *
 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
 * to catch references to uninitialised memory.
 *
 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
 * for buffer overruns.
 *
 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
 * cacheline. This can be beneficial if you're counting cycles as closely
 * as davem.
 */
struct kmem_cache *
kmem_cache_create_usercopy(const char *name,
                unsigned int size, unsigned int align,
                slab_flags_t flags,
                unsigned int useroffset, unsigned int usersize,
                void (*ctor)(void *))
{
        struct kmem_cache *s = NULL;
        const char *cache_name;
        int err;

        get_online_cpus();
        get_online_mems();
        memcg_get_cache_ids();

        mutex_lock(&slab_mutex);

        err = kmem_cache_sanity_check(name, size);
        if (err) {
                goto out_unlock;
        }

        /* Refuse requests with allocator specific flags */
        if (flags & ~SLAB_FLAGS_PERMITTED) {
                err = -EINVAL;
                goto out_unlock;
        }

        /*
         * Some allocators will constrain the set of valid flags to a subset
         * of all flags. We expect them to define CACHE_CREATE_MASK in this
         * case, and we'll just provide them with a sanitized version of the
         * passed flags.
         */
        flags &= CACHE_CREATE_MASK;

        /* Fail closed on bad usersize or useroffset values. */
        if (WARN_ON(!usersize && useroffset) ||
            WARN_ON(size < usersize || size - usersize < useroffset))
                usersize = useroffset = 0;

        if (!usersize)
                s = __kmem_cache_alias(name, size, align, flags, ctor);
        if (s)
                goto out_unlock;

        cache_name = kstrdup_const(name, GFP_KERNEL);
        if (!cache_name) {
                err = -ENOMEM;
                goto out_unlock;
        }

        s = create_cache(cache_name, size,
                         calculate_alignment(flags, align, size),
                         flags, useroffset, usersize, ctor, NULL, NULL);
        if (IS_ERR(s)) {
                err = PTR_ERR(s);
                kfree_const(cache_name);
        }

out_unlock:
        mutex_unlock(&slab_mutex);

        memcg_put_cache_ids();
        put_online_mems();
        put_online_cpus();

        if (err) {
                if (flags & SLAB_PANIC)
                        panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n",
                                name, err);
                else {
                        pr_warn("kmem_cache_create(%s) failed with error %d\n",
                                name, err);
                        dump_stack();
                }
                return NULL;
        }
        return s;
}
EXPORT_SYMBOL(kmem_cache_create_usercopy);

struct kmem_cache *
kmem_cache_create(const char *name, unsigned int size, unsigned int align,
                slab_flags_t flags, void (*ctor)(void *))
{
        return kmem_cache_create_usercopy(name, size, align, flags, 0, 0,
                                          ctor);
}
EXPORT_SYMBOL(kmem_cache_create);
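
/*
 * Illustrative use of the two creation interfaces above (the struct and
 * variable names are made up for the example):
 *
 *      struct foo { int a; char buf[32]; };
 *      static struct kmem_cache *foo_cache;
 *
 *      foo_cache = kmem_cache_create("foo", sizeof(struct foo), 0,
 *                                    SLAB_HWCACHE_ALIGN, NULL);
 *
 * or, whitelisting only ->buf for copies to/from user space:
 *
 *      foo_cache = kmem_cache_create_usercopy("foo", sizeof(struct foo), 0,
 *                              SLAB_HWCACHE_ALIGN,
 *                              offsetof(struct foo, buf),
 *                              FIELD_SIZEOF(struct foo, buf), NULL);
 *
 * Objects then come from kmem_cache_alloc(foo_cache, GFP_KERNEL), go back
 * with kmem_cache_free(), and the cache itself is torn down with
 * kmem_cache_destroy().
 */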

static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work)
{
        LIST_HEAD(to_destroy);
        struct kmem_cache *s, *s2;

        /*
         * On destruction, SLAB_TYPESAFE_BY_RCU kmem_caches are put on the
         * @slab_caches_to_rcu_destroy list. The slab pages are freed
         * through RCU and the associated kmem_cache is dereferenced while
         * freeing the pages, so the kmem_caches should be freed only after
         * the pending RCU operations are finished. As rcu_barrier() is a
         * pretty slow operation, we batch all pending destructions
         * asynchronously.
         */
        mutex_lock(&slab_mutex);
        list_splice_init(&slab_caches_to_rcu_destroy, &to_destroy);
        mutex_unlock(&slab_mutex);

        if (list_empty(&to_destroy))
                return;

        rcu_barrier();

        list_for_each_entry_safe(s, s2, &to_destroy, list) {
#ifdef SLAB_SUPPORTS_SYSFS
                sysfs_slab_release(s);
#else
                slab_kmem_cache_release(s);
#endif
        }
}

static int shutdown_cache(struct kmem_cache *s)
{
        /* free asan quarantined objects */
        kasan_cache_shutdown(s);

        if (__kmem_cache_shutdown(s) != 0)
                return -EBUSY;

        memcg_unlink_cache(s);
        list_del(&s->list);

        if (s->flags & SLAB_TYPESAFE_BY_RCU) {
#ifdef SLAB_SUPPORTS_SYSFS
                sysfs_slab_unlink(s);
#endif
                list_add_tail(&s->list, &slab_caches_to_rcu_destroy);
                schedule_work(&slab_caches_to_rcu_destroy_work);
        } else {
#ifdef SLAB_SUPPORTS_SYSFS
                sysfs_slab_unlink(s);
                sysfs_slab_release(s);
#else
                slab_kmem_cache_release(s);
#endif
        }

        return 0;
}

#ifdef CONFIG_MEMCG_KMEM
/*
 * memcg_create_kmem_cache - Create a cache for a memory cgroup.
 * @memcg: The memory cgroup the new cache is for.
 * @root_cache: The parent of the new cache.
 *
 * This function attempts to create a kmem cache that will serve allocation
 * requests going from @memcg to @root_cache. The new cache inherits properties
 * from its parent.
 */
void memcg_create_kmem_cache(struct mem_cgroup *memcg,
                             struct kmem_cache *root_cache)
{
        static char memcg_name_buf[NAME_MAX + 1]; /* protected by slab_mutex */
        struct cgroup_subsys_state *css = &memcg->css;
        struct memcg_cache_array *arr;
        struct kmem_cache *s = NULL;
        char *cache_name;
        int idx;

        get_online_cpus();
        get_online_mems();

        mutex_lock(&slab_mutex);

        /*
         * The memory cgroup could have been offlined while the cache
         * creation work was pending.
         */
        if (memcg->kmem_state != KMEM_ONLINE || root_cache->memcg_params.dying)
                goto out_unlock;

        idx = memcg_cache_id(memcg);
        arr = rcu_dereference_protected(root_cache->memcg_params.memcg_caches,
                                        lockdep_is_held(&slab_mutex));

        /*
         * Since per-memcg caches are created asynchronously on first
         * allocation (see memcg_kmem_get_cache()), several threads can try to
         * create the same cache, but only one of them may succeed.
         */
        if (arr->entries[idx])
                goto out_unlock;

        cgroup_name(css->cgroup, memcg_name_buf, sizeof(memcg_name_buf));
        cache_name = kasprintf(GFP_KERNEL, "%s(%llu:%s)", root_cache->name,
                               css->serial_nr, memcg_name_buf);
        if (!cache_name)
                goto out_unlock;

        s = create_cache(cache_name, root_cache->object_size,
                         root_cache->align,
                         root_cache->flags & CACHE_CREATE_MASK,
                         root_cache->useroffset, root_cache->usersize,
                         root_cache->ctor, memcg, root_cache);
        /*
         * If we could not create a memcg cache, do not complain, because
         * that's not critical at all as we can always proceed with the root
         * cache.
         */
        if (IS_ERR(s)) {
                kfree(cache_name);
                goto out_unlock;
        }

        /*
         * Since readers won't lock (see cache_from_memcg_idx()), we need a
         * barrier here to ensure nobody will see the kmem_cache partially
         * initialized.
         */
        smp_wmb();
        arr->entries[idx] = s;

out_unlock:
        mutex_unlock(&slab_mutex);

        put_online_mems();
        put_online_cpus();
}

static void kmemcg_deactivate_workfn(struct work_struct *work)
{
        struct kmem_cache *s = container_of(work, struct kmem_cache,
                                            memcg_params.deact_work);

        get_online_cpus();
        get_online_mems();

        mutex_lock(&slab_mutex);

        s->memcg_params.deact_fn(s);

        mutex_unlock(&slab_mutex);

        put_online_mems();
        put_online_cpus();

        /* done, put the ref from slab_deactivate_memcg_cache_rcu_sched() */
        css_put(&s->memcg_params.memcg->css);
}

static void kmemcg_deactivate_rcufn(struct rcu_head *head)
{
        struct kmem_cache *s = container_of(head, struct kmem_cache,
                                            memcg_params.deact_rcu_head);

        /*
         * We need to grab blocking locks. Bounce to ->deact_work. The
         * work item shares the space with the RCU head and can't be
         * initialized earlier.
         */
        INIT_WORK(&s->memcg_params.deact_work, kmemcg_deactivate_workfn);
        queue_work(memcg_kmem_cache_wq, &s->memcg_params.deact_work);
}

/**
 * slab_deactivate_memcg_cache_rcu_sched - schedule deactivation after a
 * sched RCU grace period
 * @s: target kmem_cache
 * @deact_fn: deactivation function to call
 *
 * Schedule @deact_fn to be invoked with online cpus, mems and slab_mutex
 * held after a sched RCU grace period. The slab is guaranteed to stay
 * alive until @deact_fn is finished. This is to be used from
 * __kmemcg_cache_deactivate().
 */
void slab_deactivate_memcg_cache_rcu_sched(struct kmem_cache *s,
                                           void (*deact_fn)(struct kmem_cache *))
{
        if (WARN_ON_ONCE(is_root_cache(s)) ||
            WARN_ON_ONCE(s->memcg_params.deact_fn))
                return;

        if (s->memcg_params.root_cache->memcg_params.dying)
                return;

        /* pin memcg so that @s doesn't get destroyed in the middle */
        css_get(&s->memcg_params.memcg->css);

        s->memcg_params.deact_fn = deact_fn;
        call_rcu_sched(&s->memcg_params.deact_rcu_head, kmemcg_deactivate_rcufn);
}

void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
{
        int idx;
        struct memcg_cache_array *arr;
        struct kmem_cache *s, *c;

        idx = memcg_cache_id(memcg);

        get_online_cpus();
        get_online_mems();

        mutex_lock(&slab_mutex);
        list_for_each_entry(s, &slab_root_caches, root_caches_node) {
                arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
                                                lockdep_is_held(&slab_mutex));
                c = arr->entries[idx];
                if (!c)
                        continue;

                __kmemcg_cache_deactivate(c);
                arr->entries[idx] = NULL;
        }
        mutex_unlock(&slab_mutex);

        put_online_mems();
        put_online_cpus();
}

void memcg_destroy_kmem_caches(struct mem_cgroup *memcg)
{
        struct kmem_cache *s, *s2;

        get_online_cpus();
        get_online_mems();

        mutex_lock(&slab_mutex);
        list_for_each_entry_safe(s, s2, &memcg->kmem_caches,
                                 memcg_params.kmem_caches_node) {
                /*
                 * The cgroup is about to be freed and therefore has no charges
                 * left. Hence, all its caches must be empty by now.
                 */
                BUG_ON(shutdown_cache(s));
        }
        mutex_unlock(&slab_mutex);

        put_online_mems();
        put_online_cpus();
}

static int shutdown_memcg_caches(struct kmem_cache *s)
{
        struct memcg_cache_array *arr;
        struct kmem_cache *c, *c2;
        LIST_HEAD(busy);
        int i;

        BUG_ON(!is_root_cache(s));

        /*
         * First, shutdown active caches, i.e. caches that belong to online
         * memory cgroups.
         */
        arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
                                        lockdep_is_held(&slab_mutex));
        for_each_memcg_cache_index(i) {
                c = arr->entries[i];
                if (!c)
                        continue;
                if (shutdown_cache(c))
                        /*
                         * The cache still has objects. Move it to a temporary
                         * list so as not to try to destroy it for a second
                         * time while iterating over inactive caches below.
                         */
                        list_move(&c->memcg_params.children_node, &busy);
                else
                        /*
                         * The cache is empty and will be destroyed soon. Clear
                         * the pointer to it in the memcg_caches array so that
                         * it will never be accessed even if the root cache
                         * stays alive.
                         */
                        arr->entries[i] = NULL;
        }

        /*
         * Second, shutdown all caches left from memory cgroups that are now
         * offline.
         */
        list_for_each_entry_safe(c, c2, &s->memcg_params.children,
                                 memcg_params.children_node)
                shutdown_cache(c);

        list_splice(&busy, &s->memcg_params.children);

        /*
         * A cache being destroyed must be empty. In particular, this means
         * that all per memcg caches attached to it must be empty too.
         */
        if (!list_empty(&s->memcg_params.children))
                return -EBUSY;
        return 0;
}

static void flush_memcg_workqueue(struct kmem_cache *s)
{
        mutex_lock(&slab_mutex);
        s->memcg_params.dying = true;
        mutex_unlock(&slab_mutex);

        /*
         * SLUB deactivates the kmem_caches through call_rcu_sched. Make
         * sure all registered rcu callbacks have been invoked.
         */
        if (IS_ENABLED(CONFIG_SLUB))
                rcu_barrier_sched();

        /*
         * SLAB and SLUB create memcg kmem_caches through workqueue and SLUB
         * deactivates the memcg kmem_caches through workqueue. Make sure all
         * previous workitems on workqueue are processed.
         */
        if (likely(memcg_kmem_cache_wq))
                flush_workqueue(memcg_kmem_cache_wq);
}
#else
static inline int shutdown_memcg_caches(struct kmem_cache *s)
{
        return 0;
}

static inline void flush_memcg_workqueue(struct kmem_cache *s)
{
}
#endif /* CONFIG_MEMCG_KMEM */

void slab_kmem_cache_release(struct kmem_cache *s)
{
        __kmem_cache_release(s);
        destroy_memcg_params(s);
        kfree_const(s->name);
        kmem_cache_free(kmem_cache, s);
}

void kmem_cache_destroy(struct kmem_cache *s)
{
        int err;

        if (unlikely(!s))
                return;

        flush_memcg_workqueue(s);

        get_online_cpus();
        get_online_mems();

        mutex_lock(&slab_mutex);

        s->refcount--;
        if (s->refcount)
                goto out_unlock;

        err = shutdown_memcg_caches(s);
        if (!err)
                err = shutdown_cache(s);

        if (err) {
                pr_err("kmem_cache_destroy %s: Slab cache still has objects\n",
                       s->name);
                dump_stack();
        }
out_unlock:
        mutex_unlock(&slab_mutex);

        put_online_mems();
        put_online_cpus();
}
EXPORT_SYMBOL(kmem_cache_destroy);

/**
 * kmem_cache_shrink - Shrink a cache.
 * @cachep: The cache to shrink.
 *
 * Releases as many slabs as possible for a cache.
 * To help debugging, a zero exit status indicates all slabs were released.
 */
int kmem_cache_shrink(struct kmem_cache *cachep)
{
        int ret;

        get_online_cpus();
        get_online_mems();
        kasan_cache_shrink(cachep);
        ret = __kmem_cache_shrink(cachep);
        put_online_mems();
        put_online_cpus();
        return ret;
}
EXPORT_SYMBOL(kmem_cache_shrink);

bool slab_is_available(void)
{
        return slab_state >= UP;
}

#ifndef CONFIG_SLOB
/* Create a cache during boot when no slab services are available yet */
void __init create_boot_cache(struct kmem_cache *s, const char *name,
                unsigned int size, slab_flags_t flags,
                unsigned int useroffset, unsigned int usersize)
{
        int err;

        s->name = name;
        s->size = s->object_size = size;
        s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);
        s->useroffset = useroffset;
        s->usersize = usersize;

        slab_init_memcg_params(s);

        err = __kmem_cache_create(s, flags);

        if (err)
                panic("Creation of kmalloc slab %s size=%u failed. Reason %d\n",
                        name, size, err);

        s->refcount = -1;       /* Exempt from merging for now */
}

struct kmem_cache *__init create_kmalloc_cache(const char *name,
                unsigned int size, slab_flags_t flags,
                unsigned int useroffset, unsigned int usersize)
{
        struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);

        if (!s)
                panic("Out of memory when creating slab %s\n", name);

        create_boot_cache(s, name, size, flags, useroffset, usersize);
        list_add(&s->list, &slab_caches);
        memcg_link_cache(s);
        s->refcount = 1;
        return s;
}

struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1] __ro_after_init;
EXPORT_SYMBOL(kmalloc_caches);

#ifdef CONFIG_ZONE_DMA
struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1] __ro_after_init;
EXPORT_SYMBOL(kmalloc_dma_caches);
#endif

/*
 * Conversion table for small slabs sizes / 8 to the index in the
 * kmalloc array. This is necessary for slabs < 192 since we have non power
 * of two cache sizes there. The size of larger slabs can be determined using
 * fls.
 */
static u8 size_index[24] __ro_after_init = {
        3,      /* 8 */
        4,      /* 16 */
        5,      /* 24 */
        5,      /* 32 */
        6,      /* 40 */
        6,      /* 48 */
        6,      /* 56 */
        6,      /* 64 */
        1,      /* 72 */
        1,      /* 80 */
        1,      /* 88 */
        1,      /* 96 */
        7,      /* 104 */
        7,      /* 112 */
        7,      /* 120 */
        7,      /* 128 */
        2,      /* 136 */
        2,      /* 144 */
        2,      /* 152 */
        2,      /* 160 */
        2,      /* 168 */
        2,      /* 176 */
        2,      /* 184 */
        2       /* 192 */
};

static inline unsigned int size_index_elem(unsigned int bytes)
{
        return (bytes - 1) / 8;
}
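
/*
 * Worked example, assuming the default table above: a 100-byte request gives
 * size_index_elem(100) == (100 - 1) / 8 == 12, and size_index[12] is 7, so
 * the request is served from the kmalloc-128 cache.
 */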

/*
 * Find the kmem_cache structure that serves a given size of
 * allocation
 */
struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
{
        unsigned int index;

        if (size <= 192) {
                if (!size)
                        return ZERO_SIZE_PTR;

                index = size_index[size_index_elem(size)];
        } else {
                if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
                        WARN_ON(1);
                        return NULL;
                }
                index = fls(size - 1);
        }

#ifdef CONFIG_ZONE_DMA
        if (unlikely((flags & GFP_DMA)))
                return kmalloc_dma_caches[index];
#endif

        return kmalloc_caches[index];
}
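
/*
 * For sizes above 192 bytes the cache index is simply the position of the
 * highest set bit of (size - 1): for example a 300-byte request yields
 * fls(299) == 9, so it is served from kmalloc_caches[9] (kmalloc-512).
 */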

/*
 * kmalloc_info[] is to make slub_debug=,kmalloc-xx option work at boot time.
 * kmalloc_index() supports up to 2^26=64MB, so the final entry of the table is
 * kmalloc-67108864.
 */
const struct kmalloc_info_struct kmalloc_info[] __initconst = {
        {NULL, 0},                      {"kmalloc-96", 96},
        {"kmalloc-192", 192},           {"kmalloc-8", 8},
        {"kmalloc-16", 16},             {"kmalloc-32", 32},
        {"kmalloc-64", 64},             {"kmalloc-128", 128},
        {"kmalloc-256", 256},           {"kmalloc-512", 512},
        {"kmalloc-1024", 1024},         {"kmalloc-2048", 2048},
        {"kmalloc-4096", 4096},         {"kmalloc-8192", 8192},
        {"kmalloc-16384", 16384},       {"kmalloc-32768", 32768},
        {"kmalloc-65536", 65536},       {"kmalloc-131072", 131072},
        {"kmalloc-262144", 262144},     {"kmalloc-524288", 524288},
        {"kmalloc-1048576", 1048576},   {"kmalloc-2097152", 2097152},
        {"kmalloc-4194304", 4194304},   {"kmalloc-8388608", 8388608},
        {"kmalloc-16777216", 16777216}, {"kmalloc-33554432", 33554432},
        {"kmalloc-67108864", 67108864}
};

/*
 * Patch up the size_index table if we have strange large alignment
 * requirements for the kmalloc array. This is only the case for
 * MIPS it seems. The standard arches will not generate any code here.
 *
 * Largest permitted alignment is 256 bytes due to the way we
 * handle the index determination for the smaller caches.
 *
 * Make sure that nothing crazy happens if someone starts tinkering
 * around with ARCH_KMALLOC_MINALIGN
 */
void __init setup_kmalloc_cache_index_table(void)
{
        unsigned int i;

        BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
                (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));

        for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
                unsigned int elem = size_index_elem(i);

                if (elem >= ARRAY_SIZE(size_index))
                        break;
                size_index[elem] = KMALLOC_SHIFT_LOW;
        }

        if (KMALLOC_MIN_SIZE >= 64) {
                /*
                 * The 96 byte size cache is not used if the alignment
                 * is 64 byte.
                 */
                for (i = 64 + 8; i <= 96; i += 8)
                        size_index[size_index_elem(i)] = 7;
        }

        if (KMALLOC_MIN_SIZE >= 128) {
                /*
                 * The 192 byte sized cache is not used if the alignment
                 * is 128 byte. Redirect kmalloc to use the 256 byte cache
                 * instead.
                 */
                for (i = 128 + 8; i <= 192; i += 8)
                        size_index[size_index_elem(i)] = 8;
        }
}

static void __init new_kmalloc_cache(int idx, slab_flags_t flags)
{
        kmalloc_caches[idx] = create_kmalloc_cache(kmalloc_info[idx].name,
                                        kmalloc_info[idx].size, flags, 0,
                                        kmalloc_info[idx].size);
}

/*
 * Create the kmalloc array. Some of the regular kmalloc arrays
 * may already have been created because they were needed to
 * enable allocations for slab creation.
 */
void __init create_kmalloc_caches(slab_flags_t flags)
{
        int i;

        for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
                if (!kmalloc_caches[i])
                        new_kmalloc_cache(i, flags);

                /*
                 * Caches that are not of the two-to-the-power-of size.
                 * These have to be created immediately after the
                 * earlier power of two caches
                 */
                if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[1] && i == 6)
                        new_kmalloc_cache(1, flags);
                if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[2] && i == 7)
                        new_kmalloc_cache(2, flags);
        }

        /* Kmalloc array is now usable */
        slab_state = UP;

#ifdef CONFIG_ZONE_DMA
        for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
                struct kmem_cache *s = kmalloc_caches[i];

                if (s) {
                        unsigned int size = kmalloc_size(i);
                        char *n = kasprintf(GFP_NOWAIT,
                                            "dma-kmalloc-%u", size);

                        BUG_ON(!n);
                        kmalloc_dma_caches[i] = create_kmalloc_cache(n,
                                size, SLAB_CACHE_DMA | flags, 0, 0);
                }
        }
#endif
}
#endif /* !CONFIG_SLOB */

/*
 * To avoid unnecessary overhead, we pass through large allocation requests
 * directly to the page allocator. We use __GFP_COMP, because we will need to
 * know the allocation order to free the pages properly in kfree.
 */
void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
{
        void *ret;
        struct page *page;

        flags |= __GFP_COMP;
        page = alloc_pages(flags, order);
        ret = page ? page_address(page) : NULL;
        kmemleak_alloc(ret, size, 1, flags);
        kasan_kmalloc_large(ret, size, flags);
        return ret;
}
EXPORT_SYMBOL(kmalloc_order);
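
/*
 * In other words, a kmalloc() request larger than KMALLOC_MAX_CACHE_SIZE
 * (for example an 8 MB buffer) bypasses the kmalloc caches entirely and ends
 * up here as a compound page allocation; kfree() later reads the order back
 * from the compound page head to return it to the page allocator.
 */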

#ifdef CONFIG_TRACING
void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
{
        void *ret = kmalloc_order(size, flags, order);
        trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
        return ret;
}
EXPORT_SYMBOL(kmalloc_order_trace);
#endif

#ifdef CONFIG_SLAB_FREELIST_RANDOM
/* Randomize a generic freelist */
static void freelist_randomize(struct rnd_state *state, unsigned int *list,
                               unsigned int count)
{
        unsigned int rand;
        unsigned int i;

        for (i = 0; i < count; i++)
                list[i] = i;

        /* Fisher-Yates shuffle */
        for (i = count - 1; i > 0; i--) {
                rand = prandom_u32_state(state);
                rand %= (i + 1);
                swap(list[i], list[rand]);
        }
}

/* Create a random sequence per cache */
int cache_random_seq_create(struct kmem_cache *cachep, unsigned int count,
                            gfp_t gfp)
{
        struct rnd_state state;

        if (count < 2 || cachep->random_seq)
                return 0;

        cachep->random_seq = kcalloc(count, sizeof(unsigned int), gfp);
        if (!cachep->random_seq)
                return -ENOMEM;

        /* Get best entropy at this stage of boot */
        prandom_seed_state(&state, get_random_long());

        freelist_randomize(&state, cachep->random_seq, count);
        return 0;
}

/* Destroy the per-cache random freelist sequence */
void cache_random_seq_destroy(struct kmem_cache *cachep)
{
        kfree(cachep->random_seq);
        cachep->random_seq = NULL;
}
#endif /* CONFIG_SLAB_FREELIST_RANDOM */

#if defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG)
#ifdef CONFIG_SLAB
#define SLABINFO_RIGHTS (0600)
#else
#define SLABINFO_RIGHTS (0400)
#endif

static void print_slabinfo_header(struct seq_file *m)
{
        /*
         * Output format version, so at least we can change it
         * without _too_ many complaints.
         */
#ifdef CONFIG_DEBUG_SLAB
        seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
#else
        seq_puts(m, "slabinfo - version: 2.1\n");
#endif
        seq_puts(m, "# name <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab>");
        seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
        seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
#ifdef CONFIG_DEBUG_SLAB
        seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> <error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
        seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
#endif
        seq_putc(m, '\n');
}

void *slab_start(struct seq_file *m, loff_t *pos)
{
        mutex_lock(&slab_mutex);
        return seq_list_start(&slab_root_caches, *pos);
}

void *slab_next(struct seq_file *m, void *p, loff_t *pos)
{
        return seq_list_next(p, &slab_root_caches, pos);
}

void slab_stop(struct seq_file *m, void *p)
{
        mutex_unlock(&slab_mutex);
}

static void
memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info)
{
        struct kmem_cache *c;
        struct slabinfo sinfo;

        if (!is_root_cache(s))
                return;

        for_each_memcg_cache(c, s) {
                memset(&sinfo, 0, sizeof(sinfo));
                get_slabinfo(c, &sinfo);

                info->active_slabs += sinfo.active_slabs;
                info->num_slabs += sinfo.num_slabs;
                info->shared_avail += sinfo.shared_avail;
                info->active_objs += sinfo.active_objs;
                info->num_objs += sinfo.num_objs;
        }
}

static void cache_show(struct kmem_cache *s, struct seq_file *m)
{
        struct slabinfo sinfo;

        memset(&sinfo, 0, sizeof(sinfo));
        get_slabinfo(s, &sinfo);

        memcg_accumulate_slabinfo(s, &sinfo);

        seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
                   cache_name(s), sinfo.active_objs, sinfo.num_objs, s->size,
                   sinfo.objects_per_slab, (1 << sinfo.cache_order));

        seq_printf(m, " : tunables %4u %4u %4u",
                   sinfo.limit, sinfo.batchcount, sinfo.shared);
        seq_printf(m, " : slabdata %6lu %6lu %6lu",
                   sinfo.active_slabs, sinfo.num_slabs, sinfo.shared_avail);
        slabinfo_show_stats(m, s);
        seq_putc(m, '\n');
}

static int slab_show(struct seq_file *m, void *p)
{
        struct kmem_cache *s = list_entry(p, struct kmem_cache, root_caches_node);

        if (p == slab_root_caches.next)
                print_slabinfo_header(m);
        cache_show(s, m);
        return 0;
}

void dump_unreclaimable_slab(void)
{
        struct kmem_cache *s, *s2;
        struct slabinfo sinfo;

        /*
         * Taking slab_mutex here is risky because we don't want to sleep in
         * the OOM path, but traversing the list without the mutex would risk
         * a crash. Use mutex_trylock to protect the traversal and dump
         * nothing if the mutex cannot be acquired.
         */
        if (!mutex_trylock(&slab_mutex)) {
                pr_warn("excessive unreclaimable slab but cannot dump stats\n");
                return;
        }

        pr_info("Unreclaimable slab info:\n");
        pr_info("Name Used Total\n");

        list_for_each_entry_safe(s, s2, &slab_caches, list) {
                if (!is_root_cache(s) || (s->flags & SLAB_RECLAIM_ACCOUNT))
                        continue;

                get_slabinfo(s, &sinfo);

                if (sinfo.num_objs > 0)
                        pr_info("%-17s %10luKB %10luKB\n", cache_name(s),
                                (sinfo.active_objs * s->size) / 1024,
                                (sinfo.num_objs * s->size) / 1024);
        }
        mutex_unlock(&slab_mutex);
}

#if defined(CONFIG_MEMCG)
void *memcg_slab_start(struct seq_file *m, loff_t *pos)
{
        struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));

        mutex_lock(&slab_mutex);
        return seq_list_start(&memcg->kmem_caches, *pos);
}

void *memcg_slab_next(struct seq_file *m, void *p, loff_t *pos)
{
        struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));

        return seq_list_next(p, &memcg->kmem_caches, pos);
}

void memcg_slab_stop(struct seq_file *m, void *p)
{
        mutex_unlock(&slab_mutex);
}

int memcg_slab_show(struct seq_file *m, void *p)
{
        struct kmem_cache *s = list_entry(p, struct kmem_cache,
                                          memcg_params.kmem_caches_node);
        struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));

        if (p == memcg->kmem_caches.next)
                print_slabinfo_header(m);
        cache_show(s, m);
        return 0;
}
#endif

/*
 * slabinfo_op - iterator that generates /proc/slabinfo
 *
 * Output layout:
 * cache-name
 * num-active-objs
 * total-objs
 * object size
 * num-active-slabs
 * total-slabs
 * num-pages-per-slab
 * + further values on SMP and with statistics enabled
 */
static const struct seq_operations slabinfo_op = {
        .start = slab_start,
        .next = slab_next,
        .stop = slab_stop,
        .show = slab_show,
};

static int slabinfo_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &slabinfo_op);
}

static const struct file_operations proc_slabinfo_operations = {
        .open           = slabinfo_open,
        .read           = seq_read,
        .write          = slabinfo_write,
        .llseek         = seq_lseek,
        .release        = seq_release,
};

static int __init slab_proc_init(void)
{
        proc_create("slabinfo", SLABINFO_RIGHTS, NULL,
                    &proc_slabinfo_operations);
        return 0;
}
module_init(slab_proc_init);
#endif /* CONFIG_SLAB || CONFIG_SLUB_DEBUG */

static __always_inline void *__do_krealloc(const void *p, size_t new_size,
                                           gfp_t flags)
{
        void *ret;
        size_t ks = 0;

        if (p)
                ks = ksize(p);

        if (ks >= new_size) {
                kasan_krealloc((void *)p, new_size, flags);
                return (void *)p;
        }

        ret = kmalloc_track_caller(new_size, flags);
        if (ret && p)
                memcpy(ret, p, ks);

        return ret;
}

/**
 * __krealloc - like krealloc() but don't free @p.
 * @p: object to reallocate memory for.
 * @new_size: how many bytes of memory are required.
 * @flags: the type of memory to allocate.
 *
 * This function is like krealloc() except it never frees the originally
 * allocated buffer. Use this if you don't want to free the buffer immediately
 * like, for example, with RCU.
 */
void *__krealloc(const void *p, size_t new_size, gfp_t flags)
{
        if (unlikely(!new_size))
                return ZERO_SIZE_PTR;

        return __do_krealloc(p, new_size, flags);
}
EXPORT_SYMBOL(__krealloc);

/**
 * krealloc - reallocate memory. The contents will remain unchanged.
 * @p: object to reallocate memory for.
 * @new_size: how many bytes of memory are required.
 * @flags: the type of memory to allocate.
 *
 * The contents of the object pointed to are preserved up to the
 * lesser of the new and old sizes. If @p is %NULL, krealloc()
 * behaves exactly like kmalloc(). If @new_size is 0 and @p is not a
 * %NULL pointer, the object pointed to is freed.
 */
void *krealloc(const void *p, size_t new_size, gfp_t flags)
{
        void *ret;

        if (unlikely(!new_size)) {
                kfree(p);
                return ZERO_SIZE_PTR;
        }

        ret = __do_krealloc(p, new_size, flags);
        if (ret && p != ret)
                kfree(p);

        return ret;
}
EXPORT_SYMBOL(krealloc);
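
/*
 * Typical use (illustrative, error handling only sketched): on failure
 * krealloc() returns NULL and leaves the original buffer intact, so the
 * caller must not lose the old pointer.
 *
 *      new = krealloc(buf, new_len, GFP_KERNEL);
 *      if (!new)
 *              goto err;       // buf is still valid and must still be freed
 *      buf = new;
 */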

/**
 * kzfree - like kfree but zero memory
 * @p: object to free memory of
 *
 * The memory of the object @p points to is zeroed before freed.
 * If @p is %NULL, kzfree() does nothing.
 *
 * Note: this function zeroes the whole allocated buffer which can be a good
 * deal bigger than the requested buffer size passed to kmalloc(). So be
 * careful when using this function in performance sensitive code.
 */
void kzfree(const void *p)
{
        size_t ks;
        void *mem = (void *)p;

        if (unlikely(ZERO_OR_NULL_PTR(mem)))
                return;
        ks = ksize(mem);
        memset(mem, 0, ks);
        kfree(mem);
}
EXPORT_SYMBOL(kzfree);

/* Tracepoints definitions. */
EXPORT_TRACEPOINT_SYMBOL(kmalloc);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
EXPORT_TRACEPOINT_SYMBOL(kfree);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);

int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
{
        if (__should_failslab(s, gfpflags))
                return -ENOMEM;
        return 0;
}
ALLOW_ERROR_INJECTION(should_failslab, ERRNO);