kmem.c

/*
 * Copyright (c) 2010-2018 Richard Braun.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 *
 * This allocator is based on the paper "The Slab Allocator: An Object-Caching
 * Kernel Memory Allocator" by Jeff Bonwick.
 *
 * It allows the allocation of objects (i.e. fixed-size typed buffers) from
 * caches and is efficient in both space and time. This implementation follows
 * many of the indications from the paper mentioned. The most notable
 * differences are outlined below.
 *
 * The per-cache self-scaling hash table for buffer-to-bufctl conversion,
 * described in 3.2.3 "Slab Layout for Large Objects", has been replaced with
 * a constant time buffer-to-slab lookup that relies on the VM system.
 *
 * Slabs are allocated from the physical page allocator if they're page-sized,
 * and from kernel virtual memory if they're bigger, in order to prevent
 * physical memory fragmentation from making slab allocations fail.
 *
 * This implementation uses per-CPU pools of objects, which service most
 * allocation requests. These pools act as caches (but are named differently
 * to avoid confusion with CPU caches) that reduce contention on multiprocessor
 * systems. When a pool is empty and cannot provide an object, it is filled by
 * transferring multiple objects from the slab layer. The symmetric case is
 * handled likewise.
 *
 * TODO Rework the CPU pool layer to use the SLQB algorithm by Nick Piggin.
 */
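
/*
 * Illustrative usage sketch. The struct obj type and the obj_* functions
 * are hypothetical; only the kmem_* calls are part of this allocator's
 * interface, as declared in <kern/kmem.h>:
 *
 *   static struct kmem_cache obj_cache;
 *
 *   void
 *   obj_setup (void)
 *   {
 *     // An alignment of 0 selects the default minimum (KMEM_ALIGN_MIN).
 *     kmem_cache_init (&obj_cache, "obj", sizeof (struct obj), 0, NULL, 0);
 *   }
 *
 *   struct obj*
 *   obj_create (void)
 *   {
 *     return (kmem_cache_alloc (&obj_cache));
 *   }
 *
 *   void
 *   obj_destroy (struct obj *obj)
 *   {
 *     kmem_cache_free (&obj_cache, obj);
 *   }
 */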
#include <assert.h>
#include <limits.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#include <kern/adaptive_lock.h>
#include <kern/init.h>
#include <kern/list.h>
#include <kern/log.h>
#include <kern/log2.h>
#include <kern/kmem.h>
#include <kern/macros.h>
#include <kern/panic.h>
#include <kern/shell.h>
#include <kern/thread.h>
#include <machine/cpu.h>
#include <machine/page.h>
#include <machine/pmap.h>
#include <vm/kmem.h>
#include <vm/page.h>

// Minimum required alignment.
#define KMEM_ALIGN_MIN 8

/*
 * Minimum number of buffers per slab.
 *
 * This value is ignored when the slab size exceeds a threshold.
 */
#define KMEM_MIN_BUFS_PER_SLAB 8

/*
 * Special slab size beyond which the minimum number of buffers per slab is
 * ignored when computing the slab size of a cache.
 */
#define KMEM_SLAB_SIZE_THRESHOLD (8 * PAGE_SIZE)

/*
 * Special buffer size under which slab data is unconditionally allocated
 * from its associated slab.
 */
#define KMEM_BUF_SIZE_THRESHOLD (PAGE_SIZE / 8)

/*
 * The transfer size of a CPU pool is computed by dividing the pool size by
 * this value.
 */
#define KMEM_CPU_POOL_TRANSFER_RATIO 2

// Logarithm of the size of the smallest general cache.
#define KMEM_CACHES_FIRST_ORDER 5

// Number of caches backing general purpose allocations.
#define KMEM_NR_MEM_CACHES 13
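
/*
 * With the two constants above, the general purpose caches cover object
 * sizes from 32 bytes (1 << KMEM_CACHES_FIRST_ORDER) up to 128 KiB
 * (1 << (KMEM_CACHES_FIRST_ORDER + KMEM_NR_MEM_CACHES - 1)). Larger
 * kmem_alloc() requests fall through to the page allocator, as shown in
 * kmem_get_index() and kmem_alloc2() below.
 */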
// Error codes for kmem_cache_error().
#define KMEM_ERR_INVALID 0      // Invalid address being freed
#define KMEM_ERR_DOUBLEFREE 1   // Freeing already free address
#define KMEM_ERR_BUFTAG 2       // Invalid buftag content
#define KMEM_ERR_MODIFIED 3     // Buffer modified while free
#define KMEM_ERR_REDZONE 4      // Redzone violation

#ifdef KMEM_USE_CPU_LAYER

/*
 * Available CPU pool types.
 *
 * For each entry, the CPU pool size applies from the entry buf_size
 * (excluded) up to (and including) the buf_size of the preceding entry.
 *
 * See struct kmem_cpu_pool_type for a description of the values.
 */
static struct kmem_cpu_pool_type kmem_cpu_pool_types[] __read_mostly =
{
  { 32768, 1, 0, NULL },
  { 4096, 8, CPU_L1_SIZE, NULL },
  { 256, 64, CPU_L1_SIZE, NULL },
  { 0, 128, CPU_L1_SIZE, NULL }
};
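
/*
 * For example, a cache whose buf_size is 512 falls in the (256, 4096]
 * range, so it is served by the { 256, 64, CPU_L1_SIZE, NULL } entry:
 * per-CPU arrays of 64 objects, refilled and drained in batches of
 * 64 / KMEM_CPU_POOL_TRANSFER_RATIO = 32 objects (see kmem_cpu_pool_build()
 * and kmem_cpu_pool_fill()).
 */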
// Caches where CPU pool arrays are allocated from.
#define KMEM_CPU_ARRAY_CACHE_SIZE ARRAY_SIZE (kmem_cpu_pool_types)

static struct kmem_cache kmem_cpu_array_caches[KMEM_CPU_ARRAY_CACHE_SIZE];

#endif

// Cache for off slab data.
static struct kmem_cache kmem_slab_cache;

// General caches array.
static struct kmem_cache kmem_caches[KMEM_NR_MEM_CACHES];

// List of all caches managed by the allocator.
static struct list kmem_cache_list;
static struct adaptive_lock kmem_cache_list_lock;

static void kmem_cache_error (struct kmem_cache *cache, void *buf, int error,
                              void *arg);
static void* kmem_cache_alloc_from_slab (struct kmem_cache *cache);
static void kmem_cache_free_to_slab (struct kmem_cache *cache, void *buf);

/*
 * Buffer descriptor.
 *
 * For normal caches (i.e. without KMEM_CF_VERIFY), bufctls are located at the
 * end of (but inside) each buffer. If KMEM_CF_VERIFY is set, bufctls are
 * located after each buffer.
 *
 * When an object is allocated to a client, its bufctl isn't used. This memory
 * is instead used for redzoning if cache debugging is in effect.
 */
union kmem_bufctl
{
  union kmem_bufctl *next;
  uintptr_t redzone;
};

// Redzone guard word.
#ifdef __LP64__
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define KMEM_REDZONE_WORD 0xfeedfacefeedfaceUL
#else
#define KMEM_REDZONE_WORD 0xcefaedfecefaedfeUL
#endif
#else
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define KMEM_REDZONE_WORD 0xfeedfaceUL
#else
#define KMEM_REDZONE_WORD 0xcefaedfeUL
#endif
#endif

// Redzone byte for padding.
#define KMEM_REDZONE_BYTE 0xbb

/*
 * Buffer tag.
 *
 * This structure is only used for KMEM_CF_VERIFY caches. It is located after
 * the bufctl and includes information about the state of the buffer it
 * describes (allocated or not). It should be thought of as a debugging
 * extension of the bufctl.
 */
struct kmem_buftag
{
  uintptr_t state;
};

// Values the buftag state member can take.
#ifdef __LP64__
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define KMEM_BUFTAG_ALLOC 0xa110c8eda110c8edUL
#define KMEM_BUFTAG_FREE 0xf4eeb10cf4eeb10cUL
#else
#define KMEM_BUFTAG_ALLOC 0xedc810a1edc810a1UL
#define KMEM_BUFTAG_FREE 0x0cb1eef40cb1eef4UL
#endif
#else
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define KMEM_BUFTAG_ALLOC 0xa110c8edUL
#define KMEM_BUFTAG_FREE 0xf4eeb10cUL
#else
#define KMEM_BUFTAG_ALLOC 0xedc810a1UL
#define KMEM_BUFTAG_FREE 0x0cb1eef4UL
#endif
#endif

/*
 * Free and uninitialized patterns.
 *
 * These values are unconditionally 64-bit wide since buffers are at least
 * 8-byte aligned.
 */
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define KMEM_FREE_PATTERN 0xdeadbeefdeadbeefULL
#define KMEM_UNINIT_PATTERN 0xbaddcafebaddcafeULL
#else
#define KMEM_FREE_PATTERN 0xefbeaddeefbeaddeULL
#define KMEM_UNINIT_PATTERN 0xfecaddbafecaddbaULL
#endif

/*
 * Cache flags.
 *
 * The flags don't change once set and can be tested without locking.
 */
#define KMEM_CF_SLAB_EXTERNAL 0x1   // Slab data is off slab.
#define KMEM_CF_VERIFY 0x2          // Debugging facilities enabled.

/*
 * Page-aligned collection of unconstructed buffers.
 *
 * This structure is either allocated from the slab cache or, when internal
 * fragmentation allows it or the cache creator forces it, from the slab
 * it describes.
 */
struct kmem_slab
{
  struct list node;
  size_t nr_refs;
  union kmem_bufctl *first_free;
  void *addr;
};
static void*
kmem_buf_verify_bytes (void *buf, void *pattern, size_t size)
{
  char *end = buf + size;
  for (char *ptr = buf, *pattern_ptr = pattern;
       ptr < end; ++ptr, ++pattern_ptr)
    if (*ptr != *pattern_ptr)
      return (ptr);

  return (NULL);
}

static void
kmem_buf_fill (void *buf, uint64_t pattern, size_t size)
{
  assert (P2ALIGNED ((uintptr_t) buf, sizeof (uint64_t)));
  assert (P2ALIGNED (size, sizeof (uint64_t)));

  uint64_t *end = (uint64_t *)((char *)buf + size);
  for (uint64_t *ptr = buf; ptr < end; )
    *ptr++ = pattern;
}

static void*
kmem_buf_verify_fill (void *buf, uint64_t old, uint64_t new, size_t size)
{
  assert (P2ALIGNED ((uintptr_t) buf, sizeof (uint64_t)));
  assert (P2ALIGNED (size, sizeof (uint64_t)));

  uint64_t *end = (uint64_t *)((char *)buf + size);
  for (uint64_t *ptr = buf; ptr < end; ++ptr)
    {
      if (*ptr != old)
        return (kmem_buf_verify_bytes (ptr, &old, sizeof (old)));

      *ptr = new;
    }

  return (NULL);
}

static inline union kmem_bufctl*
kmem_buf_to_bufctl (void *buf, struct kmem_cache *cache)
{
  return ((union kmem_bufctl *)((char *)buf + cache->bufctl_dist));
}

static inline struct kmem_buftag*
kmem_buf_to_buftag (void *buf, struct kmem_cache *cache)
{
  return ((struct kmem_buftag *)((char *)buf + cache->buftag_dist));
}

static inline void*
kmem_bufctl_to_buf (union kmem_bufctl *bufctl, struct kmem_cache *cache)
{
  return ((char *)bufctl - cache->bufctl_dist);
}

static inline bool
kmem_pagealloc_is_virtual (size_t size)
{
  return (size > PAGE_SIZE);
}
/*
 * Bit 1 of the page private data records that the page was allocated with
 * VM_PAGE_SLEEP, so that the same flag can be passed back when freeing.
 */
static inline void
kmem_mark_page_sleepable (struct vm_page *page)
{
  _Auto value = (uintptr_t)vm_page_get_priv (page);
  vm_page_set_priv (page, (void *)(value | 2));
}

static void*
kmem_pagealloc (size_t size, uint32_t pflags)
{
  if (kmem_pagealloc_is_virtual (size))
    return (vm_kmem_alloc (size));

  size_t order = vm_page_order (size);
  _Auto page = vm_page_alloc (order, VM_PAGE_SEL_DIRECTMAP, VM_PAGE_KMEM,
                              (pflags & KMEM_ALLOC_SLEEP) ? VM_PAGE_SLEEP : 0);
  if (! page)
    return (NULL);
  else if (pflags & KMEM_ALLOC_SLEEP)
    kmem_mark_page_sleepable (page);

  return (vm_page_direct_ptr (page));
}

static void
kmem_pagefree (void *ptr, size_t size)
{
  if (kmem_pagealloc_is_virtual (size))
    vm_kmem_free (ptr, size);
  else
    {
      _Auto page = vm_page_lookup (vm_page_direct_pa ((uintptr_t)ptr));
      assert (page);
      uint32_t flags = ((uintptr_t)vm_page_get_priv (page) & 2) ?
                       VM_PAGE_SLEEP : 0;
      vm_page_free (page, vm_page_order (size), flags);
    }
}
static void
kmem_slab_create_verify (struct kmem_slab *slab, struct kmem_cache *cache)
{
  size_t buf_size = cache->buf_size;
  void *buf = slab->addr;
  _Auto buftag = kmem_buf_to_buftag (buf, cache);

  for (size_t buffers = cache->bufs_per_slab; buffers; --buffers)
    {
      kmem_buf_fill (buf, KMEM_FREE_PATTERN, cache->bufctl_dist);
      buftag->state = KMEM_BUFTAG_FREE;
      buf = (char *)buf + buf_size;
      buftag = kmem_buf_to_buftag (buf, cache);
    }
}

/*
 * Create an empty slab for a cache.
 *
 * The caller must drop all locks before calling this function.
 */
static struct kmem_slab*
kmem_slab_create (struct kmem_cache *cache, size_t color, uint32_t pflags)
{
  void *slab_buf = kmem_pagealloc (cache->slab_size, pflags);
  if (! slab_buf)
    return (NULL);

  struct kmem_slab *slab;
  if (cache->flags & KMEM_CF_SLAB_EXTERNAL)
    {
      slab = kmem_cache_alloc (&kmem_slab_cache);
      if (! slab)
        {
          kmem_pagefree (slab_buf, cache->slab_size);
          return (NULL);
        }
    }
  else
    slab = (struct kmem_slab *) ((char *)slab_buf + cache->slab_size) - 1;

  list_node_init (&slab->node);
  slab->nr_refs = 0;
  slab->first_free = NULL;
  slab->addr = slab_buf + color;

  size_t buf_size = cache->buf_size;
  _Auto bufctl = kmem_buf_to_bufctl (slab->addr, cache);

  for (size_t buffers = cache->bufs_per_slab; buffers; --buffers)
    {
      bufctl->next = slab->first_free;
      slab->first_free = bufctl;
      bufctl = (union kmem_bufctl *)((char *) bufctl + buf_size);
    }

  if (cache->flags & KMEM_CF_VERIFY)
    kmem_slab_create_verify (slab, cache);

  return (slab);
}

static inline uintptr_t
kmem_slab_buf (const struct kmem_slab *slab)
{
  return (P2ALIGN ((uintptr_t)slab->addr, PAGE_SIZE));
}
#ifdef KMEM_USE_CPU_LAYER

static void
kmem_cpu_pool_init (struct kmem_cpu_pool *cpu_pool, struct kmem_cache *cache)
{
  adaptive_lock_init (&cpu_pool->lock);
  cpu_pool->flags = cache->flags;
  cpu_pool->size = 0;
  cpu_pool->transfer_size = 0;
  cpu_pool->nr_objs = 0;
  cpu_pool->array = NULL;
}

static inline struct kmem_cpu_pool*
kmem_cpu_pool_get (struct kmem_cache *cache)
{
  return (&cache->cpu_pools[cpu_id ()]);
}

static inline void
kmem_cpu_pool_build (struct kmem_cpu_pool *cpu_pool, struct kmem_cache *cache,
                     void **array)
{
  cpu_pool->size = cache->cpu_pool_type->array_size;
  cpu_pool->transfer_size = (cpu_pool->size +
                             KMEM_CPU_POOL_TRANSFER_RATIO - 1) /
                            KMEM_CPU_POOL_TRANSFER_RATIO;
  cpu_pool->array = array;
}

static inline void*
kmem_cpu_pool_pop (struct kmem_cpu_pool *cpu_pool)
{
  return (cpu_pool->array[--cpu_pool->nr_objs]);
}

static inline void
kmem_cpu_pool_push (struct kmem_cpu_pool *cpu_pool, void *obj)
{
  cpu_pool->array[cpu_pool->nr_objs++] = obj;
}

static int
kmem_cpu_pool_fill (struct kmem_cpu_pool *cpu_pool, struct kmem_cache *cache)
{
  ADAPTIVE_LOCK_GUARD (&cache->lock);

  int i;
  for (i = 0; i < cpu_pool->transfer_size; ++i)
    {
      void *buf = kmem_cache_alloc_from_slab (cache);
      if (! buf)
        break;

      kmem_cpu_pool_push (cpu_pool, buf);
    }

  return (i);
}

static void
kmem_cpu_pool_drain (struct kmem_cpu_pool *cpu_pool, struct kmem_cache *cache)
{
  ADAPTIVE_LOCK_GUARD (&cache->lock);
  for (int i = cpu_pool->transfer_size; i > 0; --i)
    {
      void *obj = kmem_cpu_pool_pop (cpu_pool);
      kmem_cache_free_to_slab (cache, obj);
    }
}

#endif // KMEM_USE_CPU_LAYER

static void
kmem_cache_error (struct kmem_cache *cache, void *buf, int error, void *arg)
{
  printf ("kmem: error: cache: %s, buffer: %p\n", cache->name, buf);
  switch (error)
    {
      case KMEM_ERR_INVALID:
        panic ("kmem: freeing invalid address");
        break;
      case KMEM_ERR_DOUBLEFREE:
        panic ("kmem: attempting to free the same address twice");
        break;
      case KMEM_ERR_BUFTAG:
        panic ("kmem: invalid buftag content, buftag state: %p",
               (void *)((struct kmem_buftag *)arg)->state);
        break;
      case KMEM_ERR_MODIFIED:
        panic ("kmem: free buffer modified, fault address: %p, "
               "offset in buffer: %td", arg, arg - buf);
        break;
      case KMEM_ERR_REDZONE:
        panic ("kmem: write beyond end of buffer, fault address: %p, "
               "offset in buffer: %td", arg, arg - buf);
        break;
      default:
        panic ("kmem: unknown error");
    }

  __builtin_unreachable ();
}
/*
 * Compute properties such as slab size for the given cache.
 *
 * Once the slab size is known, this function sets the related properties
 * (buffers per slab and maximum color). It can also set some KMEM_CF_xxx
 * flags depending on the resulting layout.
 */
static void
kmem_cache_compute_properties (struct kmem_cache *cache, int flags)
{
  if (cache->buf_size < KMEM_BUF_SIZE_THRESHOLD)
    flags |= KMEM_CACHE_NOOFFSLAB;

  cache->slab_size = PAGE_SIZE;

  bool embed;
  size_t size;

  while (1)
    {
      if (flags & KMEM_CACHE_NOOFFSLAB)
        embed = true;
      else
        {
          size_t waste = cache->slab_size % cache->buf_size;
          embed = (sizeof (struct kmem_slab) <= waste);
        }

      size = cache->slab_size;
      if (embed)
        size -= sizeof (struct kmem_slab);

      if (size >= cache->buf_size)
        break;

      cache->slab_size += PAGE_SIZE;
    }

  /*
   * A user may force page allocation in order to guarantee that virtual
   * memory isn't used. This is normally done for objects that are used
   * to implement virtual memory and avoid circular dependencies.
   *
   * When forcing the use of direct page allocation, only allow single
   * page allocations in order to completely prevent physical memory
   * fragmentation from making slab allocations fail.
   */
  if ((flags & KMEM_CACHE_PAGE_ONLY) && cache->slab_size != PAGE_SIZE)
    panic ("kmem: unable to guarantee page allocation");

  cache->bufs_per_slab = size / cache->buf_size;
  cache->color_max = size % cache->buf_size;

  /*
   * Make sure the first page of a slab buffer can be found from the
   * address of the first object.
   *
   * See kmem_slab_buf().
   */
  if (cache->color_max >= PAGE_SIZE)
    cache->color_max = 0;

  if (! embed)
    cache->flags |= KMEM_CF_SLAB_EXTERNAL;
}
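
/*
 * Worked example for kmem_cache_compute_properties(), assuming 4 KiB pages
 * and a 40-byte struct kmem_slab (LP64): a cache with buf_size 192 is below
 * KMEM_BUF_SIZE_THRESHOLD (512), so KMEM_CACHE_NOOFFSLAB is forced and the
 * slab header is embedded. The first iteration gives
 * size = 4096 - 40 = 4056 >= 192, hence slab_size = PAGE_SIZE,
 * bufs_per_slab = 4056 / 192 = 21 and color_max = 4056 % 192 = 24.
 * Successive slabs then use colors 0, align, 2 * align, ... wrapping at
 * color_max (see kmem_cache_grow()).
 */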
void
kmem_cache_init (struct kmem_cache *cache, const char *name, size_t obj_size,
                 size_t align, kmem_ctor_fn_t ctor, int flags)
{
#ifdef CONFIG_KMEM_DEBUG
  cache->flags = KMEM_CF_VERIFY;
#else
  cache->flags = 0;
#endif

  if (flags & KMEM_CACHE_VERIFY)
    cache->flags |= KMEM_CF_VERIFY;

  if (align < KMEM_ALIGN_MIN)
    align = KMEM_ALIGN_MIN;

  assert (obj_size > 0);
  assert (ISP2 (align));
  assert (align < PAGE_SIZE);

  size_t buf_size = P2ROUND (obj_size, align);

  adaptive_lock_init (&cache->lock);
  list_node_init (&cache->node);
  list_init (&cache->partial_slabs);
  list_init (&cache->free_slabs);
  cache->obj_size = obj_size;
  cache->align = align;
  cache->buf_size = buf_size;
  cache->bufctl_dist = buf_size - sizeof (union kmem_bufctl);
  cache->color = 0;
  cache->nr_objs = 0;
  cache->nr_bufs = 0;
  cache->nr_slabs = 0;
  cache->nr_free_slabs = 0;
  cache->ctor = ctor;
  strlcpy (cache->name, name, sizeof (cache->name));
  cache->buftag_dist = 0;
  cache->redzone_pad = 0;

  if (cache->flags & KMEM_CF_VERIFY)
    {
      cache->bufctl_dist = buf_size;
      cache->buftag_dist = cache->bufctl_dist + sizeof (union kmem_bufctl);
      cache->redzone_pad = cache->bufctl_dist - cache->obj_size;
      buf_size += sizeof (union kmem_bufctl) + sizeof (struct kmem_buftag);
      buf_size = P2ROUND (buf_size, align);
      cache->buf_size = buf_size;
    }

  kmem_cache_compute_properties (cache, flags);

#ifdef KMEM_USE_CPU_LAYER
  for (cache->cpu_pool_type = kmem_cpu_pool_types;
       buf_size <= cache->cpu_pool_type->buf_size;
       ++cache->cpu_pool_type);

  for (size_t i = 0; i < ARRAY_SIZE (cache->cpu_pools); ++i)
    kmem_cpu_pool_init (&cache->cpu_pools[i], cache);
#endif

  ADAPTIVE_LOCK_GUARD (&kmem_cache_list_lock);
  list_insert_tail (&kmem_cache_list, &cache->node);
}
static inline int
kmem_cache_empty (struct kmem_cache *cache)
{
  return (cache->nr_objs == cache->nr_bufs);
}

static struct kmem_slab*
kmem_cache_buf_to_slab (const struct kmem_cache *cache, void *buf)
{
  if ((cache->flags & KMEM_CF_SLAB_EXTERNAL) ||
      cache->slab_size != PAGE_SIZE)
    return (NULL);

  return ((struct kmem_slab *)vm_page_end ((uintptr_t)buf) - 1);
}

static inline bool
kmem_cache_registration_required (const struct kmem_cache *cache)
{
  return ((cache->flags & KMEM_CF_SLAB_EXTERNAL) ||
          (cache->flags & KMEM_CF_VERIFY) ||
          cache->slab_size != PAGE_SIZE);
}

/*
 * The page private data stores the owning slab. Bit 1 (the value 2) is
 * reserved for the sleepable flag (see kmem_mark_page_sleepable()), so it
 * is preserved when setting and masked out when reading.
 */
static void
kmem_page_set_priv (struct vm_page *page, void *priv)
{
  uintptr_t val = (uintptr_t)vm_page_get_priv (page) | (uintptr_t)priv;
  vm_page_set_priv (page, (void *)val);
}

static void*
kmem_page_get_priv (struct vm_page *page)
{
  uintptr_t val = (uintptr_t)vm_page_get_priv (page);
  return ((void *)(val & ~2));
}
static void
kmem_cache_register (struct kmem_cache *cache, struct kmem_slab *slab)
{
  assert (kmem_cache_registration_required (cache));
  assert (!slab->nr_refs);

  bool virtual = kmem_pagealloc_is_virtual (cache->slab_size);
  for (uintptr_t va = kmem_slab_buf (slab), end = va + cache->slab_size;
       va < end; va += PAGE_SIZE)
    {
      phys_addr_t pa;
      if (virtual)
        {
          int error = pmap_kextract (va, &pa);
          assert (! error);
        }
      else
        pa = vm_page_direct_pa (va);

      _Auto page = vm_page_lookup (pa);
      assert (page);
      assert ((virtual && vm_page_type (page) == VM_PAGE_KERNEL) ||
              (!virtual && vm_page_type (page) == VM_PAGE_KMEM));
      assert (!kmem_page_get_priv (page));
      kmem_page_set_priv (page, slab);
    }
}

static struct kmem_slab*
kmem_cache_lookup (struct kmem_cache *cache, void *buf)
{
  assert (kmem_cache_registration_required (cache));

  bool virtual = kmem_pagealloc_is_virtual (cache->slab_size);
  uintptr_t va = (uintptr_t) buf;
  phys_addr_t pa;

  if (virtual)
    {
      int error = pmap_kextract (va, &pa);
      if (error)
        return (NULL);
    }
  else
    pa = vm_page_direct_pa (va);

  _Auto page = vm_page_lookup (pa);
  if (! page)
    return (NULL);

  if ((virtual && (vm_page_type (page) != VM_PAGE_KERNEL)) ||
      (!virtual && (vm_page_type (page) != VM_PAGE_KMEM)))
    return (NULL);

  struct kmem_slab *slab = kmem_page_get_priv (page);
  assert ((uintptr_t)buf >= kmem_slab_buf (slab));
  assert ((uintptr_t)buf < kmem_slab_buf (slab) + cache->slab_size);
  return (slab);
}

static int
kmem_cache_grow (struct kmem_cache *cache, uint32_t pflags)
{
  adaptive_lock_acquire (&cache->lock);
  if (!kmem_cache_empty (cache))
    {
      adaptive_lock_release (&cache->lock);
      return (1);
    }

  size_t color = cache->color;
  cache->color += cache->align;
  if (cache->color > cache->color_max)
    cache->color = 0;

  adaptive_lock_release (&cache->lock);
  struct kmem_slab *slab = kmem_slab_create (cache, color, pflags);
  adaptive_lock_acquire (&cache->lock);

  if (slab)
    {
      list_insert_head (&cache->free_slabs, &slab->node);
      cache->nr_bufs += cache->bufs_per_slab;
      ++cache->nr_slabs;
      ++cache->nr_free_slabs;

      if (kmem_cache_registration_required (cache))
        kmem_cache_register (cache, slab);
    }

  /*
   * Even if our slab creation failed, another thread might have succeeded
   * in growing the cache.
   */
  int empty = kmem_cache_empty (cache);
  adaptive_lock_release (&cache->lock);
  return (!empty);
}

/*
 * Allocate a raw (unconstructed) buffer from the slab layer of a cache.
 *
 * The cache must be locked before calling this function.
 */
static void*
kmem_cache_alloc_from_slab (struct kmem_cache *cache)
{
  struct kmem_slab *slab;
  if (!list_empty (&cache->partial_slabs))
    slab = list_first_entry (&cache->partial_slabs, struct kmem_slab, node);
  else if (!list_empty (&cache->free_slabs))
    slab = list_first_entry (&cache->free_slabs, struct kmem_slab, node);
  else
    return (NULL);

  union kmem_bufctl *bufctl = slab->first_free;
  assert (bufctl);
  slab->first_free = bufctl->next;
  ++cache->nr_objs;

  if (++slab->nr_refs == cache->bufs_per_slab)
    { // The slab has become complete.
      list_remove (&slab->node);
      if (slab->nr_refs == 1)
        --cache->nr_free_slabs;
    }
  else if (slab->nr_refs == 1)
    {
      /*
       * The slab has become partial. Insert the new slab at the end of
       * the list to reduce fragmentation.
       */
      list_remove (&slab->node);
      list_insert_tail (&cache->partial_slabs, &slab->node);
      --cache->nr_free_slabs;
    }

  return (kmem_bufctl_to_buf (bufctl, cache));
}

/*
 * Release a buffer to the slab layer of a cache.
 *
 * The cache must be locked before calling this function.
 */
static void
kmem_cache_free_to_slab (struct kmem_cache *cache, void *buf)
{
  struct kmem_slab *slab = kmem_cache_buf_to_slab (cache, buf);
  if (! slab)
    {
      slab = kmem_cache_lookup (cache, buf);
      assert (slab);
    }

  assert (slab->nr_refs >= 1);
  assert (slab->nr_refs <= cache->bufs_per_slab);

  union kmem_bufctl *bufctl = kmem_buf_to_bufctl (buf, cache);
  bufctl->next = slab->first_free;
  slab->first_free = bufctl;
  --cache->nr_objs;

  if (--slab->nr_refs == 0)
    {
      /*
       * The slab has become free. If it was partial,
       * remove it from its list.
       */
      if (cache->bufs_per_slab != 1)
        list_remove (&slab->node);

      list_insert_head (&cache->free_slabs, &slab->node);
      ++cache->nr_free_slabs;
    }
  else if (slab->nr_refs == cache->bufs_per_slab - 1)
    // The slab has become partial.
    list_insert_head (&cache->partial_slabs, &slab->node);
}
static void
kmem_cache_alloc_verify (struct kmem_cache *cache, void *buf)
{
  struct kmem_buftag *buftag = kmem_buf_to_buftag (buf, cache);
  if (buftag->state != KMEM_BUFTAG_FREE)
    kmem_cache_error (cache, buf, KMEM_ERR_BUFTAG, buftag);

  void *addr = kmem_buf_verify_fill (buf, KMEM_FREE_PATTERN,
                                     KMEM_UNINIT_PATTERN, cache->bufctl_dist);
  if (addr)
    kmem_cache_error (cache, buf, KMEM_ERR_MODIFIED, addr);

  addr = (char *)buf + cache->obj_size;
  memset (addr, KMEM_REDZONE_BYTE, cache->redzone_pad);

  union kmem_bufctl *bufctl = kmem_buf_to_bufctl (buf, cache);
  bufctl->redzone = KMEM_REDZONE_WORD;
  buftag->state = KMEM_BUFTAG_ALLOC;
}

static void*
kmem_cache_alloc_impl (struct kmem_cache *cache, uint32_t pflags)
{
#ifdef KMEM_USE_CPU_LAYER
  thread_pin ();
  struct kmem_cpu_pool *cpu_pool = kmem_cpu_pool_get (cache);
  adaptive_lock_acquire (&cpu_pool->lock);

fast_alloc:
  if (likely (cpu_pool->nr_objs > 0))
    {
      void *buf = kmem_cpu_pool_pop (cpu_pool);
      bool verify = (cpu_pool->flags & KMEM_CF_VERIFY);
      adaptive_lock_release (&cpu_pool->lock);
      thread_unpin ();

      if (verify)
        kmem_cache_alloc_verify (cache, buf);

      return (buf);
    }

  if (cpu_pool->array)
    {
      if (!kmem_cpu_pool_fill (cpu_pool, cache))
        {
          adaptive_lock_release (&cpu_pool->lock);
          thread_unpin ();

          if (!kmem_cache_grow (cache, pflags))
            return (NULL);

          thread_pin ();
          cpu_pool = kmem_cpu_pool_get (cache);
          adaptive_lock_acquire (&cpu_pool->lock);
        }

      goto fast_alloc;
    }

  adaptive_lock_release (&cpu_pool->lock);
  thread_unpin ();
#endif // KMEM_USE_CPU_LAYER

slab_alloc:
  adaptive_lock_acquire (&cache->lock);
  void *buf = kmem_cache_alloc_from_slab (cache);
  adaptive_lock_release (&cache->lock);

  if (! buf)
    {
      if (!kmem_cache_grow (cache, pflags))
        return (NULL);

      goto slab_alloc;
    }

  if (cache->flags & KMEM_CF_VERIFY)
    kmem_cache_alloc_verify (cache, buf);

  return (buf);
}

void*
kmem_cache_alloc2 (struct kmem_cache *cache, uint32_t pflags)
{
  void *ret = kmem_cache_alloc_impl (cache, pflags);
  if (ret && cache->ctor)
    cache->ctor (ret);

  return (ret);
}

static void
kmem_cache_free_verify (struct kmem_cache *cache, void *buf)
{
  struct kmem_slab *slab = kmem_cache_lookup (cache, buf);
  if (! slab)
    kmem_cache_error (cache, buf, KMEM_ERR_INVALID, NULL);

  uintptr_t slabend = P2ALIGN ((uintptr_t)slab->addr +
                               cache->slab_size, PAGE_SIZE);
  if ((uintptr_t)buf >= slabend)
    kmem_cache_error (cache, buf, KMEM_ERR_INVALID, NULL);

  if (((uintptr_t)buf - (uintptr_t)slab->addr) % cache->buf_size)
    kmem_cache_error (cache, buf, KMEM_ERR_INVALID, NULL);

  // As the buffer address is valid, accessing its buftag is safe.
  struct kmem_buftag *buftag = kmem_buf_to_buftag (buf, cache);
  if (buftag->state == KMEM_BUFTAG_ALLOC)
    ;
  else if (buftag->state == KMEM_BUFTAG_FREE)
    kmem_cache_error (cache, buf, KMEM_ERR_DOUBLEFREE, NULL);
  else
    kmem_cache_error (cache, buf, KMEM_ERR_BUFTAG, buftag);

  unsigned char *redzone_byte = (unsigned char *)buf + cache->obj_size;
  union kmem_bufctl *bufctl = kmem_buf_to_bufctl (buf, cache);

  for (; redzone_byte < (unsigned char *)bufctl; ++redzone_byte)
    if (*redzone_byte != KMEM_REDZONE_BYTE)
      kmem_cache_error (cache, buf, KMEM_ERR_REDZONE, redzone_byte);

  if (bufctl->redzone != KMEM_REDZONE_WORD)
    {
      uintptr_t word = KMEM_REDZONE_WORD;
      redzone_byte = kmem_buf_verify_bytes (&bufctl->redzone, &word,
                                            sizeof (bufctl->redzone));
      kmem_cache_error (cache, buf, KMEM_ERR_REDZONE, redzone_byte);
    }

  kmem_buf_fill (buf, KMEM_FREE_PATTERN, cache->bufctl_dist);
  buftag->state = KMEM_BUFTAG_FREE;
}
void
kmem_cache_free (struct kmem_cache *cache, void *obj)
{
#ifdef KMEM_USE_CPU_LAYER
  thread_pin ();
  struct kmem_cpu_pool *cpu_pool = kmem_cpu_pool_get (cache);

  if (cpu_pool->flags & KMEM_CF_VERIFY)
    {
      thread_unpin ();
      kmem_cache_free_verify (cache, obj);
      thread_pin ();
      cpu_pool = kmem_cpu_pool_get (cache);
    }

  adaptive_lock_acquire (&cpu_pool->lock);

fast_free:
  if (likely (cpu_pool->nr_objs < cpu_pool->size))
    {
      kmem_cpu_pool_push (cpu_pool, obj);
      adaptive_lock_release (&cpu_pool->lock);
      thread_unpin ();
      return;
    }

  if (cpu_pool->array)
    {
      kmem_cpu_pool_drain (cpu_pool, cache);
      goto fast_free;
    }

  adaptive_lock_release (&cpu_pool->lock);

  void **array = kmem_cache_alloc (cache->cpu_pool_type->array_cache);
  if (array)
    {
      adaptive_lock_acquire (&cpu_pool->lock);

      /*
       * Another thread may have built the CPU pool while the lock was
       * dropped.
       */
      if (cpu_pool->array)
        {
          adaptive_lock_release (&cpu_pool->lock);
          thread_unpin ();
          kmem_cache_free (cache->cpu_pool_type->array_cache, array);
          thread_pin ();
          cpu_pool = kmem_cpu_pool_get (cache);
          adaptive_lock_acquire (&cpu_pool->lock);
          goto fast_free;
        }

      kmem_cpu_pool_build (cpu_pool, cache, array);
      goto fast_free;
    }

  thread_unpin ();
#else
  if (cache->flags & KMEM_CF_VERIFY)
    kmem_cache_free_verify (cache, obj);
#endif // KMEM_USE_CPU_LAYER

  adaptive_lock_acquire (&cache->lock);
  kmem_cache_free_to_slab (cache, obj);
  adaptive_lock_release (&cache->lock);
}
void
kmem_cache_info (struct kmem_cache *cache, struct stream *stream)
{
  char flags_str[64];
  snprintf (flags_str, sizeof (flags_str), "%s%s",
            (cache->flags & KMEM_CF_SLAB_EXTERNAL) ? " SLAB_EXTERNAL" : "",
            (cache->flags & KMEM_CF_VERIFY) ? " VERIFY" : "");

  ADAPTIVE_LOCK_GUARD (&cache->lock);
  fmt_xprintf (stream, "kmem: flags: 0x%x%s\n",
               cache->flags, flags_str);
  fmt_xprintf (stream, "kmem: obj_size: %zu\n", cache->obj_size);
  fmt_xprintf (stream, "kmem: align: %zu\n", cache->align);
  fmt_xprintf (stream, "kmem: buf_size: %zu\n", cache->buf_size);
  fmt_xprintf (stream, "kmem: bufctl_dist: %zu\n", cache->bufctl_dist);
  fmt_xprintf (stream, "kmem: slab_size: %zu\n", cache->slab_size);
  fmt_xprintf (stream, "kmem: color_max: %zu\n", cache->color_max);
  fmt_xprintf (stream, "kmem: bufs_per_slab: %zu\n", cache->bufs_per_slab);
  fmt_xprintf (stream, "kmem: nr_objs: %zu\n", cache->nr_objs);
  fmt_xprintf (stream, "kmem: nr_bufs: %zu\n", cache->nr_bufs);
  fmt_xprintf (stream, "kmem: nr_slabs: %zu\n", cache->nr_slabs);
  fmt_xprintf (stream, "kmem: nr_free_slabs: %zu\n", cache->nr_free_slabs);
  fmt_xprintf (stream, "kmem: buftag_dist: %zu\n", cache->buftag_dist);
  fmt_xprintf (stream, "kmem: redzone_pad: %zu\n", cache->redzone_pad);

#ifdef KMEM_USE_CPU_LAYER
  fmt_xprintf (stream, "kmem: cpu_pool_size: %d\n",
               cache->cpu_pool_type->array_size);
#endif
}

#ifdef CONFIG_SHELL

static struct kmem_cache*
kmem_lookup_cache (const char *name)
{
  ADAPTIVE_LOCK_GUARD (&kmem_cache_list_lock);

  struct kmem_cache *cache;
  list_for_each_entry (&kmem_cache_list, cache, node)
    if (strcmp (cache->name, name) == 0)
      return (cache);

  return (NULL);
}

static void
kmem_shell_info (struct shell *shell __unused, int argc, char **argv)
{
  if (argc < 2)
    kmem_info (shell->stream);
  else
    {
      struct kmem_cache *cache = kmem_lookup_cache (argv[1]);
      if (! cache)
        fmt_xprintf (shell->stream, "kmem: info: cache not found\n");
      else
        kmem_cache_info (cache, shell->stream);
    }
}

static struct shell_cmd kmem_shell_cmds[] =
{
  SHELL_CMD_INITIALIZER ("kmem_info", kmem_shell_info,
                         "kmem_info [<cache_name>]",
                         "display information about kernel memory and caches"),
};

static int __init
kmem_setup_shell (void)
{
  SHELL_REGISTER_CMDS (kmem_shell_cmds, shell_get_main_cmd_set ());
  return (0);
}

INIT_OP_DEFINE (kmem_setup_shell,
                INIT_OP_DEP (kmem_setup, true),
                INIT_OP_DEP (printf_setup, true),
                INIT_OP_DEP (shell_setup, true),
                INIT_OP_DEP (thread_setup, true));

#endif // CONFIG_SHELL
#ifdef KMEM_USE_CPU_LAYER

static void
kmem_bootstrap_cpu (void)
{
  char name[KMEM_NAME_SIZE];
  for (size_t i = 0; i < ARRAY_SIZE (kmem_cpu_pool_types); ++i)
    {
      struct kmem_cpu_pool_type *cpu_pool_type = &kmem_cpu_pool_types[i];
      cpu_pool_type->array_cache = &kmem_cpu_array_caches[i];
      sprintf (name, "kmem_cpu_array_%d", cpu_pool_type->array_size);
      size_t size = sizeof (void *) * cpu_pool_type->array_size;
      kmem_cache_init (cpu_pool_type->array_cache, name, size,
                       cpu_pool_type->array_align, NULL, 0);
    }
}

#endif // KMEM_USE_CPU_LAYER

static int __init
kmem_bootstrap (void)
{
  // Make sure a bufctl can always be stored in a buffer.
  assert (sizeof (union kmem_bufctl) <= KMEM_ALIGN_MIN);

  list_init (&kmem_cache_list);
  adaptive_lock_init (&kmem_cache_list_lock);

#ifdef KMEM_USE_CPU_LAYER
  kmem_bootstrap_cpu ();
#endif // KMEM_USE_CPU_LAYER

  // Prevent off slab data for the slab cache to avoid infinite recursion.
  kmem_cache_init (&kmem_slab_cache, "kmem_slab", sizeof (struct kmem_slab),
                   0, NULL, KMEM_CACHE_NOOFFSLAB);

  size_t size = 1 << KMEM_CACHES_FIRST_ORDER;
  char name[KMEM_NAME_SIZE];

  for (size_t i = 0; i < ARRAY_SIZE (kmem_caches); ++i)
    {
      sprintf (name, "kmem_%zu", size);
      kmem_cache_init (&kmem_caches[i], name, size, 0, NULL, 0);
      size <<= 1;
    }

  return (0);
}

INIT_OP_DEFINE (kmem_bootstrap,
                INIT_OP_DEP (thread_bootstrap, true),
                INIT_OP_DEP (vm_page_setup, true));

static int __init
kmem_setup (void)
{
  return (0);
}

INIT_OP_DEFINE (kmem_setup,
                INIT_OP_DEP (kmem_bootstrap, true),
                INIT_OP_DEP (vm_kmem_setup, true));
static inline size_t
kmem_get_index (size_t size)
{
  return (log2_order (size) - KMEM_CACHES_FIRST_ORDER);
}
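
/*
 * For example, assuming log2_order() returns the order of the enclosing
 * power of two, kmem_alloc (100) yields log2_order (100) = 7, so the index
 * is 7 - KMEM_CACHES_FIRST_ORDER = 2 and the request is served by the
 * "kmem_128" general cache. Requests larger than the biggest general cache
 * fall back to kmem_pagealloc().
 */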
static void
kmem_alloc_verify (struct kmem_cache *cache, void *buf, size_t size)
{
  assert (size <= cache->obj_size);
  memset ((char *)buf + size, KMEM_REDZONE_BYTE, cache->obj_size - size);
}

void*
kmem_alloc2 (size_t size, uint32_t flags)
{
  if (! size)
    return (NULL);

  size_t index = kmem_get_index (size);
  if (index < ARRAY_SIZE (kmem_caches))
    {
      struct kmem_cache *cache = &kmem_caches[index];
      void *buf = kmem_cache_alloc_impl (cache, flags);
      if (buf && (cache->flags & KMEM_CF_VERIFY))
        kmem_alloc_verify (cache, buf, size);

      return (buf);
    }

  return (kmem_pagealloc (size, flags));
}

void*
kmem_zalloc (size_t size)
{
  void *ptr = kmem_alloc (size);
  return (ptr ? memset (ptr, 0, size) : ptr);
}

static void
kmem_free_verify (struct kmem_cache *cache, void *buf, size_t size)
{
  assert (size <= cache->obj_size);

  unsigned char *redzone_byte = buf + size,
                *redzone_end = buf + cache->obj_size;

  for (; redzone_byte < redzone_end; ++redzone_byte)
    if (*redzone_byte != KMEM_REDZONE_BYTE)
      kmem_cache_error (cache, buf, KMEM_ERR_REDZONE, redzone_byte);
}

void
kmem_free (void *ptr, size_t size)
{
  if (!ptr || !size)
    return;

  size_t index = kmem_get_index (size);
  if (index < ARRAY_SIZE (kmem_caches))
    {
      struct kmem_cache *cache = &kmem_caches[index];
      if (cache->flags & KMEM_CF_VERIFY)
        kmem_free_verify (cache, ptr, size);

      kmem_cache_free (cache, ptr);
    }
  else
    kmem_pagefree (ptr, size);
}
void
kmem_info (struct stream *stream)
{
  size_t total = 0, total_physical = 0, total_virtual = 0, total_reclaim = 0,
         total_reclaim_physical = 0, total_reclaim_virtual = 0;

  fmt_xprintf (stream, "kmem: cache "
               "obj slab bufs objs bufs total reclaimable\n");
  fmt_xprintf (stream, "kmem: name size size /slab "
               "usage count memory memory\n");

  adaptive_lock_acquire (&kmem_cache_list_lock);

  struct kmem_cache *cache;
  list_for_each_entry (&kmem_cache_list, cache, node)
    {
      ADAPTIVE_LOCK_GUARD (&cache->lock);
      size_t mem_usage = (cache->nr_slabs * cache->slab_size) >> 10,
             mem_reclaim = (cache->nr_free_slabs * cache->slab_size) >> 10;

      total += mem_usage;
      total_reclaim += mem_reclaim;

      if (kmem_pagealloc_is_virtual (cache->slab_size))
        {
          total_virtual += mem_usage;
          total_reclaim_virtual += mem_reclaim;
        }
      else
        {
          total_physical += mem_usage;
          total_reclaim_physical += mem_reclaim;
        }

      fmt_xprintf (stream,
                   "kmem: %-19s %6zu %3zuk %4zu %6zu %6zu %7zuk %10zuk\n",
                   cache->name, cache->obj_size, cache->slab_size >> 10,
                   cache->bufs_per_slab, cache->nr_objs, cache->nr_bufs,
                   mem_usage, mem_reclaim);
    }

  adaptive_lock_release (&kmem_cache_list_lock);

  fmt_xprintf (stream, "total: %zuk (phys: %zuk virt: %zuk), "
               "reclaim: %zuk (phys: %zuk virt: %zuk)\n",
               total, total_physical, total_virtual,
               total_reclaim, total_reclaim_physical, total_reclaim_virtual);
}