kmem.c

/*
 * Copyright (c) 2010-2018 Richard Braun.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 *
 * This allocator is based on the paper "The Slab Allocator: An Object-Caching
 * Kernel Memory Allocator" by Jeff Bonwick.
 *
 * It allows the allocation of objects (i.e. fixed-size typed buffers) from
 * caches and is efficient in both space and time. This implementation follows
 * many of the indications from the paper mentioned. The most notable
 * differences are outlined below.
 *
 * The per-cache self-scaling hash table for buffer-to-bufctl conversion,
 * described in 3.2.3 "Slab Layout for Large Objects", has been replaced with
 * a constant time buffer-to-slab lookup that relies on the VM system.
 *
 * Slabs are allocated from the physical page allocator if they're page-sized,
 * and from kernel virtual memory if they're bigger, in order to prevent
 * physical memory fragmentation from making slab allocations fail.
 *
 * This implementation uses per-CPU pools of objects, which service most
 * allocation requests. These pools act as caches (but are named differently
 * to avoid confusion with CPU caches) that reduce contention on multiprocessor
 * systems. When a pool is empty and cannot provide an object, it is filled by
 * transferring multiple objects from the slab layer. The symmetric case is
 * handled likewise.
 *
 * TODO Rework the CPU pool layer to use the SLQB algorithm by Nick Piggin.
 */
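
/*
 * Typical usage of the cache interface, shown here for illustration only
 * (the "struct foo" type and its foo_ctor() constructor are hypothetical;
 * see kern/kmem.h for the public declarations):
 *
 *   static struct kmem_cache foo_cache;
 *
 *   kmem_cache_init (&foo_cache, "foo", sizeof (struct foo), 0, foo_ctor, 0);
 *   struct foo *obj = kmem_cache_alloc (&foo_cache);
 *
 *   if (obj)
 *     kmem_cache_free (&foo_cache, obj);
 *
 * General purpose allocations go through kmem_alloc()/kmem_free() instead,
 * which dispatch to the kmem_N general caches defined below, or directly to
 * the page allocator for larger sizes.
 */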
#include <assert.h>
#include <limits.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#include <kern/adaptive_lock.h>
#include <kern/init.h>
#include <kern/list.h>
#include <kern/log.h>
#include <kern/log2.h>
#include <kern/kmem.h>
#include <kern/macros.h>
#include <kern/panic.h>
#include <kern/shell.h>
#include <kern/thread.h>

#include <machine/cpu.h>
#include <machine/page.h>
#include <machine/pmap.h>

#include <vm/kmem.h>
#include <vm/page.h>

// Minimum required alignment.
#define KMEM_ALIGN_MIN 8

/*
 * Minimum number of buffers per slab.
 *
 * This value is ignored when the slab size exceeds a threshold.
 */
#define KMEM_MIN_BUFS_PER_SLAB 8

/*
 * Special slab size beyond which the minimum number of buffers per slab is
 * ignored when computing the slab size of a cache.
 */
#define KMEM_SLAB_SIZE_THRESHOLD (8 * PAGE_SIZE)

/*
 * Special buffer size under which slab data is unconditionally allocated
 * from its associated slab.
 */
#define KMEM_BUF_SIZE_THRESHOLD (PAGE_SIZE / 8)
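
/*
 * With a 4 KiB page size, the two thresholds above evaluate to 32 KiB and
 * 512 bytes respectively.
 */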

/*
 * The transfer size of a CPU pool is computed by dividing the pool size by
 * this value.
 */
#define KMEM_CPU_POOL_TRANSFER_RATIO 2

// Logarithm of the size of the smallest general cache.
#define KMEM_CACHES_FIRST_ORDER 5

// Number of caches backing general purpose allocations.
#define KMEM_NR_MEM_CACHES 13

// Options for kmem_cache_alloc_verify().
#define KMEM_AV_NOCONSTRUCT   0
#define KMEM_AV_CONSTRUCT     1

// Error codes for kmem_cache_error().
#define KMEM_ERR_INVALID      0   // Invalid address being freed
#define KMEM_ERR_DOUBLEFREE   1   // Freeing already free address
#define KMEM_ERR_BUFTAG       2   // Invalid buftag content
#define KMEM_ERR_MODIFIED     3   // Buffer modified while free
#define KMEM_ERR_REDZONE      4   // Redzone violation

#ifdef KMEM_USE_CPU_LAYER

/*
 * Available CPU pool types.
 *
 * For each entry, the CPU pool size applies from the entry buf_size
 * (excluded) up to (and including) the buf_size of the preceding entry.
 *
 * See struct kmem_cpu_pool_type for a description of the values.
 */
static struct kmem_cpu_pool_type kmem_cpu_pool_types[] __read_mostly =
{
  { 32768, 1, 0, NULL },
  { 4096, 8, CPU_L1_SIZE, NULL },
  { 256, 64, CPU_L1_SIZE, NULL },
  { 0, 128, CPU_L1_SIZE, NULL }
};
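
/*
 * As an example of the rule above: with this table, a cache whose buffer
 * size is 512 bytes falls in the (256, 4096] range and therefore uses the
 * { 256, 64, CPU_L1_SIZE } entry, i.e. per-CPU arrays of 64 objects, moved
 * to and from the slab layer 32 at a time (see
 * KMEM_CPU_POOL_TRANSFER_RATIO).
 */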

// Caches where CPU pool arrays are allocated from.
#define KMEM_CPU_ARRAY_CACHE_SIZE ARRAY_SIZE (kmem_cpu_pool_types)
static struct kmem_cache kmem_cpu_array_caches[KMEM_CPU_ARRAY_CACHE_SIZE];

#endif

// Cache for off slab data.
static struct kmem_cache kmem_slab_cache;

// General caches array.
static struct kmem_cache kmem_caches[KMEM_NR_MEM_CACHES];

// List of all caches managed by the allocator.
static struct list kmem_cache_list;
static struct adaptive_lock kmem_cache_list_lock;

static void kmem_cache_error (struct kmem_cache *cache, void *buf, int error,
                              void *arg);
static void* kmem_cache_alloc_from_slab (struct kmem_cache *cache);
static void kmem_cache_free_to_slab (struct kmem_cache *cache, void *buf);

/*
 * Buffer descriptor.
 *
 * For normal caches (i.e. without KMEM_CF_VERIFY), bufctls are located at the
 * end of (but inside) each buffer. If KMEM_CF_VERIFY is set, bufctls are
 * located after each buffer.
 *
 * When an object is allocated to a client, its bufctl isn't used. This memory
 * is instead used for redzoning if cache debugging is in effect.
 */
union kmem_bufctl
{
  union kmem_bufctl *next;
  uintptr_t redzone;
};

// Redzone guard word.
#ifdef __LP64__
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define KMEM_REDZONE_WORD 0xfeedfacefeedfaceUL
#else
#define KMEM_REDZONE_WORD 0xcefaedfecefaedfeUL
#endif
#else
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define KMEM_REDZONE_WORD 0xfeedfaceUL
#else
#define KMEM_REDZONE_WORD 0xcefaedfeUL
#endif
#endif

// Redzone byte for padding.
#define KMEM_REDZONE_BYTE 0xbb

/*
 * Buffer tag.
 *
 * This structure is only used for KMEM_CF_VERIFY caches. It is located after
 * the bufctl and includes information about the state of the buffer it
 * describes (allocated or not). It should be thought of as a debugging
 * extension of the bufctl.
 */
struct kmem_buftag
{
  uintptr_t state;
};
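
/*
 * For reference, the resulting buffer layout in a KMEM_CF_VERIFY cache
 * (see kmem_cache_init() for how the distances are computed):
 *
 *   | object | redzone padding | bufctl / redzone word | buftag |
 *   ^        ^                 ^                       ^
 *   buf      buf + obj_size    buf + bufctl_dist       buf + buftag_dist
 *
 * In caches without KMEM_CF_VERIFY, the bufctl instead overlaps the end of
 * the buffer itself and no buftag or redzone is maintained.
 */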

// Values the buftag state member can take.
#ifdef __LP64__
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define KMEM_BUFTAG_ALLOC 0xa110c8eda110c8edUL
#define KMEM_BUFTAG_FREE 0xf4eeb10cf4eeb10cUL
#else
#define KMEM_BUFTAG_ALLOC 0xedc810a1edc810a1UL
#define KMEM_BUFTAG_FREE 0x0cb1eef40cb1eef4UL
#endif
#else
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define KMEM_BUFTAG_ALLOC 0xa110c8edUL
#define KMEM_BUFTAG_FREE 0xf4eeb10cUL
#else
#define KMEM_BUFTAG_ALLOC 0xedc810a1UL
#define KMEM_BUFTAG_FREE 0x0cb1eef4UL
#endif
#endif

/*
 * Free and uninitialized patterns.
 *
 * These values are unconditionally 64-bit wide since buffers are at least
 * 8-byte aligned.
 */
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define KMEM_FREE_PATTERN 0xdeadbeefdeadbeefULL
#define KMEM_UNINIT_PATTERN 0xbaddcafebaddcafeULL
#else
#define KMEM_FREE_PATTERN 0xefbeaddeefbeaddeULL
#define KMEM_UNINIT_PATTERN 0xfecaddbafecaddbaULL
#endif

/*
 * Cache flags.
 *
 * The flags don't change once set and can be tested without locking.
 */
#define KMEM_CF_SLAB_EXTERNAL   0x1   // Slab data is off slab.
#define KMEM_CF_VERIFY          0x2   // Debugging facilities enabled.

/*
 * Page-aligned collection of unconstructed buffers.
 *
 * This structure is either allocated from the slab cache, or, when internal
 * fragmentation allows it, or if forced by the cache creator, from the slab
 * it describes.
 */
struct kmem_slab
{
  struct list node;
  size_t nr_refs;
  union kmem_bufctl *first_free;
  void *addr;
};

static void*
kmem_buf_verify_bytes (void *buf, void *pattern, size_t size)
{
  char *end = buf + size;
  for (char *ptr = buf, *pattern_ptr = pattern;
       ptr < end; ptr++, pattern_ptr++)
    if (*ptr != *pattern_ptr)
      return (ptr);

  return (NULL);
}

static void
kmem_buf_fill (void *buf, uint64_t pattern, size_t size)
{
  assert (P2ALIGNED ((uintptr_t) buf, sizeof (uint64_t)));
  assert (P2ALIGNED (size, sizeof (uint64_t)));

  uint64_t *end = (uint64_t *)((char *)buf + size);
  for (uint64_t *ptr = buf; ptr < end; ptr++)
    *ptr = pattern;
}

static void*
kmem_buf_verify_fill (void *buf, uint64_t old, uint64_t new, size_t size)
{
  assert (P2ALIGNED ((uintptr_t) buf, sizeof (uint64_t)));
  assert (P2ALIGNED (size, sizeof (uint64_t)));

  uint64_t *end = (uint64_t *)((char *)buf + size);
  for (uint64_t *ptr = buf; ptr < end; ptr++)
    {
      if (*ptr != old)
        return (kmem_buf_verify_bytes (ptr, &old, sizeof (old)));

      *ptr = new;
    }

  return (NULL);
}

static inline union kmem_bufctl*
kmem_buf_to_bufctl (void *buf, struct kmem_cache *cache)
{
  return ((union kmem_bufctl *)((char *)buf + cache->bufctl_dist));
}

static inline struct kmem_buftag*
kmem_buf_to_buftag (void *buf, struct kmem_cache *cache)
{
  return ((struct kmem_buftag *)((char *)buf + cache->buftag_dist));
}

static inline void*
kmem_bufctl_to_buf (union kmem_bufctl *bufctl, struct kmem_cache *cache)
{
  return ((char *)bufctl - cache->bufctl_dist);
}

static inline bool
kmem_pagealloc_is_virtual (size_t size)
{
  return (size > PAGE_SIZE);
}
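
/*
 * The private data of a slab page serves two purposes: kmem_page_set_priv()
 * stores the back pointer from the page to its slab there, and bit 1 records
 * that the pages were allocated with VM_PAGE_SLEEP, so that kmem_pagefree()
 * can release them with the same flag. This encoding assumes that slab
 * descriptors are aligned on at least 4 bytes.
 */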

static inline void
kmem_mark_page_sleepable (struct vm_page *page)
{
  _Auto value = (uintptr_t)vm_page_get_priv (page);
  vm_page_set_priv (page, (void *)(value | 2));
}

static void*
kmem_pagealloc (size_t size, uint32_t pflags)
{
  if (kmem_pagealloc_is_virtual (size))
    return (vm_kmem_alloc (size));

  size_t order = vm_page_order (size);
  _Auto page = vm_page_alloc (order, VM_PAGE_SEL_DIRECTMAP, VM_PAGE_KMEM,
                              (pflags & KMEM_ALLOC_SLEEP) ? VM_PAGE_SLEEP : 0);
  if (! page)
    return (NULL);
  else if (pflags & KMEM_ALLOC_SLEEP)
    kmem_mark_page_sleepable (page);

  return (vm_page_direct_ptr (page));
}

static void
kmem_pagefree (void *ptr, size_t size)
{
  if (kmem_pagealloc_is_virtual (size))
    vm_kmem_free (ptr, size);
  else
    {
      _Auto page = vm_page_lookup (vm_page_direct_pa ((uintptr_t)ptr));
      assert (page);
      uint32_t flags = ((uintptr_t)vm_page_get_priv (page) & 2) ?
                       VM_PAGE_SLEEP : 0;
      vm_page_free (page, vm_page_order (size), flags);
    }
}

static void
kmem_slab_create_verify (struct kmem_slab *slab, struct kmem_cache *cache)
{
  size_t buf_size = cache->buf_size;
  void *buf = slab->addr;
  _Auto buftag = kmem_buf_to_buftag (buf, cache);

  for (size_t buffers = cache->bufs_per_slab; buffers; --buffers)
    {
      kmem_buf_fill (buf, KMEM_FREE_PATTERN, cache->bufctl_dist);
      buftag->state = KMEM_BUFTAG_FREE;
      buf = (char *)buf + buf_size;
      buftag = kmem_buf_to_buftag (buf, cache);
    }
}

/*
 * Create an empty slab for a cache.
 *
 * The caller must drop all locks before calling this function.
 */
static struct kmem_slab*
kmem_slab_create (struct kmem_cache *cache, size_t color, uint32_t pflags)
{
  void *slab_buf = kmem_pagealloc (cache->slab_size, pflags);
  if (! slab_buf)
    return (NULL);

  struct kmem_slab *slab;
  if (cache->flags & KMEM_CF_SLAB_EXTERNAL)
    {
      slab = kmem_cache_alloc (&kmem_slab_cache);
      if (! slab)
        {
          kmem_pagefree (slab_buf, cache->slab_size);
          return (NULL);
        }
    }
  else
    slab = (struct kmem_slab *) ((char *)slab_buf + cache->slab_size) - 1;

  list_node_init (&slab->node);
  slab->nr_refs = 0;
  slab->first_free = NULL;
  slab->addr = slab_buf + color;

  size_t buf_size = cache->buf_size;
  _Auto bufctl = kmem_buf_to_bufctl (slab->addr, cache);

  for (size_t buffers = cache->bufs_per_slab; buffers; --buffers)
    {
      bufctl->next = slab->first_free;
      slab->first_free = bufctl;
      bufctl = (union kmem_bufctl *)((char *) bufctl + buf_size);
    }

  if (cache->flags & KMEM_CF_VERIFY)
    kmem_slab_create_verify (slab, cache);

  return (slab);
}

static inline uintptr_t
kmem_slab_buf (const struct kmem_slab *slab)
{
  return (P2ALIGN ((uintptr_t)slab->addr, PAGE_SIZE));
}

#ifdef KMEM_USE_CPU_LAYER

static void
kmem_cpu_pool_init (struct kmem_cpu_pool *cpu_pool, struct kmem_cache *cache)
{
  adaptive_lock_init (&cpu_pool->lock);
  cpu_pool->flags = cache->flags;
  cpu_pool->size = 0;
  cpu_pool->transfer_size = 0;
  cpu_pool->nr_objs = 0;
  cpu_pool->array = NULL;
}

static inline struct kmem_cpu_pool*
kmem_cpu_pool_get (struct kmem_cache *cache)
{
  return (&cache->cpu_pools[cpu_id ()]);
}

static inline void
kmem_cpu_pool_build (struct kmem_cpu_pool *cpu_pool, struct kmem_cache *cache,
                     void **array)
{
  cpu_pool->size = cache->cpu_pool_type->array_size;
  cpu_pool->transfer_size = (cpu_pool->size +
                             KMEM_CPU_POOL_TRANSFER_RATIO - 1) /
                            KMEM_CPU_POOL_TRANSFER_RATIO;
  cpu_pool->array = array;
}

static inline void*
kmem_cpu_pool_pop (struct kmem_cpu_pool *cpu_pool)
{
  return (cpu_pool->array[--cpu_pool->nr_objs]);
}

static inline void
kmem_cpu_pool_push (struct kmem_cpu_pool *cpu_pool, void *obj)
{
  cpu_pool->array[cpu_pool->nr_objs++] = obj;
}

static int
kmem_cpu_pool_fill (struct kmem_cpu_pool *cpu_pool, struct kmem_cache *cache)
{
  kmem_ctor_fn_t ctor = (cpu_pool->flags & KMEM_CF_VERIFY) ?
                        NULL : cache->ctor;

  ADAPTIVE_LOCK_GUARD (&cache->lock);

  int i;
  for (i = 0; i < cpu_pool->transfer_size; i++)
    {
      void *buf = kmem_cache_alloc_from_slab (cache);
      if (! buf)
        break;
      else if (ctor)
        ctor (buf);

      kmem_cpu_pool_push (cpu_pool, buf);
    }

  return (i);
}

static void
kmem_cpu_pool_drain (struct kmem_cpu_pool *cpu_pool, struct kmem_cache *cache)
{
  ADAPTIVE_LOCK_GUARD (&cache->lock);
  for (int i = cpu_pool->transfer_size; i > 0; --i)
    {
      void *obj = kmem_cpu_pool_pop (cpu_pool);
      kmem_cache_free_to_slab (cache, obj);
    }
}

#endif // KMEM_USE_CPU_LAYER

static void
kmem_cache_error (struct kmem_cache *cache, void *buf, int error, void *arg)
{
  printf ("kmem: error: cache: %s, buffer: %p\n", cache->name, buf);

  switch (error)
    {
      case KMEM_ERR_INVALID:
        panic ("kmem: freeing invalid address");
        break;
      case KMEM_ERR_DOUBLEFREE:
        panic ("kmem: attempting to free the same address twice");
        break;
      case KMEM_ERR_BUFTAG:
        panic ("kmem: invalid buftag content, buftag state: %p",
               (void *)((struct kmem_buftag *)arg)->state);
        break;
      case KMEM_ERR_MODIFIED:
        panic ("kmem: free buffer modified, fault address: %p, "
               "offset in buffer: %td", arg, arg - buf);
        break;
      case KMEM_ERR_REDZONE:
        panic ("kmem: write beyond end of buffer, fault address: %p, "
               "offset in buffer: %td", arg, arg - buf);
        break;
      default:
        panic ("kmem: unknown error");
    }

  __builtin_unreachable ();
}

/*
 * Compute properties such as slab size for the given cache.
 *
 * Once the slab size is known, this function sets the related properties
 * (buffers per slab and maximum color). It can also set some KMEM_CF_xxx
 * flags depending on the resulting layout.
 */
static void
kmem_cache_compute_properties (struct kmem_cache *cache, int flags)
{
  if (cache->buf_size < KMEM_BUF_SIZE_THRESHOLD)
    flags |= KMEM_CACHE_NOOFFSLAB;

  cache->slab_size = PAGE_SIZE;

  bool embed;
  size_t size;

  while (1)
    {
      if (flags & KMEM_CACHE_NOOFFSLAB)
        embed = true;
      else
        {
          size_t waste = cache->slab_size % cache->buf_size;
          embed = (sizeof (struct kmem_slab) <= waste);
        }

      size = cache->slab_size;
      if (embed)
        size -= sizeof (struct kmem_slab);

      if (size >= cache->buf_size)
        break;

      cache->slab_size += PAGE_SIZE;
    }

  /*
   * A user may force page allocation in order to guarantee that virtual
   * memory isn't used. This is normally done for objects that are used
   * to implement virtual memory and avoid circular dependencies.
   *
   * When forcing the use of direct page allocation, only allow single
   * page allocations in order to completely prevent physical memory
   * fragmentation from making slab allocations fail.
   */
  if ((flags & KMEM_CACHE_PAGE_ONLY) && cache->slab_size != PAGE_SIZE)
    panic ("kmem: unable to guarantee page allocation");

  cache->bufs_per_slab = size / cache->buf_size;
  cache->color_max = size % cache->buf_size;

  /*
   * Make sure the first page of a slab buffer can be found from the
   * address of the first object.
   *
   * See kmem_slab_buf().
   */
  if (cache->color_max >= PAGE_SIZE)
    cache->color_max = 0;

  if (! embed)
    cache->flags |= KMEM_CF_SLAB_EXTERNAL;
}
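
/*
 * As a worked example, assuming a 4 KiB page size: a cache with a 1024-byte
 * buffer size leaves no waste in a page (4096 % 1024 == 0), so the slab
 * descriptor can't be embedded; such a cache gets KMEM_CF_SLAB_EXTERNAL,
 * single-page slabs, 4 buffers per slab and a maximum color of 0. A
 * 192-byte buffer size is below KMEM_BUF_SIZE_THRESHOLD, so the descriptor
 * is embedded at the end of the slab, and whatever room remains after the
 * last buffer becomes the coloring space.
 */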

void
kmem_cache_init (struct kmem_cache *cache, const char *name, size_t obj_size,
                 size_t align, kmem_ctor_fn_t ctor, int flags)
{
#ifdef CONFIG_KMEM_DEBUG
  cache->flags = KMEM_CF_VERIFY;
#else
  cache->flags = 0;
#endif

  if (flags & KMEM_CACHE_VERIFY)
    cache->flags |= KMEM_CF_VERIFY;

  if (align < KMEM_ALIGN_MIN)
    align = KMEM_ALIGN_MIN;

  assert (obj_size > 0);
  assert (ISP2 (align));
  assert (align < PAGE_SIZE);

  size_t buf_size = P2ROUND (obj_size, align);

  adaptive_lock_init (&cache->lock);
  list_node_init (&cache->node);
  list_init (&cache->partial_slabs);
  list_init (&cache->free_slabs);
  cache->obj_size = obj_size;
  cache->align = align;
  cache->buf_size = buf_size;
  cache->bufctl_dist = buf_size - sizeof (union kmem_bufctl);
  cache->color = 0;
  cache->nr_objs = 0;
  cache->nr_bufs = 0;
  cache->nr_slabs = 0;
  cache->nr_free_slabs = 0;
  cache->ctor = ctor;
  strlcpy (cache->name, name, sizeof (cache->name));
  cache->buftag_dist = 0;
  cache->redzone_pad = 0;

  if (cache->flags & KMEM_CF_VERIFY)
    {
      cache->bufctl_dist = buf_size;
      cache->buftag_dist = cache->bufctl_dist + sizeof (union kmem_bufctl);
      cache->redzone_pad = cache->bufctl_dist - cache->obj_size;
      buf_size += sizeof (union kmem_bufctl) + sizeof (struct kmem_buftag);
      buf_size = P2ROUND (buf_size, align);
      cache->buf_size = buf_size;
    }

  kmem_cache_compute_properties (cache, flags);

#ifdef KMEM_USE_CPU_LAYER
  for (cache->cpu_pool_type = kmem_cpu_pool_types;
       buf_size <= cache->cpu_pool_type->buf_size;
       ++cache->cpu_pool_type);

  for (size_t i = 0; i < ARRAY_SIZE (cache->cpu_pools); i++)
    kmem_cpu_pool_init (&cache->cpu_pools[i], cache);
#endif

  ADAPTIVE_LOCK_GUARD (&kmem_cache_list_lock);
  list_insert_tail (&kmem_cache_list, &cache->node);
}

static inline int
kmem_cache_empty (struct kmem_cache *cache)
{
  return (cache->nr_objs == cache->nr_bufs);
}

static struct kmem_slab*
kmem_cache_buf_to_slab (const struct kmem_cache *cache, void *buf)
{
  if ((cache->flags & KMEM_CF_SLAB_EXTERNAL) ||
      cache->slab_size != PAGE_SIZE)
    return (NULL);

  return ((struct kmem_slab *)vm_page_end ((uintptr_t)buf) - 1);
}

static inline bool
kmem_cache_registration_required (const struct kmem_cache *cache)
{
  return ((cache->flags & KMEM_CF_SLAB_EXTERNAL) ||
          (cache->flags & KMEM_CF_VERIFY) ||
          cache->slab_size != PAGE_SIZE);
}

static void
kmem_page_set_priv (struct vm_page *page, void *priv)
{
  uintptr_t val = (uintptr_t)vm_page_get_priv (page) | (uintptr_t)priv;
  vm_page_set_priv (page, (void *)val);
}

static void*
kmem_page_get_priv (struct vm_page *page)
{
  uintptr_t val = (uintptr_t)vm_page_get_priv (page);
  return ((void *)(val & ~2));
}

static void
kmem_cache_register (struct kmem_cache *cache, struct kmem_slab *slab)
{
  assert (kmem_cache_registration_required (cache));
  assert (!slab->nr_refs);

  bool virtual = kmem_pagealloc_is_virtual (cache->slab_size);
  for (uintptr_t va = kmem_slab_buf (slab), end = va + cache->slab_size;
       va < end; va += PAGE_SIZE)
    {
      phys_addr_t pa;
      if (virtual)
        {
          int error = pmap_kextract (va, &pa);
          assert (! error);
        }
      else
        pa = vm_page_direct_pa (va);

      _Auto page = vm_page_lookup (pa);
      assert (page);
      assert ((virtual && vm_page_type (page) == VM_PAGE_KERNEL) ||
              (!virtual && vm_page_type (page) == VM_PAGE_KMEM));
      assert (!kmem_page_get_priv (page));
      kmem_page_set_priv (page, slab);
    }
}

static struct kmem_slab*
kmem_cache_lookup (struct kmem_cache *cache, void *buf)
{
  assert (kmem_cache_registration_required (cache));

  bool virtual = kmem_pagealloc_is_virtual (cache->slab_size);
  uintptr_t va = (uintptr_t) buf;
  phys_addr_t pa;

  if (virtual)
    {
      int error = pmap_kextract (va, &pa);
      if (error)
        return (NULL);
    }
  else
    pa = vm_page_direct_pa (va);

  _Auto page = vm_page_lookup (pa);
  if (! page)
    return (NULL);

  if ((virtual && (vm_page_type (page) != VM_PAGE_KERNEL)) ||
      (!virtual && (vm_page_type (page) != VM_PAGE_KMEM)))
    return (NULL);

  struct kmem_slab *slab = kmem_page_get_priv (page);
  assert ((uintptr_t)buf >= kmem_slab_buf (slab));
  assert ((uintptr_t)buf < kmem_slab_buf (slab) + cache->slab_size);
  return (slab);
}

static int
kmem_cache_grow (struct kmem_cache *cache, uint32_t pflags)
{
  adaptive_lock_acquire (&cache->lock);

  if (!kmem_cache_empty (cache))
    {
      adaptive_lock_release (&cache->lock);
      return (1);
    }

  size_t color = cache->color;
  cache->color += cache->align;
  if (cache->color > cache->color_max)
    cache->color = 0;

  adaptive_lock_release (&cache->lock);
  struct kmem_slab *slab = kmem_slab_create (cache, color, pflags);
  adaptive_lock_acquire (&cache->lock);

  if (slab)
    {
      list_insert_head (&cache->free_slabs, &slab->node);
      cache->nr_bufs += cache->bufs_per_slab;
      cache->nr_slabs++;
      cache->nr_free_slabs++;

      if (kmem_cache_registration_required (cache))
        kmem_cache_register (cache, slab);
    }

  /*
   * Even if our slab creation failed, another thread might have succeeded
   * in growing the cache.
   */
  int empty = kmem_cache_empty (cache);
  adaptive_lock_release (&cache->lock);
  return (!empty);
}

/*
 * Allocate a raw (unconstructed) buffer from the slab layer of a cache.
 *
 * The cache must be locked before calling this function.
 */
static void*
kmem_cache_alloc_from_slab (struct kmem_cache *cache)
{
  struct kmem_slab *slab;
  if (!list_empty (&cache->partial_slabs))
    slab = list_first_entry (&cache->partial_slabs, struct kmem_slab, node);
  else if (!list_empty (&cache->free_slabs))
    slab = list_first_entry (&cache->free_slabs, struct kmem_slab, node);
  else
    return (NULL);

  union kmem_bufctl *bufctl = slab->first_free;
  assert (bufctl);
  slab->first_free = bufctl->next;
  ++cache->nr_objs;

  if (++slab->nr_refs == cache->bufs_per_slab)
    { // The slab has become complete.
      list_remove (&slab->node);
      if (slab->nr_refs == 1)
        --cache->nr_free_slabs;
    }
  else if (slab->nr_refs == 1)
    {
      /*
       * The slab has become partial. Insert the new slab at the end of
       * the list to reduce fragmentation.
       */
      list_remove (&slab->node);
      list_insert_tail (&cache->partial_slabs, &slab->node);
      --cache->nr_free_slabs;
    }

  return (kmem_bufctl_to_buf (bufctl, cache));
}

/*
 * Release a buffer to the slab layer of a cache.
 *
 * The cache must be locked before calling this function.
 */
static void
kmem_cache_free_to_slab (struct kmem_cache *cache, void *buf)
{
  struct kmem_slab *slab = kmem_cache_buf_to_slab (cache, buf);
  if (! slab)
    {
      slab = kmem_cache_lookup (cache, buf);
      assert (slab);
    }

  assert (slab->nr_refs >= 1);
  assert (slab->nr_refs <= cache->bufs_per_slab);

  union kmem_bufctl *bufctl = kmem_buf_to_bufctl (buf, cache);
  bufctl->next = slab->first_free;
  slab->first_free = bufctl;
  --cache->nr_objs;

  if (--slab->nr_refs == 0)
    {
      /*
       * The slab has become free - If it was partial,
       * remove it from its list.
       */
      if (cache->bufs_per_slab != 1)
        list_remove (&slab->node);

      list_insert_head (&cache->free_slabs, &slab->node);
      ++cache->nr_free_slabs;
    }
  else if (slab->nr_refs == cache->bufs_per_slab - 1)
    // The slab has become partial.
    list_insert_head (&cache->partial_slabs, &slab->node);
}

static void
kmem_cache_alloc_verify (struct kmem_cache *cache, void *buf, int construct)
{
  struct kmem_buftag *buftag = kmem_buf_to_buftag (buf, cache);
  if (buftag->state != KMEM_BUFTAG_FREE)
    kmem_cache_error (cache, buf, KMEM_ERR_BUFTAG, buftag);

  void *addr = kmem_buf_verify_fill (buf, KMEM_FREE_PATTERN,
                                     KMEM_UNINIT_PATTERN, cache->bufctl_dist);
  if (addr)
    kmem_cache_error (cache, buf, KMEM_ERR_MODIFIED, addr);

  addr = (char *)buf + cache->obj_size;
  memset (addr, KMEM_REDZONE_BYTE, cache->redzone_pad);

  union kmem_bufctl *bufctl = kmem_buf_to_bufctl (buf, cache);
  bufctl->redzone = KMEM_REDZONE_WORD;
  buftag->state = KMEM_BUFTAG_ALLOC;

  if (construct && cache->ctor)
    cache->ctor (buf);
}

void*
kmem_cache_alloc2 (struct kmem_cache *cache, uint32_t pflags)
{
#ifdef KMEM_USE_CPU_LAYER
  thread_pin ();
  struct kmem_cpu_pool *cpu_pool = kmem_cpu_pool_get (cache);
  adaptive_lock_acquire (&cpu_pool->lock);

fast_alloc:
  if (likely (cpu_pool->nr_objs > 0))
    {
      void *buf = kmem_cpu_pool_pop (cpu_pool);
      bool verify = (cpu_pool->flags & KMEM_CF_VERIFY);
      adaptive_lock_release (&cpu_pool->lock);
      thread_unpin ();

      if (verify)
        kmem_cache_alloc_verify (cache, buf, KMEM_AV_CONSTRUCT);

      return (buf);
    }

  if (cpu_pool->array)
    {
      if (!kmem_cpu_pool_fill (cpu_pool, cache))
        {
          adaptive_lock_release (&cpu_pool->lock);
          thread_unpin ();

          if (!kmem_cache_grow (cache, pflags))
            return (NULL);

          thread_pin ();
          cpu_pool = kmem_cpu_pool_get (cache);
          adaptive_lock_acquire (&cpu_pool->lock);
        }

      goto fast_alloc;
    }

  adaptive_lock_release (&cpu_pool->lock);
  thread_unpin ();
#endif // KMEM_USE_CPU_LAYER

slab_alloc:
  adaptive_lock_acquire (&cache->lock);
  void *buf = kmem_cache_alloc_from_slab (cache);
  adaptive_lock_release (&cache->lock);

  if (! buf)
    {
      if (!kmem_cache_grow (cache, pflags))
        return (NULL);

      goto slab_alloc;
    }

  if (cache->flags & KMEM_CF_VERIFY)
    kmem_cache_alloc_verify (cache, buf, KMEM_AV_NOCONSTRUCT);

  if (cache->ctor)
    cache->ctor (buf);

  return (buf);
}

static void
kmem_cache_free_verify (struct kmem_cache *cache, void *buf)
{
  struct kmem_slab *slab = kmem_cache_lookup (cache, buf);
  if (! slab)
    kmem_cache_error (cache, buf, KMEM_ERR_INVALID, NULL);

  uintptr_t slabend = P2ALIGN ((uintptr_t)slab->addr +
                               cache->slab_size, PAGE_SIZE);
  if ((uintptr_t)buf >= slabend)
    kmem_cache_error (cache, buf, KMEM_ERR_INVALID, NULL);

  if (((uintptr_t)buf - (uintptr_t)slab->addr) % cache->buf_size)
    kmem_cache_error (cache, buf, KMEM_ERR_INVALID, NULL);

  // As the buffer address is valid, accessing its buftag is safe.
  struct kmem_buftag *buftag = kmem_buf_to_buftag (buf, cache);
  if (buftag->state == KMEM_BUFTAG_ALLOC)
    ;
  else if (buftag->state == KMEM_BUFTAG_FREE)
    kmem_cache_error (cache, buf, KMEM_ERR_DOUBLEFREE, NULL);
  else
    kmem_cache_error (cache, buf, KMEM_ERR_BUFTAG, buftag);

  unsigned char *redzone_byte = (unsigned char *)buf + cache->obj_size;
  union kmem_bufctl *bufctl = kmem_buf_to_bufctl (buf, cache);

  for (; redzone_byte < (unsigned char *)bufctl; ++redzone_byte)
    if (*redzone_byte != KMEM_REDZONE_BYTE)
      kmem_cache_error (cache, buf, KMEM_ERR_REDZONE, redzone_byte);

  if (bufctl->redzone != KMEM_REDZONE_WORD)
    {
      uintptr_t word = KMEM_REDZONE_WORD;
      redzone_byte = kmem_buf_verify_bytes (&bufctl->redzone, &word,
                                            sizeof (bufctl->redzone));
      kmem_cache_error (cache, buf, KMEM_ERR_REDZONE, redzone_byte);
    }

  kmem_buf_fill (buf, KMEM_FREE_PATTERN, cache->bufctl_dist);
  buftag->state = KMEM_BUFTAG_FREE;
}

void
kmem_cache_free (struct kmem_cache *cache, void *obj)
{
#ifdef KMEM_USE_CPU_LAYER
  thread_pin ();
  struct kmem_cpu_pool *cpu_pool = kmem_cpu_pool_get (cache);

  if (cpu_pool->flags & KMEM_CF_VERIFY)
    {
      thread_unpin ();
      kmem_cache_free_verify (cache, obj);
      thread_pin ();
      cpu_pool = kmem_cpu_pool_get (cache);
    }

  adaptive_lock_acquire (&cpu_pool->lock);

fast_free:
  if (likely (cpu_pool->nr_objs < cpu_pool->size))
    {
      kmem_cpu_pool_push (cpu_pool, obj);
      adaptive_lock_release (&cpu_pool->lock);
      thread_unpin ();
      return;
    }

  if (cpu_pool->array)
    {
      kmem_cpu_pool_drain (cpu_pool, cache);
      goto fast_free;
    }

  adaptive_lock_release (&cpu_pool->lock);

  void **array = kmem_cache_alloc (cache->cpu_pool_type->array_cache);
  if (array)
    {
      adaptive_lock_acquire (&cpu_pool->lock);

      /*
       * Another thread may have built the CPU pool while the lock was
       * dropped.
       */
      if (cpu_pool->array)
        {
          adaptive_lock_release (&cpu_pool->lock);
          thread_unpin ();
          kmem_cache_free (cache->cpu_pool_type->array_cache, array);
          thread_pin ();
          cpu_pool = kmem_cpu_pool_get (cache);
          adaptive_lock_acquire (&cpu_pool->lock);
          goto fast_free;
        }

      kmem_cpu_pool_build (cpu_pool, cache, array);
      goto fast_free;
    }

  thread_unpin ();
#else
  if (cache->flags & KMEM_CF_VERIFY)
    kmem_cache_free_verify (cache, obj);
#endif // KMEM_USE_CPU_LAYER

  adaptive_lock_acquire (&cache->lock);
  kmem_cache_free_to_slab (cache, obj);
  adaptive_lock_release (&cache->lock);
}

void
kmem_cache_info (struct kmem_cache *cache, struct stream *stream)
{
  char flags_str[64];
  snprintf (flags_str, sizeof (flags_str), "%s%s",
            (cache->flags & KMEM_CF_SLAB_EXTERNAL) ? " SLAB_EXTERNAL" : "",
            (cache->flags & KMEM_CF_VERIFY) ? " VERIFY" : "");

  ADAPTIVE_LOCK_GUARD (&cache->lock);

  fmt_xprintf (stream, "kmem: flags: 0x%x%s\n",
               cache->flags, flags_str);
  fmt_xprintf (stream, "kmem: obj_size: %zu\n", cache->obj_size);
  fmt_xprintf (stream, "kmem: align: %zu\n", cache->align);
  fmt_xprintf (stream, "kmem: buf_size: %zu\n", cache->buf_size);
  fmt_xprintf (stream, "kmem: bufctl_dist: %zu\n", cache->bufctl_dist);
  fmt_xprintf (stream, "kmem: slab_size: %zu\n", cache->slab_size);
  fmt_xprintf (stream, "kmem: color_max: %zu\n", cache->color_max);
  fmt_xprintf (stream, "kmem: bufs_per_slab: %zu\n", cache->bufs_per_slab);
  fmt_xprintf (stream, "kmem: nr_objs: %zu\n", cache->nr_objs);
  fmt_xprintf (stream, "kmem: nr_bufs: %zu\n", cache->nr_bufs);
  fmt_xprintf (stream, "kmem: nr_slabs: %zu\n", cache->nr_slabs);
  fmt_xprintf (stream, "kmem: nr_free_slabs: %zu\n", cache->nr_free_slabs);
  fmt_xprintf (stream, "kmem: buftag_dist: %zu\n", cache->buftag_dist);
  fmt_xprintf (stream, "kmem: redzone_pad: %zu\n", cache->redzone_pad);

#ifdef KMEM_USE_CPU_LAYER
  fmt_xprintf (stream, "kmem: cpu_pool_size: %d\n",
               cache->cpu_pool_type->array_size);
#endif
}

#ifdef CONFIG_SHELL

static struct kmem_cache*
kmem_lookup_cache (const char *name)
{
  ADAPTIVE_LOCK_GUARD (&kmem_cache_list_lock);

  struct kmem_cache *cache;
  list_for_each_entry (&kmem_cache_list, cache, node)
    if (strcmp (cache->name, name) == 0)
      return (cache);

  return (NULL);
}

static void
kmem_shell_info (struct shell *shell __unused, int argc, char **argv)
{
  if (argc < 2)
    kmem_info (shell->stream);
  else
    {
      struct kmem_cache *cache = kmem_lookup_cache (argv[1]);
      if (! cache)
        fmt_xprintf (shell->stream, "kmem: info: cache not found\n");
      else
        kmem_cache_info (cache, shell->stream);
    }
}

static struct shell_cmd kmem_shell_cmds[] =
{
  SHELL_CMD_INITIALIZER ("kmem_info", kmem_shell_info,
                         "kmem_info [<cache_name>]",
                         "display information about kernel memory and caches"),
};

static int __init
kmem_setup_shell (void)
{
  SHELL_REGISTER_CMDS (kmem_shell_cmds, shell_get_main_cmd_set ());
  return (0);
}

INIT_OP_DEFINE (kmem_setup_shell,
                INIT_OP_DEP (kmem_setup, true),
                INIT_OP_DEP (printf_setup, true),
                INIT_OP_DEP (shell_setup, true),
                INIT_OP_DEP (thread_setup, true));

#endif // CONFIG_SHELL

#ifdef KMEM_USE_CPU_LAYER

static void
kmem_bootstrap_cpu (void)
{
  char name[KMEM_NAME_SIZE];
  for (size_t i = 0; i < ARRAY_SIZE (kmem_cpu_pool_types); i++)
    {
      struct kmem_cpu_pool_type *cpu_pool_type = &kmem_cpu_pool_types[i];
      cpu_pool_type->array_cache = &kmem_cpu_array_caches[i];
      sprintf (name, "kmem_cpu_array_%d", cpu_pool_type->array_size);
      size_t size = sizeof (void *) * cpu_pool_type->array_size;
      kmem_cache_init (cpu_pool_type->array_cache, name, size,
                       cpu_pool_type->array_align, NULL, 0);
    }
}

#endif // KMEM_USE_CPU_LAYER

static int __init
kmem_bootstrap (void)
{
  // Make sure a bufctl can always be stored in a buffer.
  assert (sizeof (union kmem_bufctl) <= KMEM_ALIGN_MIN);

  list_init (&kmem_cache_list);
  adaptive_lock_init (&kmem_cache_list_lock);

#ifdef KMEM_USE_CPU_LAYER
  kmem_bootstrap_cpu ();
#endif // KMEM_USE_CPU_LAYER

  // Prevent off slab data for the slab cache to avoid infinite recursion.
  kmem_cache_init (&kmem_slab_cache, "kmem_slab", sizeof (struct kmem_slab),
                   0, NULL, KMEM_CACHE_NOOFFSLAB);

  size_t size = 1 << KMEM_CACHES_FIRST_ORDER;
  char name[KMEM_NAME_SIZE];

  for (size_t i = 0; i < ARRAY_SIZE (kmem_caches); i++)
    {
      sprintf (name, "kmem_%zu", size);
      kmem_cache_init (&kmem_caches[i], name, size, 0, NULL, 0);
      size <<= 1;
    }

  return (0);
}

INIT_OP_DEFINE (kmem_bootstrap,
                INIT_OP_DEP (thread_bootstrap, true),
                INIT_OP_DEP (vm_page_setup, true));

static int __init
kmem_setup (void)
{
  return (0);
}

INIT_OP_DEFINE (kmem_setup,
                INIT_OP_DEP (kmem_bootstrap, true),
                INIT_OP_DEP (vm_kmem_setup, true));

static inline size_t
kmem_get_index (size_t size)
{
  return (log2_order (size) - KMEM_CACHES_FIRST_ORDER);
}
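
/*
 * With KMEM_CACHES_FIRST_ORDER at 5 and KMEM_NR_MEM_CACHES at 13, the
 * general purpose caches cover sizes from 32 bytes to 128 KiB in powers of
 * two. For example, a 100-byte request rounds up to order 7 (128 bytes) and
 * maps to index 2, i.e. the kmem_128 cache. Requests above 128 KiB bypass
 * the caches and go straight to the page allocator (see kmem_alloc2() and
 * kmem_free() below).
 */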

static void
kmem_alloc_verify (struct kmem_cache *cache, void *buf, size_t size)
{
  assert (size <= cache->obj_size);
  memset ((char *)buf + size, KMEM_REDZONE_BYTE, cache->obj_size - size);
}

void*
kmem_alloc2 (size_t size, uint32_t flags)
{
  if (! size)
    return (NULL);

  size_t index = kmem_get_index (size);
  if (index < ARRAY_SIZE (kmem_caches))
    {
      struct kmem_cache *cache = &kmem_caches[index];
      void *buf = kmem_cache_alloc2 (cache, flags);
      if (buf && (cache->flags & KMEM_CF_VERIFY))
        kmem_alloc_verify (cache, buf, size);

      return (buf);
    }

  return (kmem_pagealloc (size, flags));
}

void*
kmem_zalloc (size_t size)
{
  void *ptr = kmem_alloc (size);
  return (ptr ? memset (ptr, 0, size) : ptr);
}

static void
kmem_free_verify (struct kmem_cache *cache, void *buf, size_t size)
{
  assert (size <= cache->obj_size);

  unsigned char *redzone_byte = buf + size,
                *redzone_end = buf + cache->obj_size;

  for (; redzone_byte < redzone_end; ++redzone_byte)
    if (*redzone_byte != KMEM_REDZONE_BYTE)
      kmem_cache_error (cache, buf, KMEM_ERR_REDZONE, redzone_byte);
}

void
kmem_free (void *ptr, size_t size)
{
  if (!ptr || !size)
    return;

  size_t index = kmem_get_index (size);
  if (index < ARRAY_SIZE (kmem_caches))
    {
      struct kmem_cache *cache = &kmem_caches[index];
      if (cache->flags & KMEM_CF_VERIFY)
        kmem_free_verify (cache, ptr, size);

      kmem_cache_free (cache, ptr);
    }
  else
    kmem_pagefree (ptr, size);
}

void
kmem_info (struct stream *stream)
{
  size_t total = 0, total_physical = 0, total_virtual = 0, total_reclaim = 0,
         total_reclaim_physical = 0, total_reclaim_virtual = 0;

  fmt_xprintf (stream, "kmem: cache "
               "obj slab bufs objs bufs total reclaimable\n");
  fmt_xprintf (stream, "kmem: name size size /slab "
               "usage count memory memory\n");

  adaptive_lock_acquire (&kmem_cache_list_lock);

  struct kmem_cache *cache;
  list_for_each_entry (&kmem_cache_list, cache, node)
    {
      ADAPTIVE_LOCK_GUARD (&cache->lock);
      size_t mem_usage = (cache->nr_slabs * cache->slab_size) >> 10,
             mem_reclaim = (cache->nr_free_slabs * cache->slab_size) >> 10;

      total += mem_usage;
      total_reclaim += mem_reclaim;

      if (kmem_pagealloc_is_virtual (cache->slab_size))
        {
          total_virtual += mem_usage;
          total_reclaim_virtual += mem_reclaim;
        }
      else
        {
          total_physical += mem_usage;
          total_reclaim_physical += mem_reclaim;
        }

      fmt_xprintf (stream,
                   "kmem: %-19s %6zu %3zuk %4zu %6zu %6zu %7zuk %10zuk\n",
                   cache->name, cache->obj_size, cache->slab_size >> 10,
                   cache->bufs_per_slab, cache->nr_objs, cache->nr_bufs,
                   mem_usage, mem_reclaim);
    }

  adaptive_lock_release (&kmem_cache_list_lock);

  fmt_xprintf (stream, "total: %zuk (phys: %zuk virt: %zuk), "
               "reclaim: %zuk (phys: %zuk virt: %zuk)\n",
               total, total_physical, total_virtual,
               total_reclaim, total_reclaim_physical, total_reclaim_virtual);
}