  1. /* $OpenBSD: subr_pool.c,v 1.187 2015/07/23 12:44:43 dlg Exp $ */
  2. /* $NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $ */
  3. /*-
  4. * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
  5. * All rights reserved.
  6. *
  7. * This code is derived from software contributed to The NetBSD Foundation
  8. * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
  9. * Simulation Facility, NASA Ames Research Center.
  10. *
  11. * Redistribution and use in source and binary forms, with or without
  12. * modification, are permitted provided that the following conditions
  13. * are met:
  14. * 1. Redistributions of source code must retain the above copyright
  15. * notice, this list of conditions and the following disclaimer.
  16. * 2. Redistributions in binary form must reproduce the above copyright
  17. * notice, this list of conditions and the following disclaimer in the
  18. * documentation and/or other materials provided with the distribution.
  19. *
  20. * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  21. * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  22. * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  23. * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  24. * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  25. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  26. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  27. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  29. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  30. * POSSIBILITY OF SUCH DAMAGE.
  31. */
  32. #include <sys/param.h>
  33. #include <sys/systm.h>
  34. #include <sys/errno.h>
  35. #include <sys/kernel.h>
  36. #include <sys/malloc.h>
  37. #include <sys/pool.h>
  38. #include <sys/syslog.h>
  39. #include <sys/rwlock.h>
  40. #include <sys/sysctl.h>
  41. #include <sys/task.h>
  42. #include <sys/timeout.h>
  43. #include <uvm/uvm_extern.h>
  44. /*
  45. * Pool resource management utility.
  46. *
  47. * Memory is allocated in pages which are split into pieces according to
  48. * the pool item size. Each page is kept on one of three lists in the
  49. * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
  50. * for empty, full and partially-full pages respectively. The individual
  51. * pool items are on a linked list headed by `ph_itemlist' in each page
  52. * header. The memory for building the page list is either taken from
  53. * the allocated pages themselves (for small pool items) or taken from
  54. * an internal pool of page headers (`phpool').
  55. */
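/*
 * Illustrative sketch (not part of this file): typical use of the API
 * described above.  The pool "examplepl" and struct "example_softc"
 * are hypothetical names; the calls mirror pool_init(), pool_setipl(),
 * pool_get() and pool_put() as defined below.
 *
 *	struct pool examplepl;
 *
 *	void
 *	example_init(void)
 *	{
 *		pool_init(&examplepl, sizeof(struct example_softc), 0, 0,
 *		    0, "examplepl", NULL);
 *		pool_setipl(&examplepl, IPL_NONE);
 *	}
 *
 *	struct example_softc *
 *	example_alloc(void)
 *	{
 *		return (pool_get(&examplepl, PR_WAITOK | PR_ZERO));
 *	}
 *
 *	void
 *	example_free(struct example_softc *sc)
 *	{
 *		pool_put(&examplepl, sc);
 *	}
 */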
  56. /* List of all pools */
  57. SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head);
  58. /*
  59. * Every pool gets a unique serial number assigned to it. If this counter
  60. * wraps, we're screwed, but we shouldn't create so many pools anyway.
  61. */
  62. unsigned int pool_serial;
  63. unsigned int pool_count;
  64. /* Lock the previous variables making up the global pool state */
  65. struct rwlock pool_lock = RWLOCK_INITIALIZER("pools");
  66. /* Private pool for page header structures */
  67. struct pool phpool;
  68. struct pool_item_header {
  69. /* Page headers */
  70. TAILQ_ENTRY(pool_item_header)
  71. ph_pagelist; /* pool page list */
  72. XSIMPLEQ_HEAD(,pool_item) ph_itemlist; /* chunk list for this page */
  73. RB_ENTRY(pool_item_header)
  74. ph_node; /* Off-page page headers */
  75. int ph_nmissing; /* # of chunks in use */
  76. caddr_t ph_page; /* this page's address */
  77. caddr_t ph_colored; /* page's colored address */
  78. u_long ph_magic;
  79. int ph_tick;
  80. };
  81. #define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
  82. #define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)
  83. struct pool_item {
  84. u_long pi_magic;
  85. XSIMPLEQ_ENTRY(pool_item) pi_list;
  86. };
  87. #define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)
  88. #ifdef POOL_DEBUG
  89. int pool_debug = 1;
  90. #else
  91. int pool_debug = 0;
  92. #endif
  93. #define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0)
  94. struct pool_item_header *
  95. pool_p_alloc(struct pool *, int, int *);
  96. void pool_p_insert(struct pool *, struct pool_item_header *);
  97. void pool_p_remove(struct pool *, struct pool_item_header *);
  98. void pool_p_free(struct pool *, struct pool_item_header *);
  99. void pool_update_curpage(struct pool *);
  100. void *pool_do_get(struct pool *, int, int *);
  101. int pool_chk_page(struct pool *, struct pool_item_header *, int);
  102. int pool_chk(struct pool *);
  103. void pool_get_done(void *, void *);
  104. void pool_runqueue(struct pool *, int);
  105. void *pool_allocator_alloc(struct pool *, int, int *);
  106. void pool_allocator_free(struct pool *, void *);
  107. /*
  108. * The default pool allocator.
  109. */
  110. void *pool_page_alloc(struct pool *, int, int *);
  111. void pool_page_free(struct pool *, void *);
  112. /*
  113. * safe for interrupts, name preserved for compat this is the default
  114. * allocator
  115. */
  116. struct pool_allocator pool_allocator_nointr = {
  117. pool_page_alloc,
  118. pool_page_free
  119. };
  120. void *pool_large_alloc(struct pool *, int, int *);
  121. void pool_large_free(struct pool *, void *);
  122. struct pool_allocator pool_allocator_large = {
  123. pool_large_alloc,
  124. pool_large_free
  125. };
  126. void *pool_large_alloc_ni(struct pool *, int, int *);
  127. void pool_large_free_ni(struct pool *, void *);
  128. struct pool_allocator pool_allocator_large_ni = {
  129. pool_large_alloc_ni,
  130. pool_large_free_ni
  131. };
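/*
 * Illustrative sketch (hypothetical, not part of this file): a pool may
 * be given its own backend by handing a struct pool_allocator to
 * pool_init().  The names example_page_alloc/example_page_free are
 * assumptions; when an allocator leaves pa_pagesz at zero, pool_init()
 * below falls back to PAGE_SIZE.
 *
 *	void	*example_page_alloc(struct pool *, int, int *);
 *	void	 example_page_free(struct pool *, void *);
 *
 *	struct pool_allocator example_allocator = {
 *		example_page_alloc,
 *		example_page_free
 *	};
 *
 *	pool_init(&examplepl, sizeof(struct example_softc), 0, 0, 0,
 *	    "examplepl", &example_allocator);
 */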
  132. #ifdef DDB
  133. void pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
  134. __attribute__((__format__(__kprintf__,1,2))));
  135. void pool_print1(struct pool *, const char *, int (*)(const char *, ...)
  136. __attribute__((__format__(__kprintf__,1,2))));
  137. #endif
  138. /* stale page garbage collectors */
  139. void pool_gc_sched(void *);
  140. struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
  141. void pool_gc_pages(void *);
  142. struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);
  143. int pool_wait_free = 1;
  144. int pool_wait_gc = 8;
  145. static inline int
  146. phtree_compare(struct pool_item_header *a, struct pool_item_header *b)
  147. {
  148. vaddr_t va = (vaddr_t)a->ph_page;
  149. vaddr_t vb = (vaddr_t)b->ph_page;
  150. /* the compares in this order are important for the NFIND to work */
  151. if (vb < va)
  152. return (-1);
  153. if (vb > va)
  154. return (1);
  155. return (0);
  156. }
  157. RB_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare);
  158. RB_GENERATE(phtree, pool_item_header, ph_node, phtree_compare);
  159. /*
  160. * Return the pool page header based on page address.
  161. */
  162. static inline struct pool_item_header *
  163. pr_find_pagehead(struct pool *pp, void *v)
  164. {
  165. struct pool_item_header *ph, key;
  166. if (POOL_INPGHDR(pp)) {
  167. caddr_t page;
  168. page = (caddr_t)((vaddr_t)v & pp->pr_pgmask);
  169. return ((struct pool_item_header *)(page + pp->pr_phoffset));
  170. }
  171. key.ph_page = v;
  172. ph = RB_NFIND(phtree, &pp->pr_phtree, &key);
  173. if (ph == NULL)
  174. panic("%s: %s: page header missing", __func__, pp->pr_wchan);
  175. KASSERT(ph->ph_page <= (caddr_t)v);
  176. if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v)
  177. panic("%s: %s: incorrect page", __func__, pp->pr_wchan);
  178. return (ph);
  179. }
  180. /*
  181. * Initialize the given pool resource structure.
  182. *
  183. * We export this routine to allow other kernel parts to declare
  184. * static pools that must be initialized before malloc() is available.
  185. */
  186. void
  187. pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
  188. const char *wchan, struct pool_allocator *palloc)
  189. {
  190. int off = 0, space;
  191. unsigned int pgsize = PAGE_SIZE, items;
  192. #ifdef DIAGNOSTIC
  193. struct pool *iter;
  194. KASSERT(ioff == 0);
  195. #endif
  196. if (align == 0)
  197. align = ALIGN(1);
  198. if (size < sizeof(struct pool_item))
  199. size = sizeof(struct pool_item);
  200. size = roundup(size, align);
  201. if (palloc == NULL) {
  202. while (size > pgsize)
  203. pgsize <<= 1;
  204. if (pgsize > PAGE_SIZE) {
  205. palloc = ISSET(flags, PR_WAITOK) ?
  206. &pool_allocator_large_ni : &pool_allocator_large;
  207. } else
  208. palloc = &pool_allocator_nointr;
  209. } else
  210. pgsize = palloc->pa_pagesz ? palloc->pa_pagesz : PAGE_SIZE;
  211. items = pgsize / size;
  212. /*
  213. * Decide whether to put the page header off page to avoid
  214. * wasting too large a part of the page. Off-page page headers
  215. * go into an RB tree, so we can match a returned item with
  216. * its header based on the page address.
  217. */
  218. if (pgsize - (size * items) > sizeof(struct pool_item_header)) {
  219. off = pgsize - sizeof(struct pool_item_header);
  220. } else if (sizeof(struct pool_item_header) * 2 >= size) {
  221. off = pgsize - sizeof(struct pool_item_header);
  222. items = off / size;
  223. }
  224. KASSERT(items > 0);
  225. /*
  226. * Initialize the pool structure.
  227. */
  228. memset(pp, 0, sizeof(*pp));
  229. TAILQ_INIT(&pp->pr_emptypages);
  230. TAILQ_INIT(&pp->pr_fullpages);
  231. TAILQ_INIT(&pp->pr_partpages);
  232. pp->pr_curpage = NULL;
  233. pp->pr_npages = 0;
  234. pp->pr_minitems = 0;
  235. pp->pr_minpages = 0;
  236. pp->pr_maxpages = 8;
  237. pp->pr_size = size;
  238. pp->pr_pgsize = pgsize;
  239. pp->pr_pgmask = ~0UL ^ (pgsize - 1);
  240. pp->pr_phoffset = off;
  241. pp->pr_itemsperpage = items;
  242. pp->pr_wchan = wchan;
  243. pp->pr_alloc = palloc;
  244. pp->pr_nitems = 0;
  245. pp->pr_nout = 0;
  246. pp->pr_hardlimit = UINT_MAX;
  247. pp->pr_hardlimit_warning = NULL;
  248. pp->pr_hardlimit_ratecap.tv_sec = 0;
  249. pp->pr_hardlimit_ratecap.tv_usec = 0;
  250. pp->pr_hardlimit_warning_last.tv_sec = 0;
  251. pp->pr_hardlimit_warning_last.tv_usec = 0;
  252. RB_INIT(&pp->pr_phtree);
  253. /*
  254. * Use the space between the chunks and the page header
  255. * for cache coloring.
  256. */
  257. space = POOL_INPGHDR(pp) ? pp->pr_phoffset : pp->pr_pgsize;
  258. space -= pp->pr_itemsperpage * pp->pr_size;
  259. pp->pr_align = align;
  260. pp->pr_maxcolors = (space / align) + 1;
  261. pp->pr_nget = 0;
  262. pp->pr_nfail = 0;
  263. pp->pr_nput = 0;
  264. pp->pr_npagealloc = 0;
  265. pp->pr_npagefree = 0;
  266. pp->pr_hiwat = 0;
  267. pp->pr_nidle = 0;
  268. pp->pr_ipl = -1;
  269. mtx_init(&pp->pr_mtx, IPL_NONE);
  270. mtx_init(&pp->pr_requests_mtx, IPL_NONE);
  271. TAILQ_INIT(&pp->pr_requests);
  272. if (phpool.pr_size == 0) {
  273. pool_init(&phpool, sizeof(struct pool_item_header), 0, 0,
  274. 0, "phpool", NULL);
  275. pool_setipl(&phpool, IPL_HIGH);
  276. /* make sure phpool wont "recurse" */
  277. KASSERT(POOL_INPGHDR(&phpool));
  278. }
  279. /* pglistalloc/constraint parameters */
  280. pp->pr_crange = &kp_dirty;
  281. /* Insert this into the list of all pools. */
  282. rw_enter_write(&pool_lock);
  283. #ifdef DIAGNOSTIC
  284. SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
  285. if (iter == pp)
  286. panic("%s: pool %s already on list", __func__, wchan);
  287. }
  288. #endif
  289. pp->pr_serial = ++pool_serial;
  290. if (pool_serial == 0)
  291. panic("%s: too much uptime", __func__);
  292. SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
  293. pool_count++;
  294. rw_exit_write(&pool_lock);
  295. }
  296. void
  297. pool_setipl(struct pool *pp, int ipl)
  298. {
  299. pp->pr_ipl = ipl;
  300. mtx_init(&pp->pr_mtx, ipl);
  301. mtx_init(&pp->pr_requests_mtx, ipl);
  302. }
  303. /*
  304. * Decommission a pool resource.
  305. */
  306. void
  307. pool_destroy(struct pool *pp)
  308. {
  309. struct pool_item_header *ph;
  310. struct pool *prev, *iter;
  311. #ifdef DIAGNOSTIC
  312. if (pp->pr_nout != 0)
  313. panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
  314. #endif
  315. /* Remove from global pool list */
  316. rw_enter_write(&pool_lock);
  317. pool_count--;
  318. if (pp == SIMPLEQ_FIRST(&pool_head))
  319. SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist);
  320. else {
  321. prev = SIMPLEQ_FIRST(&pool_head);
  322. SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
  323. if (iter == pp) {
  324. SIMPLEQ_REMOVE_AFTER(&pool_head, prev,
  325. pr_poollist);
  326. break;
  327. }
  328. prev = iter;
  329. }
  330. }
  331. rw_exit_write(&pool_lock);
  332. /* Remove all pages */
  333. while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) {
  334. mtx_enter(&pp->pr_mtx);
  335. pool_p_remove(pp, ph);
  336. mtx_leave(&pp->pr_mtx);
  337. pool_p_free(pp, ph);
  338. }
  339. KASSERT(TAILQ_EMPTY(&pp->pr_fullpages));
  340. KASSERT(TAILQ_EMPTY(&pp->pr_partpages));
  341. }
  342. void
  343. pool_request_init(struct pool_request *pr,
  344. void (*handler)(void *, void *), void *cookie)
  345. {
  346. pr->pr_handler = handler;
  347. pr->pr_cookie = cookie;
  348. pr->pr_item = NULL;
  349. }
  350. void
  351. pool_request(struct pool *pp, struct pool_request *pr)
  352. {
  353. mtx_enter(&pp->pr_requests_mtx);
  354. TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
  355. pool_runqueue(pp, PR_NOWAIT);
  356. mtx_leave(&pp->pr_requests_mtx);
  357. }
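/*
 * Illustrative sketch (hypothetical): pool_request() queues an
 * allocation that pool_runqueue() completes later by handing the item
 * to the registered handler, so the caller never sleeps.  The request
 * structure must remain valid until the handler has fired; sc, sc_buf
 * and sc_request are assumed names.  pool_get() itself uses this
 * mechanism via pool_get_done() below.
 *
 *	void
 *	example_item_ready(void *cookie, void *item)
 *	{
 *		struct example_softc *sc = cookie;
 *
 *		sc->sc_buf = item;
 *	}
 *
 *	pool_request_init(&sc->sc_request, example_item_ready, sc);
 *	pool_request(&examplepl, &sc->sc_request);
 */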
  358. struct pool_get_memory {
  359. struct mutex mtx;
  360. void * volatile v;
  361. };
  362. /*
  363. * Grab an item from the pool.
  364. */
  365. void *
  366. pool_get(struct pool *pp, int flags)
  367. {
  368. void *v = NULL;
  369. int slowdown = 0;
  370. KASSERT(flags & (PR_WAITOK | PR_NOWAIT));
  371. mtx_enter(&pp->pr_mtx);
  372. if (pp->pr_nout >= pp->pr_hardlimit) {
  373. if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
  374. goto fail;
  375. } else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) {
  376. if (ISSET(flags, PR_NOWAIT))
  377. goto fail;
  378. }
  379. mtx_leave(&pp->pr_mtx);
  380. if (slowdown && ISSET(flags, PR_WAITOK))
  381. yield();
  382. if (v == NULL) {
  383. struct pool_get_memory mem = {
  384. MUTEX_INITIALIZER((pp->pr_ipl == -1) ?
  385. IPL_NONE : pp->pr_ipl), NULL };
  386. struct pool_request pr;
  387. pool_request_init(&pr, pool_get_done, &mem);
  388. pool_request(pp, &pr);
  389. mtx_enter(&mem.mtx);
  390. while (mem.v == NULL)
  391. msleep(&mem, &mem.mtx, PSWP, pp->pr_wchan, 0);
  392. mtx_leave(&mem.mtx);
  393. v = mem.v;
  394. }
  395. if (ISSET(flags, PR_ZERO))
  396. memset(v, 0, pp->pr_size);
  397. return (v);
  398. fail:
  399. pp->pr_nfail++;
  400. mtx_leave(&pp->pr_mtx);
  401. return (NULL);
  402. }
  403. void
  404. pool_get_done(void *xmem, void *v)
  405. {
  406. struct pool_get_memory *mem = xmem;
  407. mtx_enter(&mem->mtx);
  408. mem->v = v;
  409. mtx_leave(&mem->mtx);
  410. wakeup_one(mem);
  411. }
  412. void
  413. pool_runqueue(struct pool *pp, int flags)
  414. {
  415. struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl);
  416. struct pool_request *pr;
  417. MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
  418. MUTEX_ASSERT_LOCKED(&pp->pr_requests_mtx);
  419. if (pp->pr_requesting++)
  420. return;
  421. do {
  422. pp->pr_requesting = 1;
  423. /* no TAILQ_JOIN? :( */
  424. while ((pr = TAILQ_FIRST(&pp->pr_requests)) != NULL) {
  425. TAILQ_REMOVE(&pp->pr_requests, pr, pr_entry);
  426. TAILQ_INSERT_TAIL(&prl, pr, pr_entry);
  427. }
  428. if (TAILQ_EMPTY(&prl))
  429. continue;
  430. mtx_leave(&pp->pr_requests_mtx);
  431. mtx_enter(&pp->pr_mtx);
  432. pr = TAILQ_FIRST(&prl);
  433. while (pr != NULL) {
  434. int slowdown = 0;
  435. if (pp->pr_nout >= pp->pr_hardlimit)
  436. break;
  437. pr->pr_item = pool_do_get(pp, flags, &slowdown);
  438. if (pr->pr_item == NULL) /* || slowdown ? */
  439. break;
  440. pr = TAILQ_NEXT(pr, pr_entry);
  441. }
  442. mtx_leave(&pp->pr_mtx);
  443. while ((pr = TAILQ_FIRST(&prl)) != NULL &&
  444. pr->pr_item != NULL) {
  445. TAILQ_REMOVE(&prl, pr, pr_entry);
  446. (*pr->pr_handler)(pr->pr_cookie, pr->pr_item);
  447. }
  448. mtx_enter(&pp->pr_requests_mtx);
  449. } while (--pp->pr_requesting);
  450. /* no TAILQ_JOIN :( */
  451. while ((pr = TAILQ_FIRST(&prl)) != NULL) {
  452. TAILQ_REMOVE(&prl, pr, pr_entry);
  453. TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
  454. }
  455. }
  456. void *
  457. pool_do_get(struct pool *pp, int flags, int *slowdown)
  458. {
  459. struct pool_item *pi;
  460. struct pool_item_header *ph;
  461. MUTEX_ASSERT_LOCKED(&pp->pr_mtx);
  462. if (pp->pr_ipl != -1)
  463. splassert(pp->pr_ipl);
  464. /*
  465. * Account for this item now to avoid races if we need to give up
  466. * pr_mtx to allocate a page.
  467. */
  468. pp->pr_nout++;
  469. if (pp->pr_curpage == NULL) {
  470. mtx_leave(&pp->pr_mtx);
  471. ph = pool_p_alloc(pp, flags, slowdown);
  472. mtx_enter(&pp->pr_mtx);
  473. if (ph == NULL) {
  474. pp->pr_nout--;
  475. return (NULL);
  476. }
  477. pool_p_insert(pp, ph);
  478. }
  479. ph = pp->pr_curpage;
  480. pi = XSIMPLEQ_FIRST(&ph->ph_itemlist);
  481. if (__predict_false(pi == NULL))
  482. panic("%s: %s: page empty", __func__, pp->pr_wchan);
  483. if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
  484. panic("%s: %s free list modified: "
  485. "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx",
  486. __func__, pp->pr_wchan, ph->ph_page, pi,
  487. 0, pi->pi_magic, POOL_IMAGIC(ph, pi));
  488. }
  489. XSIMPLEQ_REMOVE_HEAD(&ph->ph_itemlist, pi_list);
  490. #ifdef DIAGNOSTIC
  491. if (pool_debug && POOL_PHPOISON(ph)) {
  492. size_t pidx;
  493. uint32_t pval;
  494. if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
  495. &pidx, &pval)) {
  496. int *ip = (int *)(pi + 1);
  497. panic("%s: %s free list modified: "
  498. "page %p; item addr %p; offset 0x%zx=0x%x",
  499. __func__, pp->pr_wchan, ph->ph_page, pi,
  500. pidx * sizeof(int), ip[pidx]);
  501. }
  502. }
  503. #endif /* DIAGNOSTIC */
  504. if (ph->ph_nmissing++ == 0) {
  505. /*
  506. * This page was previously empty. Move it to the list of
  507. * partially-full pages. This page is already curpage.
  508. */
  509. TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_pagelist);
  510. TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_pagelist);
  511. pp->pr_nidle--;
  512. }
  513. if (ph->ph_nmissing == pp->pr_itemsperpage) {
  514. /*
  515. * This page is now full. Move it to the full list
  516. * and select a new current page.
  517. */
  518. TAILQ_REMOVE(&pp->pr_partpages, ph, ph_pagelist);
  519. TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_pagelist);
  520. pool_update_curpage(pp);
  521. }
  522. pp->pr_nget++;
  523. return (pi);
  524. }
  525. /*
  526. * Return resource to the pool.
  527. */
  528. void
  529. pool_put(struct pool *pp, void *v)
  530. {
  531. struct pool_item *pi = v;
  532. struct pool_item_header *ph, *freeph = NULL;
  533. #ifdef DIAGNOSTIC
  534. if (v == NULL)
  535. panic("%s: NULL item", __func__);
  536. #endif
  537. mtx_enter(&pp->pr_mtx);
  538. if (pp->pr_ipl != -1)
  539. splassert(pp->pr_ipl);
  540. ph = pr_find_pagehead(pp, v);
  541. #ifdef DIAGNOSTIC
  542. if (pool_debug) {
  543. struct pool_item *qi;
  544. XSIMPLEQ_FOREACH(qi, &ph->ph_itemlist, pi_list) {
  545. if (pi == qi) {
  546. panic("%s: %s: double pool_put: %p", __func__,
  547. pp->pr_wchan, pi);
  548. }
  549. }
  550. }
  551. #endif /* DIAGNOSTIC */
  552. pi->pi_magic = POOL_IMAGIC(ph, pi);
  553. XSIMPLEQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
  554. #ifdef DIAGNOSTIC
  555. if (POOL_PHPOISON(ph))
  556. poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
  557. #endif /* DIAGNOSTIC */
  558. if (ph->ph_nmissing-- == pp->pr_itemsperpage) {
  559. /*
  560. * The page was previously completely full, move it to the
  561. * partially-full list.
  562. */
  563. TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_pagelist);
  564. TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_pagelist);
  565. }
  566. if (ph->ph_nmissing == 0) {
  567. /*
  568. * The page is now empty, so move it to the empty page list.
  569. */
  570. pp->pr_nidle++;
  571. ph->ph_tick = ticks;
  572. TAILQ_REMOVE(&pp->pr_partpages, ph, ph_pagelist);
  573. TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_pagelist);
  574. pool_update_curpage(pp);
  575. }
  576. pp->pr_nout--;
  577. pp->pr_nput++;
  578. /* is it time to free a page? */
  579. if (pp->pr_nidle > pp->pr_maxpages &&
  580. (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
  581. (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
  582. freeph = ph;
  583. pool_p_remove(pp, freeph);
  584. }
  585. mtx_leave(&pp->pr_mtx);
  586. if (freeph != NULL)
  587. pool_p_free(pp, freeph);
  588. mtx_enter(&pp->pr_requests_mtx);
  589. pool_runqueue(pp, PR_NOWAIT);
  590. mtx_leave(&pp->pr_requests_mtx);
  591. }
  592. /*
  593. * Add N items to the pool.
  594. */
  595. int
  596. pool_prime(struct pool *pp, int n)
  597. {
  598. struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
  599. struct pool_item_header *ph;
  600. int newpages;
  601. newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
  602. while (newpages-- > 0) {
  603. int slowdown = 0;
  604. ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown);
  605. if (ph == NULL) /* or slowdown? */
  606. break;
  607. TAILQ_INSERT_TAIL(&pl, ph, ph_pagelist);
  608. }
  609. mtx_enter(&pp->pr_mtx);
  610. while ((ph = TAILQ_FIRST(&pl)) != NULL) {
  611. TAILQ_REMOVE(&pl, ph, ph_pagelist);
  612. pool_p_insert(pp, ph);
  613. }
  614. mtx_leave(&pp->pr_mtx);
  615. return (0);
  616. }
  617. struct pool_item_header *
  618. pool_p_alloc(struct pool *pp, int flags, int *slowdown)
  619. {
  620. struct pool_item_header *ph;
  621. struct pool_item *pi;
  622. caddr_t addr;
  623. int n;
  624. MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
  625. KASSERT(pp->pr_size >= sizeof(*pi));
  626. addr = pool_allocator_alloc(pp, flags, slowdown);
  627. if (addr == NULL)
  628. return (NULL);
  629. if (POOL_INPGHDR(pp))
  630. ph = (struct pool_item_header *)(addr + pp->pr_phoffset);
  631. else {
  632. ph = pool_get(&phpool, flags);
  633. if (ph == NULL) {
  634. pool_allocator_free(pp, addr);
  635. return (NULL);
  636. }
  637. }
  638. XSIMPLEQ_INIT(&ph->ph_itemlist);
  639. ph->ph_page = addr;
  640. addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors);
  641. ph->ph_colored = addr;
  642. ph->ph_nmissing = 0;
  643. arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic));
  644. #ifdef DIAGNOSTIC
  645. /* use a bit in ph_magic to record if we poison page items */
  646. if (pool_debug)
  647. SET(ph->ph_magic, POOL_MAGICBIT);
  648. else
  649. CLR(ph->ph_magic, POOL_MAGICBIT);
  650. #endif /* DIAGNOSTIC */
  651. n = pp->pr_itemsperpage;
  652. while (n--) {
  653. pi = (struct pool_item *)addr;
  654. pi->pi_magic = POOL_IMAGIC(ph, pi);
  655. XSIMPLEQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);
  656. #ifdef DIAGNOSTIC
  657. if (POOL_PHPOISON(ph))
  658. poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
  659. #endif /* DIAGNOSTIC */
  660. addr += pp->pr_size;
  661. }
  662. return (ph);
  663. }
  664. void
  665. pool_p_free(struct pool *pp, struct pool_item_header *ph)
  666. {
  667. struct pool_item *pi;
  668. MUTEX_ASSERT_UNLOCKED(&pp->pr_mtx);
  669. KASSERT(ph->ph_nmissing == 0);
  670. XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
  671. if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
  672. panic("%s: %s free list modified: "
  673. "page %p; item addr %p; offset 0x%x=0x%lx",
  674. __func__, pp->pr_wchan, ph->ph_page, pi,
  675. 0, pi->pi_magic);
  676. }
  677. #ifdef DIAGNOSTIC
  678. if (POOL_PHPOISON(ph)) {
  679. size_t pidx;
  680. uint32_t pval;
  681. if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
  682. &pidx, &pval)) {
  683. int *ip = (int *)(pi + 1);
  684. panic("%s: %s free list modified: "
  685. "page %p; item addr %p; offset 0x%zx=0x%x",
  686. __func__, pp->pr_wchan, ph->ph_page, pi,
  687. pidx * sizeof(int), ip[pidx]);
  688. }
  689. }
  690. #endif
  691. }
  692. pool_allocator_free(pp, ph->ph_page);
  693. if (!POOL_INPGHDR(pp))
  694. pool_put(&phpool, ph);
  695. }
  696. void
  697. pool_p_insert(struct pool *pp, struct pool_item_header *ph)
  698. {
  699. MUTEX_ASSERT_LOCKED(&pp->pr_mtx);
  700. /* If the pool was depleted, point at the new page */
  701. if (pp->pr_curpage == NULL)
  702. pp->pr_curpage = ph;
  703. TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_pagelist);
  704. if (!POOL_INPGHDR(pp))
  705. RB_INSERT(phtree, &pp->pr_phtree, ph);
  706. pp->pr_nitems += pp->pr_itemsperpage;
  707. pp->pr_nidle++;
  708. pp->pr_npagealloc++;
  709. if (++pp->pr_npages > pp->pr_hiwat)
  710. pp->pr_hiwat = pp->pr_npages;
  711. }
  712. void
  713. pool_p_remove(struct pool *pp, struct pool_item_header *ph)
  714. {
  715. MUTEX_ASSERT_LOCKED(&pp->pr_mtx);
  716. pp->pr_npagefree++;
  717. pp->pr_npages--;
  718. pp->pr_nidle--;
  719. pp->pr_nitems -= pp->pr_itemsperpage;
  720. if (!POOL_INPGHDR(pp))
  721. RB_REMOVE(phtree, &pp->pr_phtree, ph);
  722. TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_pagelist);
  723. pool_update_curpage(pp);
  724. }
  725. void
  726. pool_update_curpage(struct pool *pp)
  727. {
  728. pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist);
  729. if (pp->pr_curpage == NULL) {
  730. pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist);
  731. }
  732. }
  733. void
  734. pool_setlowat(struct pool *pp, int n)
  735. {
  736. int prime = 0;
  737. mtx_enter(&pp->pr_mtx);
  738. pp->pr_minitems = n;
  739. pp->pr_minpages = (n == 0)
  740. ? 0
  741. : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
  742. if (pp->pr_nitems < n)
  743. prime = n - pp->pr_nitems;
  744. mtx_leave(&pp->pr_mtx);
  745. if (prime > 0)
  746. pool_prime(pp, prime);
  747. }
  748. void
  749. pool_sethiwat(struct pool *pp, int n)
  750. {
  751. pp->pr_maxpages = (n == 0)
  752. ? 0
  753. : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
  754. }
  755. int
  756. pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
  757. {
  758. int error = 0;
  759. if (n < pp->pr_nout) {
  760. error = EINVAL;
  761. goto done;
  762. }
  763. pp->pr_hardlimit = n;
  764. pp->pr_hardlimit_warning = warnmsg;
  765. pp->pr_hardlimit_ratecap.tv_sec = ratecap;
  766. pp->pr_hardlimit_warning_last.tv_sec = 0;
  767. pp->pr_hardlimit_warning_last.tv_usec = 0;
  768. done:
  769. return (error);
  770. }
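/*
 * Illustrative sketch (hypothetical values): after pool_init() a pool
 * can be tuned with the routines above, e.g. keep roughly 16 items
 * primed, start releasing idle pages beyond about 64 items worth, and
 * refuse to hand out more than 1024 items at once; the warning message
 * and rate cap are recorded along with the hard limit.
 *
 *	pool_setlowat(&examplepl, 16);
 *	pool_sethiwat(&examplepl, 64);
 *	pool_sethardlimit(&examplepl, 1024,
 *	    "examplepl: hard limit reached", 60);
 */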
  771. void
  772. pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
  773. {
  774. pp->pr_crange = mode;
  775. }
  776. /*
  777. * Release all complete pages that have not been used recently.
  778. *
  779. * Returns non-zero if any pages have been reclaimed.
  780. */
  781. int
  782. pool_reclaim(struct pool *pp)
  783. {
  784. struct pool_item_header *ph, *phnext;
  785. struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
  786. mtx_enter(&pp->pr_mtx);
  787. for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
  788. phnext = TAILQ_NEXT(ph, ph_pagelist);
  789. /* Check our minimum page claim */
  790. if (pp->pr_npages <= pp->pr_minpages)
  791. break;
  792. /*
  793. * If freeing this page would put us below
  794. * the low water mark, stop now.
  795. */
  796. if ((pp->pr_nitems - pp->pr_itemsperpage) <
  797. pp->pr_minitems)
  798. break;
  799. pool_p_remove(pp, ph);
  800. TAILQ_INSERT_TAIL(&pl, ph, ph_pagelist);
  801. }
  802. mtx_leave(&pp->pr_mtx);
  803. if (TAILQ_EMPTY(&pl))
  804. return (0);
  805. while ((ph = TAILQ_FIRST(&pl)) != NULL) {
  806. TAILQ_REMOVE(&pl, ph, ph_pagelist);
  807. pool_p_free(pp, ph);
  808. }
  809. return (1);
  810. }
  811. /*
  812. * Release all complete pages that have not been used recently
  813. * from all pools.
  814. */
  815. void
  816. pool_reclaim_all(void)
  817. {
  818. struct pool *pp;
  819. rw_enter_read(&pool_lock);
  820. SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist)
  821. pool_reclaim(pp);
  822. rw_exit_read(&pool_lock);
  823. }
  824. #ifdef DDB
  825. #include <machine/db_machdep.h>
  826. #include <ddb/db_output.h>
  827. /*
  828. * Diagnostic helpers.
  829. */
  830. void
  831. pool_printit(struct pool *pp, const char *modif,
  832. int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
  833. {
  834. pool_print1(pp, modif, pr);
  835. }
  836. void
  837. pool_print_pagelist(struct pool_pagelist *pl,
  838. int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
  839. {
  840. struct pool_item_header *ph;
  841. struct pool_item *pi;
  842. TAILQ_FOREACH(ph, pl, ph_pagelist) {
  843. (*pr)("\t\tpage %p, color %p, nmissing %d\n",
  844. ph->ph_page, ph->ph_colored, ph->ph_nmissing);
  845. XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
  846. if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
  847. (*pr)("\t\t\titem %p, magic 0x%lx\n",
  848. pi, pi->pi_magic);
  849. }
  850. }
  851. }
  852. }
  853. void
  854. pool_print1(struct pool *pp, const char *modif,
  855. int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
  856. {
  857. struct pool_item_header *ph;
  858. int print_pagelist = 0;
  859. char c;
  860. while ((c = *modif++) != '\0') {
  861. if (c == 'p')
  862. print_pagelist = 1;
  863. modif++;
  864. }
  865. (*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size,
  866. pp->pr_maxcolors);
  867. (*pr)("\talloc %p\n", pp->pr_alloc);
  868. (*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
  869. pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
  870. (*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
  871. pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);
  872. (*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
  873. pp->pr_nget, pp->pr_nfail, pp->pr_nput);
  874. (*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
  875. pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);
  876. if (print_pagelist == 0)
  877. return;
  878. if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL)
  879. (*pr)("\n\tempty page list:\n");
  880. pool_print_pagelist(&pp->pr_emptypages, pr);
  881. if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL)
  882. (*pr)("\n\tfull page list:\n");
  883. pool_print_pagelist(&pp->pr_fullpages, pr);
  884. if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL)
  885. (*pr)("\n\tpartial-page list:\n");
  886. pool_print_pagelist(&pp->pr_partpages, pr);
  887. if (pp->pr_curpage == NULL)
  888. (*pr)("\tno current page\n");
  889. else
  890. (*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
  891. }
  892. void
  893. db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
  894. {
  895. struct pool *pp;
  896. char maxp[16];
  897. int ovflw;
  898. char mode;
  899. mode = modif[0];
  900. if (mode != '\0' && mode != 'a') {
  901. db_printf("usage: show all pools [/a]\n");
  902. return;
  903. }
  904. if (mode == '\0')
  905. db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
  906. "Name",
  907. "Size",
  908. "Requests",
  909. "Fail",
  910. "Releases",
  911. "Pgreq",
  912. "Pgrel",
  913. "Npage",
  914. "Hiwat",
  915. "Minpg",
  916. "Maxpg",
  917. "Idle");
  918. else
  919. db_printf("%-12s %18s %18s\n",
  920. "Name", "Address", "Allocator");
  921. SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
  922. if (mode == 'a') {
  923. db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp,
  924. pp->pr_alloc);
  925. continue;
  926. }
  927. if (!pp->pr_nget)
  928. continue;
  929. if (pp->pr_maxpages == UINT_MAX)
  930. snprintf(maxp, sizeof maxp, "inf");
  931. else
  932. snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);
  933. #define PRWORD(ovflw, fmt, width, fixed, val) do { \
  934. (ovflw) += db_printf((fmt), \
  935. (width) - (fixed) - (ovflw) > 0 ? \
  936. (width) - (fixed) - (ovflw) : 0, \
  937. (val)) - (width); \
  938. if ((ovflw) < 0) \
  939. (ovflw) = 0; \
  940. } while (/* CONSTCOND */0)
  941. ovflw = 0;
  942. PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
  943. PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
  944. PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
  945. PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
  946. PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
  947. PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
  948. PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
  949. PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
  950. PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
  951. PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
  952. PRWORD(ovflw, " %*s", 6, 1, maxp);
  953. PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);
  954. pool_chk(pp);
  955. }
  956. }
  957. #endif /* DDB */
  958. #if defined(POOL_DEBUG) || defined(DDB)
  959. int
  960. pool_chk_page(struct pool *pp, struct pool_item_header *ph, int expected)
  961. {
  962. struct pool_item *pi;
  963. caddr_t page;
  964. int n;
  965. const char *label = pp->pr_wchan;
  966. page = (caddr_t)((u_long)ph & pp->pr_pgmask);
  967. if (page != ph->ph_page && POOL_INPGHDR(pp)) {
  968. printf("%s: ", label);
  969. printf("pool(%p:%s): page inconsistency: page %p; "
  970. "at page head addr %p (p %p)\n",
  971. pp, pp->pr_wchan, ph->ph_page, ph, page);
  972. return 1;
  973. }
  974. for (pi = XSIMPLEQ_FIRST(&ph->ph_itemlist), n = 0;
  975. pi != NULL;
  976. pi = XSIMPLEQ_NEXT(&ph->ph_itemlist, pi, pi_list), n++) {
  977. if ((caddr_t)pi < ph->ph_page ||
  978. (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) {
  979. printf("%s: ", label);
  980. printf("pool(%p:%s): page inconsistency: page %p;"
  981. " item ordinal %d; addr %p\n", pp,
  982. pp->pr_wchan, ph->ph_page, n, pi);
  983. return (1);
  984. }
  985. if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
  986. printf("%s: ", label);
  987. printf("pool(%p:%s): free list modified: "
  988. "page %p; item ordinal %d; addr %p "
  989. "(p %p); offset 0x%x=0x%lx\n",
  990. pp, pp->pr_wchan, ph->ph_page, n, pi, page,
  991. 0, pi->pi_magic);
  992. }
  993. #ifdef DIAGNOSTIC
  994. if (POOL_PHPOISON(ph)) {
  995. size_t pidx;
  996. uint32_t pval;
  997. if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
  998. &pidx, &pval)) {
  999. int *ip = (int *)(pi + 1);
  1000. printf("pool(%s): free list modified: "
  1001. "page %p; item ordinal %d; addr %p "
  1002. "(p %p); offset 0x%zx=0x%x\n",
  1003. pp->pr_wchan, ph->ph_page, n, pi,
  1004. page, pidx * sizeof(int), ip[pidx]);
  1005. }
  1006. }
  1007. #endif /* DIAGNOSTIC */
  1008. }
  1009. if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
  1010. printf("pool(%p:%s): page inconsistency: page %p;"
  1011. " %d on list, %d missing, %d items per page\n", pp,
  1012. pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
  1013. pp->pr_itemsperpage);
  1014. return 1;
  1015. }
  1016. if (expected >= 0 && n != expected) {
  1017. printf("pool(%p:%s): page inconsistency: page %p;"
  1018. " %d on list, %d missing, %d expected\n", pp,
  1019. pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
  1020. expected);
  1021. return 1;
  1022. }
  1023. return 0;
  1024. }
  1025. int
  1026. pool_chk(struct pool *pp)
  1027. {
  1028. struct pool_item_header *ph;
  1029. int r = 0;
  1030. TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_pagelist)
  1031. r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
  1032. TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_pagelist)
  1033. r += pool_chk_page(pp, ph, 0);
  1034. TAILQ_FOREACH(ph, &pp->pr_partpages, ph_pagelist)
  1035. r += pool_chk_page(pp, ph, -1);
  1036. return (r);
  1037. }
  1038. #endif /* defined(POOL_DEBUG) || defined(DDB) */
  1039. #ifdef DDB
  1040. void
  1041. pool_walk(struct pool *pp, int full,
  1042. int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))),
  1043. void (*func)(void *, int, int (*)(const char *, ...)
  1044. __attribute__((__format__(__kprintf__,1,2)))))
  1045. {
  1046. struct pool_item_header *ph;
  1047. struct pool_item *pi;
  1048. caddr_t cp;
  1049. int n;
  1050. TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) {
  1051. cp = ph->ph_colored;
  1052. n = ph->ph_nmissing;
  1053. while (n--) {
  1054. func(cp, full, pr);
  1055. cp += pp->pr_size;
  1056. }
  1057. }
  1058. TAILQ_FOREACH(ph, &pp->pr_partpages, ph_pagelist) {
  1059. cp = ph->ph_colored;
  1060. n = ph->ph_nmissing;
  1061. do {
  1062. XSIMPLEQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
  1063. if (cp == (caddr_t)pi)
  1064. break;
  1065. }
  1066. if (cp != (caddr_t)pi) {
  1067. func(cp, full, pr);
  1068. n--;
  1069. }
  1070. cp += pp->pr_size;
  1071. } while (n > 0);
  1072. }
  1073. }
  1074. #endif
  1075. /*
  1076. * We have three different sysctls.
  1077. * kern.pool.npools - the number of pools.
  1078. * kern.pool.pool.<pool#> - the pool struct for the pool#.
  1079. * kern.pool.name.<pool#> - the name for pool#.
  1080. */
  1081. int
  1082. sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
  1083. {
  1084. struct kinfo_pool pi;
  1085. struct pool *pp;
  1086. int rv = ENOENT;
  1087. switch (name[0]) {
  1088. case KERN_POOL_NPOOLS:
  1089. if (namelen != 1)
  1090. return (ENOTDIR);
  1091. return (sysctl_rdint(oldp, oldlenp, NULL, pool_count));
  1092. case KERN_POOL_NAME:
  1093. case KERN_POOL_POOL:
  1094. break;
  1095. default:
  1096. return (EOPNOTSUPP);
  1097. }
  1098. if (namelen != 2)
  1099. return (ENOTDIR);
  1100. rw_enter_read(&pool_lock);
  1101. SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
  1102. if (name[1] == pp->pr_serial)
  1103. break;
  1104. }
  1105. if (pp == NULL)
  1106. goto done;
  1107. switch (name[0]) {
  1108. case KERN_POOL_NAME:
  1109. rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan);
  1110. break;
  1111. case KERN_POOL_POOL:
  1112. memset(&pi, 0, sizeof(pi));
  1113. if (pp->pr_ipl != -1)
  1114. mtx_enter(&pp->pr_mtx);
  1115. pi.pr_size = pp->pr_size;
  1116. pi.pr_pgsize = pp->pr_pgsize;
  1117. pi.pr_itemsperpage = pp->pr_itemsperpage;
  1118. pi.pr_npages = pp->pr_npages;
  1119. pi.pr_minpages = pp->pr_minpages;
  1120. pi.pr_maxpages = pp->pr_maxpages;
  1121. pi.pr_hardlimit = pp->pr_hardlimit;
  1122. pi.pr_nout = pp->pr_nout;
  1123. pi.pr_nitems = pp->pr_nitems;
  1124. pi.pr_nget = pp->pr_nget;
  1125. pi.pr_nput = pp->pr_nput;
  1126. pi.pr_nfail = pp->pr_nfail;
  1127. pi.pr_npagealloc = pp->pr_npagealloc;
  1128. pi.pr_npagefree = pp->pr_npagefree;
  1129. pi.pr_hiwat = pp->pr_hiwat;
  1130. pi.pr_nidle = pp->pr_nidle;
  1131. if (pp->pr_ipl != -1)
  1132. mtx_leave(&pp->pr_mtx);
  1133. rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));
  1134. break;
  1135. }
  1136. done:
  1137. rw_exit_read(&pool_lock);
  1138. return (rv);
  1139. }
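/*
 * Illustrative sketch (userland, hypothetical): reading one pool's
 * statistics through the sysctl interface handled above.  This assumes
 * the usual CTL_KERN/KERN_POOL prefix in front of the names seen by
 * sysctl_dopool(); the trailing 1 is a pool serial number (the first
 * pool created gets serial 1, per pool_init() above).
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <sys/pool.h>
 *	#include <err.h>
 *	#include <stdio.h>
 *
 *	struct kinfo_pool pi;
 *	size_t len = sizeof(pi);
 *	int mib[] = { CTL_KERN, KERN_POOL, KERN_POOL_POOL, 1 };
 *
 *	if (sysctl(mib, 4, &pi, &len, NULL, 0) == -1)
 *		err(1, "sysctl");
 *	printf("%u of %u items in use\n", pi.pr_nout, pi.pr_nitems);
 */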
  1140. void
  1141. pool_gc_sched(void *null)
  1142. {
  1143. task_add(systqmp, &pool_gc_task);
  1144. }
  1145. void
  1146. pool_gc_pages(void *null)
  1147. {
  1148. struct pool *pp;
  1149. struct pool_item_header *ph, *freeph;
  1150. int s;
  1151. rw_enter_read(&pool_lock);
  1152. s = splvm(); /* XXX go to splvm until all pools _setipl properly */
  1153. SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
  1154. if (pp->pr_nidle <= pp->pr_minpages || /* guess */
  1155. !mtx_enter_try(&pp->pr_mtx)) /* try */
  1156. continue;
  1157. /* is it time to free a page? */
  1158. if (pp->pr_nidle > pp->pr_minpages &&
  1159. (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
  1160. (ticks - ph->ph_tick) > (hz * pool_wait_gc)) {
  1161. freeph = ph;
  1162. pool_p_remove(pp, freeph);
  1163. } else
  1164. freeph = NULL;
  1165. mtx_leave(&pp->pr_mtx);
  1166. if (freeph != NULL)
  1167. pool_p_free(pp, freeph);
  1168. }
  1169. splx(s);
  1170. rw_exit_read(&pool_lock);
  1171. timeout_add_sec(&pool_gc_tick, 1);
  1172. }
  1173. /*
  1174. * Pool backend allocators.
  1175. */
  1176. void *
  1177. pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
  1178. {
  1179. void *v;
  1180. KERNEL_LOCK();
  1181. v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown);
  1182. KERNEL_UNLOCK();
  1183. #ifdef DIAGNOSTIC
  1184. if (v != NULL && POOL_INPGHDR(pp)) {
  1185. vaddr_t addr = (vaddr_t)v;
  1186. if ((addr & pp->pr_pgmask) != addr) {
  1187. panic("%s: %s page address %p isnt aligned to %u",
  1188. __func__, pp->pr_wchan, v, pp->pr_pgsize);
  1189. }
  1190. }
  1191. #endif
  1192. return (v);
  1193. }
  1194. void
  1195. pool_allocator_free(struct pool *pp, void *v)
  1196. {
  1197. struct pool_allocator *pa = pp->pr_alloc;
  1198. KERNEL_LOCK();
  1199. (*pa->pa_free)(pp, v);
  1200. KERNEL_UNLOCK();
  1201. }
  1202. void *
  1203. pool_page_alloc(struct pool *pp, int flags, int *slowdown)
  1204. {
  1205. struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
  1206. kd.kd_waitok = ISSET(flags, PR_WAITOK);
  1207. kd.kd_slowdown = slowdown;
  1208. return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd));
  1209. }
  1210. void
  1211. pool_page_free(struct pool *pp, void *v)
  1212. {
  1213. km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
  1214. }
  1215. void *
  1216. pool_large_alloc(struct pool *pp, int flags, int *slowdown)
  1217. {
  1218. struct kmem_va_mode kv = kv_intrsafe;
  1219. struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
  1220. void *v;
  1221. int s;
  1222. if (POOL_INPGHDR(pp))
  1223. kv.kv_align = pp->pr_pgsize;
  1224. kd.kd_waitok = ISSET(flags, PR_WAITOK);
  1225. kd.kd_slowdown = slowdown;
  1226. s = splvm();
  1227. v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
  1228. splx(s);
  1229. return (v);
  1230. }
  1231. void
  1232. pool_large_free(struct pool *pp, void *v)
  1233. {
  1234. struct kmem_va_mode kv = kv_intrsafe;
  1235. int s;
  1236. if (POOL_INPGHDR(pp))
  1237. kv.kv_align = pp->pr_pgsize;
  1238. s = splvm();
  1239. km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
  1240. splx(s);
  1241. }
  1242. void *
  1243. pool_large_alloc_ni(struct pool *pp, int flags, int *slowdown)
  1244. {
  1245. struct kmem_va_mode kv = kv_any;
  1246. struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
  1247. if (POOL_INPGHDR(pp))
  1248. kv.kv_align = pp->pr_pgsize;
  1249. kd.kd_waitok = ISSET(flags, PR_WAITOK);
  1250. kd.kd_slowdown = slowdown;
  1251. return (km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd));
  1252. }
  1253. void
  1254. pool_large_free_ni(struct pool *pp, void *v)
  1255. {
  1256. struct kmem_va_mode kv = kv_any;
  1257. if (POOL_INPGHDR(pp))
  1258. kv.kv_align = pp->pr_pgsize;
  1259. km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
  1260. }