nedmalloc.c 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954
  1. /* Alternative malloc implementation for multiple threads without
  2. lock contention based on dlmalloc. (C) 2005-2006 Niall Douglas
  3. Boost Software License - Version 1.0 - August 17th, 2003
  4. Permission is hereby granted, free of charge, to any person or organization
  5. obtaining a copy of the software and accompanying documentation covered by
  6. this license (the "Software") to use, reproduce, display, distribute,
  7. execute, and transmit the Software, and to prepare derivative works of the
  8. Software, and to permit third-parties to whom the Software is furnished to
  9. do so, all subject to the following:
  10. The copyright notices in the Software and this entire statement, including
  11. the above license grant, this restriction and the following disclaimer,
  12. must be included in all copies of the Software, in whole or in part, and
  13. all derivative works of the Software, unless such copies or derivative
  14. works are solely in the form of machine-executable object code generated by
  15. a source language processor.
  16. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17. IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18. FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
  19. SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
  20. FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
  21. ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  22. DEALINGS IN THE SOFTWARE.
  23. */
  24. #ifdef _MSC_VER
  25. /* Enable full aliasing on MSVC */
  26. /*#pragma optimize("a", on)*/
  27. #endif
  28. /*#define FULLSANITYCHECKS*/
  29. #include "nedmalloc.h"
  30. #if defined(WIN32)
  31. #include <malloc.h>
  32. #endif
  33. #define MSPACES 1
  34. #define ONLY_MSPACES 1
  35. #ifndef USE_LOCKS
  36. #define USE_LOCKS 1
  37. #endif
  38. #define FOOTERS 1 /* Need to enable footers so frees lock the right mspace */
  39. #undef DEBUG /* dlmalloc wants DEBUG either 0 or 1 */
  40. #ifdef _DEBUG
  41. #define DEBUG 1
  42. #else
  43. #define DEBUG 0
  44. #endif
  45. #ifdef NDEBUG /* Disable assert checking on release builds */
  46. #undef DEBUG
  47. #endif
  48. /* The default of 64Kb means we spend too much time kernel-side */
  49. #ifndef DEFAULT_GRANULARITY
  50. #define DEFAULT_GRANULARITY (1*1024*1024)
  51. #endif
  52. /*#define USE_SPIN_LOCKS 0*/
  53. /*#define FORCEINLINE*/
  54. #include "malloc.c.h"
  55. #ifdef NDEBUG /* Disable assert checking on release builds */
  56. #undef DEBUG
  57. #endif
  58. /* The maximum concurrent threads in a pool possible */
  59. #ifndef MAXTHREADSINPOOL
  60. #define MAXTHREADSINPOOL 16
  61. #endif
  62. /* The maximum number of threadcaches which can be allocated */
  63. #ifndef THREADCACHEMAXCACHES
  64. #define THREADCACHEMAXCACHES 256
  65. #endif
  66. /* The maximum size to be allocated from the thread cache */
  67. #ifndef THREADCACHEMAX
  68. #define THREADCACHEMAX 8192
  69. #endif
  70. #if 0
  71. /* The number of cache entries for finer grained bins. This is (topbitpos(THREADCACHEMAX)-4)*2 */
  72. #define THREADCACHEMAXBINS ((13-4)*2)
  73. #else
  74. /* The number of cache entries. This is (topbitpos(THREADCACHEMAX)-4) */
  75. #define THREADCACHEMAXBINS (13-4)
  76. #endif
  77. /* Point at which the free space in a thread cache is garbage collected */
  78. #ifndef THREADCACHEMAXFREESPACE
  79. #define THREADCACHEMAXFREESPACE (512*1024)
  80. #endif
  81. #ifdef WIN32
  82. #define TLSVAR DWORD
  83. #define TLSALLOC(k) (*(k)=TlsAlloc(), TLS_OUT_OF_INDEXES==*(k))
  84. #define TLSFREE(k) (!TlsFree(k))
  85. #define TLSGET(k) TlsGetValue(k)
  86. #define TLSSET(k, a) (!TlsSetValue(k, a))
  87. #ifdef DEBUG
  88. static LPVOID ChkedTlsGetValue(DWORD idx)
  89. {
  90. LPVOID ret=TlsGetValue(idx);
  91. assert(S_OK==GetLastError());
  92. return ret;
  93. }
  94. #undef TLSGET
  95. #define TLSGET(k) ChkedTlsGetValue(k)
  96. #endif
  97. #else
  98. #define TLSVAR pthread_key_t
  99. #define TLSALLOC(k) pthread_key_create(k, 0)
  100. #define TLSFREE(k) pthread_key_delete(k)
  101. #define TLSGET(k) pthread_getspecific(k)
  102. #define TLSSET(k, a) pthread_setspecific(k, a)
  103. #endif
  104. #if 0
  105. /* Only enable if testing with valgrind. Causes misoperation */
  106. #define mspace_malloc(p, s) malloc(s)
  107. #define mspace_realloc(p, m, s) realloc(m, s)
  108. #define mspace_calloc(p, n, s) calloc(n, s)
  109. #define mspace_free(p, m) free(m)
  110. #endif
  111. #if defined(__cplusplus)
  112. #if !defined(NO_NED_NAMESPACE)
  113. namespace nedalloc {
  114. #else
  115. extern "C" {
  116. #endif
  117. #endif
  118. size_t nedblksize(void *mem) THROWSPEC
  119. {
  120. #if 0
  121. /* Only enable if testing with valgrind. Causes misoperation */
  122. return THREADCACHEMAX;
  123. #else
  124. if(mem)
  125. {
  126. mchunkptr p=mem2chunk(mem);
  127. assert(cinuse(p)); /* If this fails, someone tried to free a block twice */
  128. if(cinuse(p))
  129. return chunksize(p)-overhead_for(p);
  130. }
  131. return 0;
  132. #endif
  133. }
  134. void nedsetvalue(void *v) THROWSPEC { nedpsetvalue(0, v); }
  135. void * nedmalloc(size_t size) THROWSPEC { return nedpmalloc(0, size); }
  136. void * nedcalloc(size_t no, size_t size) THROWSPEC { return nedpcalloc(0, no, size); }
  137. void * nedrealloc(void *mem, size_t size) THROWSPEC { return nedprealloc(0, mem, size); }
  138. void nedfree(void *mem) THROWSPEC { nedpfree(0, mem); }
  139. void * nedmemalign(size_t alignment, size_t bytes) THROWSPEC { return nedpmemalign(0, alignment, bytes); }
  140. #if !NO_MALLINFO
  141. struct mallinfo nedmallinfo(void) THROWSPEC { return nedpmallinfo(0); }
  142. #endif
  143. int nedmallopt(int parno, int value) THROWSPEC { return nedpmallopt(0, parno, value); }
  144. int nedmalloc_trim(size_t pad) THROWSPEC { return nedpmalloc_trim(0, pad); }
  145. void nedmalloc_stats(void) THROWSPEC { nedpmalloc_stats(0); }
  146. size_t nedmalloc_footprint(void) THROWSPEC { return nedpmalloc_footprint(0); }
  147. void **nedindependent_calloc(size_t elemsno, size_t elemsize, void **chunks) THROWSPEC { return nedpindependent_calloc(0, elemsno, elemsize, chunks); }
  148. void **nedindependent_comalloc(size_t elems, size_t *sizes, void **chunks) THROWSPEC { return nedpindependent_comalloc(0, elems, sizes, chunks); }
  149. struct threadcacheblk_t;
  150. typedef struct threadcacheblk_t threadcacheblk;
  151. struct threadcacheblk_t
  152. { /* Keep less than 16 bytes on 32 bit systems and 32 bytes on 64 bit systems */
  153. #ifdef FULLSANITYCHECKS
  154. unsigned int magic;
  155. #endif
  156. unsigned int lastUsed, size;
  157. threadcacheblk *next, *prev;
  158. };
  159. typedef struct threadcache_t
  160. {
  161. #ifdef FULLSANITYCHECKS
  162. unsigned int magic1;
  163. #endif
  164. int mymspace; /* Last mspace entry this thread used */
  165. long threadid;
  166. unsigned int mallocs, frees, successes;
  167. size_t freeInCache; /* How much free space is stored in this cache */
  168. threadcacheblk *bins[(THREADCACHEMAXBINS+1)*2];
  169. #ifdef FULLSANITYCHECKS
  170. unsigned int magic2;
  171. #endif
  172. } threadcache;
  173. struct nedpool_t
  174. {
  175. MLOCK_T mutex;
  176. void *uservalue;
  177. int threads; /* Max entries in m to use */
  178. threadcache *caches[THREADCACHEMAXCACHES];
  179. TLSVAR mycache; /* Thread cache for this thread. 0 for unset, negative for use mspace-1 directly, otherwise is cache-1 */
  180. mstate m[MAXTHREADSINPOOL+1]; /* mspace entries for this pool */
  181. };
  182. static nedpool syspool;
  183. static FORCEINLINE unsigned int size2binidx(size_t _size) THROWSPEC
  184. { /* 8=1000 16=10000 20=10100 24=11000 32=100000 48=110000 4096=1000000000000 */
  185. unsigned int topbit, size=(unsigned int)(_size>>4);
  186. /* 16=1 20=1 24=1 32=10 48=11 64=100 96=110 128=1000 4096=100000000 */
  187. #if defined(__GNUC__)
  188. topbit = sizeof(size)*__CHAR_BIT__ - 1 - __builtin_clz(size);
  189. #elif defined(_MSC_VER) && _MSC_VER>=1300
  190. {
  191. unsigned long bsrTopBit;
  192. _BitScanReverse(&bsrTopBit, size);
  193. topbit = bsrTopBit;
  194. }
  195. #else
  196. #if 0
  197. union {
  198. unsigned asInt[2];
  199. double asDouble;
  200. };
  201. int n;
  202. asDouble = (double)size + 0.5;
  203. topbit = (asInt[!FOX_BIGENDIAN] >> 20) - 1023;
  204. #else
  205. {
  206. unsigned int x=size;
  207. x = x | (x >> 1);
  208. x = x | (x >> 2);
  209. x = x | (x >> 4);
  210. x = x | (x >> 8);
  211. x = x | (x >>16);
  212. x = ~x;
  213. x = x - ((x >> 1) & 0x55555555);
  214. x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
  215. x = (x + (x >> 4)) & 0x0F0F0F0F;
  216. x = x + (x << 8);
  217. x = x + (x << 16);
  218. topbit=31 - (x >> 24);
  219. }
  220. #endif
  221. #endif
  222. return topbit;
  223. }
  224. #ifdef FULLSANITYCHECKS
  225. static void tcsanitycheck(threadcacheblk **ptr) THROWSPEC
  226. {
  227. assert((ptr[0] && ptr[1]) || (!ptr[0] && !ptr[1]));
  228. if(ptr[0] && ptr[1])
  229. {
  230. assert(nedblksize(ptr[0])>=sizeof(threadcacheblk));
  231. assert(nedblksize(ptr[1])>=sizeof(threadcacheblk));
  232. assert(*(unsigned int *) "NEDN"==ptr[0]->magic);
  233. assert(*(unsigned int *) "NEDN"==ptr[1]->magic);
  234. assert(!ptr[0]->prev);
  235. assert(!ptr[1]->next);
  236. if(ptr[0]==ptr[1])
  237. {
  238. assert(!ptr[0]->next);
  239. assert(!ptr[1]->prev);
  240. }
  241. }
  242. }
  243. static void tcfullsanitycheck(threadcache *tc) THROWSPEC
  244. {
  245. threadcacheblk **tcbptr=tc->bins;
  246. int n;
  247. for(n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2)
  248. {
  249. threadcacheblk *b, *ob=0;
  250. tcsanitycheck(tcbptr);
  251. for(b=tcbptr[0]; b; ob=b, b=b->next)
  252. {
  253. assert(*(unsigned int *) "NEDN"==b->magic);
  254. assert(!ob || ob->next==b);
  255. assert(!ob || b->prev==ob);
  256. }
  257. }
  258. }
  259. #endif
  260. static NOINLINE void RemoveCacheEntries(nedpool *p, threadcache *tc, unsigned int age) THROWSPEC
  261. {
  262. #ifdef FULLSANITYCHECKS
  263. tcfullsanitycheck(tc);
  264. #endif
  265. if(tc->freeInCache)
  266. {
  267. threadcacheblk **tcbptr=tc->bins;
  268. int n;
  269. for(n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2)
  270. {
  271. threadcacheblk **tcb=tcbptr+1; /* come from oldest end of list */
  272. /*tcsanitycheck(tcbptr);*/
  273. for(; *tcb && tc->frees-(*tcb)->lastUsed>=age; )
  274. {
  275. threadcacheblk *f=*tcb;
  276. size_t blksize=f->size; /*nedblksize(f);*/
  277. assert(blksize<=nedblksize(f));
  278. assert(blksize);
  279. #ifdef FULLSANITYCHECKS
  280. assert(*(unsigned int *) "NEDN"==(*tcb)->magic);
  281. #endif
  282. *tcb=(*tcb)->prev;
  283. if(*tcb)
  284. (*tcb)->next=0;
  285. else
  286. *tcbptr=0;
  287. tc->freeInCache-=blksize;
  288. assert((long) tc->freeInCache>=0);
  289. mspace_free(0, f);
  290. /*tcsanitycheck(tcbptr);*/
  291. }
  292. }
  293. }
  294. #ifdef FULLSANITYCHECKS
  295. tcfullsanitycheck(tc);
  296. #endif
  297. }
  298. static void DestroyCaches(nedpool *p) THROWSPEC
  299. {
  300. if(p->caches)
  301. {
  302. threadcache *tc;
  303. int n;
  304. for(n=0; n<THREADCACHEMAXCACHES; n++)
  305. {
  306. if((tc=p->caches[n]))
  307. {
  308. tc->frees++;
  309. RemoveCacheEntries(p, tc, 0);
  310. assert(!tc->freeInCache);
  311. tc->mymspace=-1;
  312. tc->threadid=0;
  313. mspace_free(0, tc);
  314. p->caches[n]=0;
  315. }
  316. }
  317. }
  318. }
  319. static NOINLINE threadcache *AllocCache(nedpool *p) THROWSPEC
  320. {
  321. threadcache *tc=0;
  322. int n, end;
  323. ACQUIRE_LOCK(&p->mutex);
  324. for(n=0; n<THREADCACHEMAXCACHES && p->caches[n]; n++);
  325. if(THREADCACHEMAXCACHES==n)
  326. { /* List exhausted, so disable for this thread */
  327. RELEASE_LOCK(&p->mutex);
  328. return 0;
  329. }
  330. tc=p->caches[n]=(threadcache *) mspace_calloc(p->m[0], 1, sizeof(threadcache));
  331. if(!tc)
  332. {
  333. RELEASE_LOCK(&p->mutex);
  334. return 0;
  335. }
  336. #ifdef FULLSANITYCHECKS
  337. tc->magic1=*(unsigned int *)"NEDMALC1";
  338. tc->magic2=*(unsigned int *)"NEDMALC2";
  339. #endif
  340. tc->threadid=(long)(size_t)CURRENT_THREAD;
  341. for(end=0; p->m[end]; end++);
  342. tc->mymspace=tc->threadid % end;
  343. RELEASE_LOCK(&p->mutex);
  344. if(TLSSET(p->mycache, (void *)(size_t)(n+1))) abort();
  345. return tc;
  346. }
  347. static void *threadcache_malloc(nedpool *p, threadcache *tc, size_t *size) THROWSPEC
  348. {
  349. void *ret=0;
  350. unsigned int bestsize;
  351. unsigned int idx=size2binidx(*size);
  352. size_t blksize=0;
  353. threadcacheblk *blk, **binsptr;
  354. #ifdef FULLSANITYCHECKS
  355. tcfullsanitycheck(tc);
  356. #endif
  357. /* Calculate best fit bin size */
  358. bestsize=1<<(idx+4);
  359. #if 0
  360. /* Finer grained bin fit */
  361. idx<<=1;
  362. if(*size>bestsize)
  363. {
  364. idx++;
  365. bestsize+=bestsize>>1;
  366. }
  367. if(*size>bestsize)
  368. {
  369. idx++;
  370. bestsize=1<<(4+(idx>>1));
  371. }
  372. #else
  373. if(*size>bestsize)
  374. {
  375. idx++;
  376. bestsize<<=1;
  377. }
  378. #endif
  379. assert(bestsize>=*size);
  380. if(*size<bestsize) *size=bestsize;
  381. assert(*size<=THREADCACHEMAX);
  382. assert(idx<=THREADCACHEMAXBINS);
  383. binsptr=&tc->bins[idx*2];
  384. /* Try to match close, but move up a bin if necessary */
  385. blk=*binsptr;
  386. if(!blk || blk->size<*size)
  387. { /* Bump it up a bin */
  388. if(idx<THREADCACHEMAXBINS)
  389. {
  390. idx++;
  391. binsptr+=2;
  392. blk=*binsptr;
  393. }
  394. }
  395. if(blk)
  396. {
  397. blksize=blk->size; /*nedblksize(blk);*/
  398. assert(nedblksize(blk)>=blksize);
  399. assert(blksize>=*size);
  400. if(blk->next)
  401. blk->next->prev=0;
  402. *binsptr=blk->next;
  403. if(!*binsptr)
  404. binsptr[1]=0;
  405. #ifdef FULLSANITYCHECKS
  406. blk->magic=0;
  407. #endif
  408. assert(binsptr[0]!=blk && binsptr[1]!=blk);
  409. assert(nedblksize(blk)>=sizeof(threadcacheblk) && nedblksize(blk)<=THREADCACHEMAX+CHUNK_OVERHEAD);
  410. /*printf("malloc: %p, %p, %p, %lu\n", p, tc, blk, (long) size);*/
  411. ret=(void *) blk;
  412. }
  413. ++tc->mallocs;
  414. if(ret)
  415. {
  416. assert(blksize>=*size);
  417. ++tc->successes;
  418. tc->freeInCache-=blksize;
  419. assert((long) tc->freeInCache>=0);
  420. }
  421. #if defined(DEBUG) && 0
  422. if(!(tc->mallocs & 0xfff))
  423. {
  424. printf("*** threadcache=%u, mallocs=%u (%f), free=%u (%f), freeInCache=%u\n", (unsigned int) tc->threadid, tc->mallocs,
  425. (float) tc->successes/tc->mallocs, tc->frees, (float) tc->successes/tc->frees, (unsigned int) tc->freeInCache);
  426. }
  427. #endif
  428. #ifdef FULLSANITYCHECKS
  429. tcfullsanitycheck(tc);
  430. #endif
  431. return ret;
  432. }
  433. static NOINLINE void ReleaseFreeInCache(nedpool *p, threadcache *tc, int mymspace) THROWSPEC
  434. {
  435. unsigned int age=THREADCACHEMAXFREESPACE/8192;
  436. /*ACQUIRE_LOCK(&p->m[mymspace]->mutex);*/
  437. while(age && tc->freeInCache>=THREADCACHEMAXFREESPACE)
  438. {
  439. RemoveCacheEntries(p, tc, age);
  440. /*printf("*** Removing cache entries older than %u (%u)\n", age, (unsigned int) tc->freeInCache);*/
  441. age>>=1;
  442. }
  443. /*RELEASE_LOCK(&p->m[mymspace]->mutex);*/
  444. }
  445. static void threadcache_free(nedpool *p, threadcache *tc, int mymspace, void *mem, size_t size) THROWSPEC
  446. {
  447. unsigned int bestsize;
  448. unsigned int idx=size2binidx(size);
  449. threadcacheblk **binsptr, *tck=(threadcacheblk *) mem;
  450. assert(size>=sizeof(threadcacheblk) && size<=THREADCACHEMAX+CHUNK_OVERHEAD);
  451. #ifdef DEBUG
  452. { /* Make sure this is a valid memory block */
  453. mchunkptr p = mem2chunk(mem);
  454. mstate fm = get_mstate_for(p);
  455. if (!ok_magic(fm)) {
  456. USAGE_ERROR_ACTION(fm, p);
  457. return;
  458. }
  459. }
  460. #endif
  461. #ifdef FULLSANITYCHECKS
  462. tcfullsanitycheck(tc);
  463. #endif
  464. /* Calculate best fit bin size */
  465. bestsize=1<<(idx+4);
  466. #if 0
  467. /* Finer grained bin fit */
  468. idx<<=1;
  469. if(size>bestsize)
  470. {
  471. unsigned int biggerbestsize=bestsize+bestsize<<1;
  472. if(size>=biggerbestsize)
  473. {
  474. idx++;
  475. bestsize=biggerbestsize;
  476. }
  477. }
  478. #endif
  479. if(bestsize!=size) /* dlmalloc can round up, so we round down to preserve indexing */
  480. size=bestsize;
  481. binsptr=&tc->bins[idx*2];
  482. assert(idx<=THREADCACHEMAXBINS);
  483. if(tck==*binsptr)
  484. {
  485. fprintf(stderr, "Attempt to free already freed memory block %p - aborting!\n", tck);
  486. abort();
  487. }
  488. #ifdef FULLSANITYCHECKS
  489. tck->magic=*(unsigned int *) "NEDN";
  490. #endif
  491. tck->lastUsed=++tc->frees;
  492. tck->size=(unsigned int) size;
  493. tck->next=*binsptr;
  494. tck->prev=0;
  495. if(tck->next)
  496. tck->next->prev=tck;
  497. else
  498. binsptr[1]=tck;
  499. assert(!*binsptr || (*binsptr)->size==tck->size);
  500. *binsptr=tck;
  501. assert(tck==tc->bins[idx*2]);
  502. assert(tc->bins[idx*2+1]==tck || binsptr[0]->next->prev==tck);
  503. /*printf("free: %p, %p, %p, %lu\n", p, tc, mem, (long) size);*/
  504. tc->freeInCache+=size;
  505. #ifdef FULLSANITYCHECKS
  506. tcfullsanitycheck(tc);
  507. #endif
  508. #if 1
  509. if(tc->freeInCache>=THREADCACHEMAXFREESPACE)
  510. ReleaseFreeInCache(p, tc, mymspace);
  511. #endif
  512. }
  513. static NOINLINE int InitPool(nedpool *p, size_t capacity, int threads) THROWSPEC
  514. { /* threads is -1 for system pool */
  515. ensure_initialization();
  516. ACQUIRE_MALLOC_GLOBAL_LOCK();
  517. if(p->threads) goto done;
  518. if(INITIAL_LOCK(&p->mutex)) goto err;
  519. if(TLSALLOC(&p->mycache)) goto err;
  520. if(!(p->m[0]=(mstate) create_mspace(capacity, 1))) goto err;
  521. p->m[0]->extp=p;
  522. p->threads=(threads<1 || threads>MAXTHREADSINPOOL) ? MAXTHREADSINPOOL : threads;
  523. done:
  524. RELEASE_MALLOC_GLOBAL_LOCK();
  525. return 1;
  526. err:
  527. if(threads<0)
  528. abort(); /* If you can't allocate for system pool, we're screwed */
  529. DestroyCaches(p);
  530. if(p->m[0])
  531. {
  532. destroy_mspace(p->m[0]);
  533. p->m[0]=0;
  534. }
  535. if(p->mycache)
  536. {
  537. if(TLSFREE(p->mycache)) abort();
  538. p->mycache=0;
  539. }
  540. RELEASE_MALLOC_GLOBAL_LOCK();
  541. return 0;
  542. }
  543. static NOINLINE mstate FindMSpace(nedpool *p, threadcache *tc, int *lastUsed, size_t size) THROWSPEC
  544. { /* Gets called when thread's last used mspace is in use. The strategy
  545. is to run through the list of all available mspaces looking for an
  546. unlocked one and if we fail, we create a new one so long as we don't
  547. exceed p->threads */
  548. int n, end;
  549. for(n=end=*lastUsed+1; p->m[n]; end=++n)
  550. {
  551. if(TRY_LOCK(&p->m[n]->mutex)) goto found;
  552. }
  553. for(n=0; n<*lastUsed && p->m[n]; n++)
  554. {
  555. if(TRY_LOCK(&p->m[n]->mutex)) goto found;
  556. }
  557. if(end<p->threads)
  558. {
  559. mstate temp;
  560. if(!(temp=(mstate) create_mspace(size, 1)))
  561. goto badexit;
  562. /* Now we're ready to modify the lists, we lock */
  563. ACQUIRE_LOCK(&p->mutex);
  564. while(p->m[end] && end<p->threads)
  565. end++;
  566. if(end>=p->threads)
  567. { /* Drat, must destroy it now */
  568. RELEASE_LOCK(&p->mutex);
  569. destroy_mspace((mspace) temp);
  570. goto badexit;
  571. }
  572. /* We really want to make sure this goes into memory now but we
  573. have to be careful of breaking aliasing rules, so write it twice */
  574. {
  575. volatile struct malloc_state **_m=(volatile struct malloc_state **) &p->m[end];
  576. *_m=(p->m[end]=temp);
  577. }
  578. ACQUIRE_LOCK(&p->m[end]->mutex);
  579. /*printf("Created mspace idx %d\n", end);*/
  580. RELEASE_LOCK(&p->mutex);
  581. n=end;
  582. goto found;
  583. }
  584. /* Let it lock on the last one it used */
  585. badexit:
  586. ACQUIRE_LOCK(&p->m[*lastUsed]->mutex);
  587. return p->m[*lastUsed];
  588. found:
  589. *lastUsed=n;
  590. if(tc)
  591. tc->mymspace=n;
  592. else
  593. {
  594. if(TLSSET(p->mycache, (void *)(size_t)(-(n+1)))) abort();
  595. }
  596. return p->m[n];
  597. }
  598. nedpool *nedcreatepool(size_t capacity, int threads) THROWSPEC
  599. {
  600. nedpool *ret;
  601. if(!(ret=(nedpool *) nedpcalloc(0, 1, sizeof(nedpool)))) return 0;
  602. if(!InitPool(ret, capacity, threads))
  603. {
  604. nedpfree(0, ret);
  605. return 0;
  606. }
  607. return ret;
  608. }
  609. void neddestroypool(nedpool *p) THROWSPEC
  610. {
  611. int n;
  612. ACQUIRE_LOCK(&p->mutex);
  613. DestroyCaches(p);
  614. for(n=0; p->m[n]; n++)
  615. {
  616. destroy_mspace(p->m[n]);
  617. p->m[n]=0;
  618. }
  619. RELEASE_LOCK(&p->mutex);
  620. if(TLSFREE(p->mycache)) abort();
  621. nedpfree(0, p);
  622. }
  623. void nedpsetvalue(nedpool *p, void *v) THROWSPEC
  624. {
  625. if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
  626. p->uservalue=v;
  627. }
  628. void *nedgetvalue(nedpool **p, void *mem) THROWSPEC
  629. {
  630. nedpool *np=0;
  631. mchunkptr mcp=mem2chunk(mem);
  632. mstate fm;
  633. if(!(is_aligned(chunk2mem(mcp))) && mcp->head != FENCEPOST_HEAD) return 0;
  634. if(!cinuse(mcp)) return 0;
  635. if(!next_pinuse(mcp)) return 0;
  636. if(!is_mmapped(mcp) && !pinuse(mcp))
  637. {
  638. if(next_chunk(prev_chunk(mcp))!=mcp) return 0;
  639. }
  640. fm=get_mstate_for(mcp);
  641. if(!ok_magic(fm)) return 0;
  642. if(!ok_address(fm, mcp)) return 0;
  643. if(!fm->extp) return 0;
  644. np=(nedpool *) fm->extp;
  645. if(p) *p=np;
  646. return np->uservalue;
  647. }
  648. void neddisablethreadcache(nedpool *p) THROWSPEC
  649. {
  650. int mycache;
  651. if(!p)
  652. {
  653. p=&syspool;
  654. if(!syspool.threads) InitPool(&syspool, 0, -1);
  655. }
  656. mycache=(int)(size_t) TLSGET(p->mycache);
  657. if(!mycache)
  658. { /* Set to mspace 0 */
  659. if(TLSSET(p->mycache, (void *)-1)) abort();
  660. }
  661. else if(mycache>0)
  662. { /* Set to last used mspace */
  663. threadcache *tc=p->caches[mycache-1];
  664. #if defined(DEBUG)
  665. printf("Threadcache utilisation: %lf%% in cache with %lf%% lost to other threads\n",
  666. 100.0*tc->successes/tc->mallocs, 100.0*((double) tc->mallocs-tc->frees)/tc->mallocs);
  667. #endif
  668. if(TLSSET(p->mycache, (void *)(size_t)(-tc->mymspace))) abort();
  669. tc->frees++;
  670. RemoveCacheEntries(p, tc, 0);
  671. assert(!tc->freeInCache);
  672. tc->mymspace=-1;
  673. tc->threadid=0;
  674. mspace_free(0, p->caches[mycache-1]);
  675. p->caches[mycache-1]=0;
  676. }
  677. }
  678. #define GETMSPACE(m,p,tc,ms,s,action) \
  679. do \
  680. { \
  681. mstate m = GetMSpace((p),(tc),(ms),(s)); \
  682. action; \
  683. RELEASE_LOCK(&m->mutex); \
  684. } while (0)
  685. static FORCEINLINE mstate GetMSpace(nedpool *p, threadcache *tc, int mymspace, size_t size) THROWSPEC
  686. { /* Returns a locked and ready for use mspace */
  687. mstate m=p->m[mymspace];
  688. assert(m);
  689. if(!TRY_LOCK(&p->m[mymspace]->mutex)) m=FindMSpace(p, tc, &mymspace, size);\
  690. /*assert(IS_LOCKED(&p->m[mymspace]->mutex));*/
  691. return m;
  692. }
  693. static FORCEINLINE void GetThreadCache(nedpool **p, threadcache **tc, int *mymspace, size_t *size) THROWSPEC
  694. {
  695. int mycache;
  696. if(size && *size<sizeof(threadcacheblk)) *size=sizeof(threadcacheblk);
  697. if(!*p)
  698. {
  699. *p=&syspool;
  700. if(!syspool.threads) InitPool(&syspool, 0, -1);
  701. }
  702. mycache=(int)(size_t) TLSGET((*p)->mycache);
  703. if(mycache>0)
  704. {
  705. *tc=(*p)->caches[mycache-1];
  706. *mymspace=(*tc)->mymspace;
  707. }
  708. else if(!mycache)
  709. {
  710. *tc=AllocCache(*p);
  711. if(!*tc)
  712. { /* Disable */
  713. if(TLSSET((*p)->mycache, (void *)-1)) abort();
  714. *mymspace=0;
  715. }
  716. else
  717. *mymspace=(*tc)->mymspace;
  718. }
  719. else
  720. {
  721. *tc=0;
  722. *mymspace=-mycache-1;
  723. }
  724. assert(*mymspace>=0);
  725. assert((long)(size_t)CURRENT_THREAD==(*tc)->threadid);
  726. #ifdef FULLSANITYCHECKS
  727. if(*tc)
  728. {
  729. if(*(unsigned int *)"NEDMALC1"!=(*tc)->magic1 || *(unsigned int *)"NEDMALC2"!=(*tc)->magic2)
  730. {
  731. abort();
  732. }
  733. }
  734. #endif
  735. }
  736. void * nedpmalloc(nedpool *p, size_t size) THROWSPEC
  737. {
  738. void *ret=0;
  739. threadcache *tc;
  740. int mymspace;
  741. GetThreadCache(&p, &tc, &mymspace, &size);
  742. #if THREADCACHEMAX
  743. if(tc && size<=THREADCACHEMAX)
  744. { /* Use the thread cache */
  745. ret=threadcache_malloc(p, tc, &size);
  746. }
  747. #endif
  748. if(!ret)
  749. { /* Use this thread's mspace */
  750. GETMSPACE(m, p, tc, mymspace, size,
  751. ret=mspace_malloc(m, size));
  752. }
  753. return ret;
  754. }
  755. void * nedpcalloc(nedpool *p, size_t no, size_t size) THROWSPEC
  756. {
  757. size_t rsize=size*no;
  758. void *ret=0;
  759. threadcache *tc;
  760. int mymspace;
  761. GetThreadCache(&p, &tc, &mymspace, &rsize);
  762. #if THREADCACHEMAX
  763. if(tc && rsize<=THREADCACHEMAX)
  764. { /* Use the thread cache */
  765. if((ret=threadcache_malloc(p, tc, &rsize)))
  766. memset(ret, 0, rsize);
  767. }
  768. #endif
  769. if(!ret)
  770. { /* Use this thread's mspace */
  771. GETMSPACE(m, p, tc, mymspace, rsize,
  772. ret=mspace_calloc(m, 1, rsize));
  773. }
  774. return ret;
  775. }
  776. void * nedprealloc(nedpool *p, void *mem, size_t size) THROWSPEC
  777. {
  778. void *ret=0;
  779. threadcache *tc;
  780. int mymspace;
  781. if(!mem) return nedpmalloc(p, size);
  782. GetThreadCache(&p, &tc, &mymspace, &size);
  783. #if THREADCACHEMAX
  784. if(tc && size && size<=THREADCACHEMAX)
  785. { /* Use the thread cache */
  786. size_t memsize=nedblksize(mem);
  787. assert(memsize);
  788. if((ret=threadcache_malloc(p, tc, &size)))
  789. {
  790. memcpy(ret, mem, memsize<size ? memsize : size);
  791. if(memsize<=THREADCACHEMAX)
  792. threadcache_free(p, tc, mymspace, mem, memsize);
  793. else
  794. mspace_free(0, mem);
  795. }
  796. }
  797. #endif
  798. if(!ret)
  799. { /* Reallocs always happen in the mspace they happened in, so skip
  800. locking the preferred mspace for this thread */
  801. ret=mspace_realloc(0, mem, size);
  802. }
  803. return ret;
  804. }
  805. void nedpfree(nedpool *p, void *mem) THROWSPEC
  806. { /* Frees always happen in the mspace they happened in, so skip
  807. locking the preferred mspace for this thread */
  808. threadcache *tc;
  809. int mymspace;
  810. size_t memsize;
  811. assert(mem);
  812. GetThreadCache(&p, &tc, &mymspace, 0);
  813. #if THREADCACHEMAX
  814. memsize=nedblksize(mem);
  815. assert(memsize);
  816. if(mem && tc && memsize<=(THREADCACHEMAX+CHUNK_OVERHEAD))
  817. threadcache_free(p, tc, mymspace, mem, memsize);
  818. else
  819. #endif
  820. mspace_free(0, mem);
  821. }
  822. void * nedpmemalign(nedpool *p, size_t alignment, size_t bytes) THROWSPEC
  823. {
  824. void *ret;
  825. threadcache *tc;
  826. int mymspace;
  827. GetThreadCache(&p, &tc, &mymspace, &bytes);
  828. { /* Use this thread's mspace */
  829. GETMSPACE(m, p, tc, mymspace, bytes,
  830. ret=mspace_memalign(m, alignment, bytes));
  831. }
  832. return ret;
  833. }
  834. #if !NO_MALLINFO
  835. struct mallinfo nedpmallinfo(nedpool *p) THROWSPEC
  836. {
  837. int n;
  838. struct mallinfo ret={0};
  839. if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
  840. for(n=0; p->m[n]; n++)
  841. {
  842. struct mallinfo t=mspace_mallinfo(p->m[n]);
  843. ret.arena+=t.arena;
  844. ret.ordblks+=t.ordblks;
  845. ret.hblkhd+=t.hblkhd;
  846. ret.usmblks+=t.usmblks;
  847. ret.uordblks+=t.uordblks;
  848. ret.fordblks+=t.fordblks;
  849. ret.keepcost+=t.keepcost;
  850. }
  851. return ret;
  852. }
  853. #endif
  854. int nedpmallopt(nedpool *p, int parno, int value) THROWSPEC
  855. {
  856. return mspace_mallopt(parno, value);
  857. }
  858. int nedpmalloc_trim(nedpool *p, size_t pad) THROWSPEC
  859. {
  860. int n, ret=0;
  861. if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
  862. for(n=0; p->m[n]; n++)
  863. {
  864. ret+=mspace_trim(p->m[n], pad);
  865. }
  866. return ret;
  867. }
  868. void nedpmalloc_stats(nedpool *p) THROWSPEC
  869. {
  870. int n;
  871. if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
  872. for(n=0; p->m[n]; n++)
  873. {
  874. mspace_malloc_stats(p->m[n]);
  875. }
  876. }
  877. size_t nedpmalloc_footprint(nedpool *p) THROWSPEC
  878. {
  879. size_t ret=0;
  880. int n;
  881. if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
  882. for(n=0; p->m[n]; n++)
  883. {
  884. ret+=mspace_footprint(p->m[n]);
  885. }
  886. return ret;
  887. }
  888. void **nedpindependent_calloc(nedpool *p, size_t elemsno, size_t elemsize, void **chunks) THROWSPEC
  889. {
  890. void **ret;
  891. threadcache *tc;
  892. int mymspace;
  893. GetThreadCache(&p, &tc, &mymspace, &elemsize);
  894. GETMSPACE(m, p, tc, mymspace, elemsno*elemsize,
  895. ret=mspace_independent_calloc(m, elemsno, elemsize, chunks));
  896. return ret;
  897. }
  898. void **nedpindependent_comalloc(nedpool *p, size_t elems, size_t *sizes, void **chunks) THROWSPEC
  899. {
  900. void **ret;
  901. threadcache *tc;
  902. int mymspace;
  903. size_t i, *adjustedsizes=(size_t *) alloca(elems*sizeof(size_t));
  904. if(!adjustedsizes) return 0;
  905. for(i=0; i<elems; i++)
  906. adjustedsizes[i]=sizes[i]<sizeof(threadcacheblk) ? sizeof(threadcacheblk) : sizes[i];
  907. GetThreadCache(&p, &tc, &mymspace, 0);
  908. GETMSPACE(m, p, tc, mymspace, 0,
  909. ret=mspace_independent_comalloc(m, elems, adjustedsizes, chunks));
  910. return ret;
  911. }
  912. #if defined(__cplusplus)
  913. }
  914. #endif