- Submitted By: Xi Ruoyao <xry111 at xry111.site>
- Date: 2023-08-13
- Initial Package Version: 2.38
- Upstream Status: Under review
- Origin: Upstream & Self
- - 1/3: https://sourceware.org/git/?p=glibc.git;a=patch;h=542b11058525
- - 2/3: https://sourceware.org/pipermail/libc-alpha/2023-August/150857.html
- - 3/3: Trivial unused code removal
- Description: Fixes a regression that made posix_memalign()
-              very slow in certain conditions, which was breaking
-              ffmpeg-based applications.
- From fc01478d06658ace8d57e5328c1e717275acfe84 Mon Sep 17 00:00:00 2001
- From: Florian Weimer <fweimer@redhat.com>
- Date: Fri, 11 Aug 2023 11:18:17 +0200
- Subject: [PATCH 1/3] malloc: Enable merging of remainders in memalign (bug
- 30723)
- Previously, calling _int_free from _int_memalign could put remainders
- into the tcache or into fastbins, where they are invisible to the
- low-level allocator. This results in missed merge opportunities
- because once these freed chunks become available to the low-level
- allocator, further memalign allocations (even of the same size) are
- likely obstructing merges.
- Furthermore, during forwards merging in _int_memalign, do not
- completely give up when the remainder is too small to serve as a
- chunk on its own. We can still give it back if it can be merged
- with the following unused chunk. This makes it more likely that
- memalign calls in a loop achieve a compact memory layout,
- independently of initial heap layout.
- Drop some useless (unsigned long) casts along the way, and tweak
- the style to more closely match GNU on changed lines.
- Reviewed-by: DJ Delorie <dj@redhat.com>
- (cherry picked from commit 542b1105852568c3ebc712225ae78b8c8ba31a78)
- ---
- malloc/malloc.c | 197 +++++++++++++++++++++++++++++-------------------
- 1 file changed, 121 insertions(+), 76 deletions(-)
- diff --git a/malloc/malloc.c b/malloc/malloc.c
- index e2f1a615a4..948f9759af 100644
- --- a/malloc/malloc.c
- +++ b/malloc/malloc.c
- @@ -1086,6 +1086,11 @@ typedef struct malloc_chunk* mchunkptr;
-
- static void* _int_malloc(mstate, size_t);
- static void _int_free(mstate, mchunkptr, int);
- +static void _int_free_merge_chunk (mstate, mchunkptr, INTERNAL_SIZE_T);
- +static INTERNAL_SIZE_T _int_free_create_chunk (mstate,
- + mchunkptr, INTERNAL_SIZE_T,
- + mchunkptr, INTERNAL_SIZE_T);
- +static void _int_free_maybe_consolidate (mstate, INTERNAL_SIZE_T);
- static void* _int_realloc(mstate, mchunkptr, INTERNAL_SIZE_T,
- INTERNAL_SIZE_T);
- static void* _int_memalign(mstate, size_t, size_t);
- @@ -4637,31 +4642,52 @@ _int_free (mstate av, mchunkptr p, int have_lock)
- if (!have_lock)
- __libc_lock_lock (av->mutex);
-
- - nextchunk = chunk_at_offset(p, size);
- -
- - /* Lightweight tests: check whether the block is already the
- - top block. */
- - if (__glibc_unlikely (p == av->top))
- - malloc_printerr ("double free or corruption (top)");
- - /* Or whether the next chunk is beyond the boundaries of the arena. */
- - if (__builtin_expect (contiguous (av)
- - && (char *) nextchunk
- - >= ((char *) av->top + chunksize(av->top)), 0))
- - malloc_printerr ("double free or corruption (out)");
- - /* Or whether the block is actually not marked used. */
- - if (__glibc_unlikely (!prev_inuse(nextchunk)))
- - malloc_printerr ("double free or corruption (!prev)");
- -
- - nextsize = chunksize(nextchunk);
- - if (__builtin_expect (chunksize_nomask (nextchunk) <= CHUNK_HDR_SZ, 0)
- - || __builtin_expect (nextsize >= av->system_mem, 0))
- - malloc_printerr ("free(): invalid next size (normal)");
- + _int_free_merge_chunk (av, p, size);
-
- - free_perturb (chunk2mem(p), size - CHUNK_HDR_SZ);
- + if (!have_lock)
- + __libc_lock_unlock (av->mutex);
- + }
- + /*
- + If the chunk was allocated via mmap, release via munmap().
- + */
- +
- + else {
- + munmap_chunk (p);
- + }
- +}
- +
- +/* Try to merge chunk P of SIZE bytes with its neighbors. Put the
- + resulting chunk on the appropriate bin list. P must not be on a
- + bin list yet, and it can be in use. */
- +static void
- +_int_free_merge_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T size)
- +{
- + mchunkptr nextchunk = chunk_at_offset(p, size);
- +
- + /* Lightweight tests: check whether the block is already the
- + top block. */
- + if (__glibc_unlikely (p == av->top))
- + malloc_printerr ("double free or corruption (top)");
- + /* Or whether the next chunk is beyond the boundaries of the arena. */
- + if (__builtin_expect (contiguous (av)
- + && (char *) nextchunk
- + >= ((char *) av->top + chunksize(av->top)), 0))
- + malloc_printerr ("double free or corruption (out)");
- + /* Or whether the block is actually not marked used. */
- + if (__glibc_unlikely (!prev_inuse(nextchunk)))
- + malloc_printerr ("double free or corruption (!prev)");
- +
- + INTERNAL_SIZE_T nextsize = chunksize(nextchunk);
- + if (__builtin_expect (chunksize_nomask (nextchunk) <= CHUNK_HDR_SZ, 0)
- + || __builtin_expect (nextsize >= av->system_mem, 0))
- + malloc_printerr ("free(): invalid next size (normal)");
- +
- + free_perturb (chunk2mem(p), size - CHUNK_HDR_SZ);
-
- - /* consolidate backward */
- - if (!prev_inuse(p)) {
- - prevsize = prev_size (p);
- + /* Consolidate backward. */
- + if (!prev_inuse(p))
- + {
- + INTERNAL_SIZE_T prevsize = prev_size (p);
- size += prevsize;
- p = chunk_at_offset(p, -((long) prevsize));
- if (__glibc_unlikely (chunksize(p) != prevsize))
- @@ -4669,9 +4695,25 @@ _int_free (mstate av, mchunkptr p, int have_lock)
- unlink_chunk (av, p);
- }
-
- - if (nextchunk != av->top) {
- + /* Write the chunk header, maybe after merging with the following chunk. */
- + size = _int_free_create_chunk (av, p, size, nextchunk, nextsize);
- + _int_free_maybe_consolidate (av, size);
- +}
- +
- +/* Create a chunk at P of SIZE bytes, with SIZE potentially increased
- + to cover the immediately following chunk NEXTCHUNK of NEXTSIZE
- + bytes (if NEXTCHUNK is unused). The chunk at P is not actually
- + read and does not have to be initialized. After creation, it is
- + placed on the appropriate bin list. The function returns the size
- + of the new chunk. */
- +static INTERNAL_SIZE_T
- +_int_free_create_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T size,
- + mchunkptr nextchunk, INTERNAL_SIZE_T nextsize)
- +{
- + if (nextchunk != av->top)
- + {
- /* get and clear inuse bit */
- - nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
- + bool nextinuse = inuse_bit_at_offset (nextchunk, nextsize);
-
- /* consolidate forward */
- if (!nextinuse) {
- @@ -4686,8 +4728,8 @@ _int_free (mstate av, mchunkptr p, int have_lock)
- been given one chance to be used in malloc.
- */
-
- - bck = unsorted_chunks(av);
- - fwd = bck->fd;
- + mchunkptr bck = unsorted_chunks (av);
- + mchunkptr fwd = bck->fd;
- if (__glibc_unlikely (fwd->bk != bck))
- malloc_printerr ("free(): corrupted unsorted chunks");
- p->fd = fwd;
- @@ -4706,61 +4748,52 @@ _int_free (mstate av, mchunkptr p, int have_lock)
- check_free_chunk(av, p);
- }
-
- - /*
- - If the chunk borders the current high end of memory,
- - consolidate into top
- - */
- -
- - else {
- + else
- + {
- + /* If the chunk borders the current high end of memory,
- + consolidate into top. */
- size += nextsize;
- set_head(p, size | PREV_INUSE);
- av->top = p;
- check_chunk(av, p);
- }
-
- - /*
- - If freeing a large space, consolidate possibly-surrounding
- - chunks. Then, if the total unused topmost memory exceeds trim
- - threshold, ask malloc_trim to reduce top.
- -
- - Unless max_fast is 0, we don't know if there are fastbins
- - bordering top, so we cannot tell for sure whether threshold
- - has been reached unless fastbins are consolidated. But we
- - don't want to consolidate on each free. As a compromise,
- - consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
- - is reached.
- - */
- + return size;
- +}
-
- - if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
- +/* If freeing a large space, consolidate possibly-surrounding
- + chunks. Then, if the total unused topmost memory exceeds trim
- + threshold, ask malloc_trim to reduce top. */
- +static void
- +_int_free_maybe_consolidate (mstate av, INTERNAL_SIZE_T size)
- +{
- + /* Unless max_fast is 0, we don't know if there are fastbins
- + bordering top, so we cannot tell for sure whether threshold has
- + been reached unless fastbins are consolidated. But we don't want
- + to consolidate on each free. As a compromise, consolidation is
- + performed if FASTBIN_CONSOLIDATION_THRESHOLD is reached. */
- + if (size >= FASTBIN_CONSOLIDATION_THRESHOLD)
- + {
- if (atomic_load_relaxed (&av->have_fastchunks))
- malloc_consolidate(av);
-
- - if (av == &main_arena) {
- + if (av == &main_arena)
- + {
- #ifndef MORECORE_CANNOT_TRIM
- - if ((unsigned long)(chunksize(av->top)) >=
- - (unsigned long)(mp_.trim_threshold))
- - systrim(mp_.top_pad, av);
- + if (chunksize (av->top) >= mp_.trim_threshold)
- + systrim (mp_.top_pad, av);
- #endif
- - } else {
- - /* Always try heap_trim(), even if the top chunk is not
- - large, because the corresponding heap might go away. */
- - heap_info *heap = heap_for_ptr(top(av));
- + }
- + else
- + {
- + /* Always try heap_trim, even if the top chunk is not large,
- + because the corresponding heap might go away. */
- + heap_info *heap = heap_for_ptr (top (av));
-
- - assert(heap->ar_ptr == av);
- - heap_trim(heap, mp_.top_pad);
- - }
- + assert (heap->ar_ptr == av);
- + heap_trim (heap, mp_.top_pad);
- + }
- }
- -
- - if (!have_lock)
- - __libc_lock_unlock (av->mutex);
- - }
- - /*
- - If the chunk was allocated via mmap, release via munmap().
- - */
- -
- - else {
- - munmap_chunk (p);
- - }
- }
-
- /*
- @@ -5221,7 +5254,7 @@ _int_memalign (mstate av, size_t alignment, size_t bytes)
- (av != &main_arena ? NON_MAIN_ARENA : 0));
- set_inuse_bit_at_offset (newp, newsize);
- set_head_size (p, leadsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
- - _int_free (av, p, 1);
- + _int_free_merge_chunk (av, p, leadsize);
- p = newp;
-
- assert (newsize >= nb &&
- @@ -5232,15 +5265,27 @@ _int_memalign (mstate av, size_t alignment, size_t bytes)
- if (!chunk_is_mmapped (p))
- {
- size = chunksize (p);
- - if ((unsigned long) (size) > (unsigned long) (nb + MINSIZE))
- + mchunkptr nextchunk = chunk_at_offset(p, size);
- + INTERNAL_SIZE_T nextsize = chunksize(nextchunk);
- + if (size > nb)
- {
- remainder_size = size - nb;
- - remainder = chunk_at_offset (p, nb);
- - set_head (remainder, remainder_size | PREV_INUSE |
- - (av != &main_arena ? NON_MAIN_ARENA : 0));
- - set_head_size (p, nb);
- - _int_free (av, remainder, 1);
- - }
- + if (remainder_size >= MINSIZE
- + || nextchunk == av->top
- + || !inuse_bit_at_offset (nextchunk, nextsize))
- + {
- + /* We can only give back the tail if it is larger than
- + MINSIZE, or if the following chunk is unused (top
- + chunk or unused in-heap chunk). Otherwise we would
- + create a chunk that is smaller than MINSIZE. */
- + remainder = chunk_at_offset (p, nb);
- + set_head_size (p, nb);
- + remainder_size = _int_free_create_chunk (av, remainder,
- + remainder_size,
- + nextchunk, nextsize);
- + _int_free_maybe_consolidate (av, remainder_size);
- + }
- + }
- }
-
- check_inuse_chunk (av, p);
- --
- 2.41.0
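- 
- The effect of the patch above can be observed from outside the
- allocator.  As a rough sketch (mallinfo2 requires glibc >= 2.33;
- the alignment and sizes are arbitrary): after a burst of aligned
- allocations is freed, a heap whose remainders were merged reports
- its free space in few large chunks, whereas a fragmented heap
- spreads the same total over many small ones.
- 
-     #include <malloc.h>
-     #include <stdio.h>
-     #include <stdlib.h>
- 
-     int main (void)
-     {
-       enum { N = 1000 };
-       void *v[N];
- 
-       for (int i = 0; i < N; i++)
-         if (posix_memalign (&v[i], 256, 4000) != 0)
-           return 1;
-       for (int i = 0; i < N; i++)
-         free (v[i]);
- 
-       /* fordblks: total free bytes; ordblks: number of free chunks.  */
-       struct mallinfo2 mi = mallinfo2 ();
-       printf ("%zu free bytes in %zu chunks\n", mi.fordblks, mi.ordblks);
-       return 0;
-     }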
- From b37e836b7cc2dba672e1de1cc7e076ba1c712614 Mon Sep 17 00:00:00 2001
- From: Florian Weimer <fweimer@redhat.com>
- Date: Fri, 11 Aug 2023 17:48:13 +0200
- Subject: [PATCH 2/3] malloc: Remove bin scanning from memalign (bug 30723)
- On the test workload (mpv --cache=yes with VP9 video decoding), the
- bin scanning has a very poor success rate (less than 2%). The tcache
- scanning has about 50% success rate, so keep that.
- Update comments in malloc/tst-memalign-2 to indicate the purpose
- of the tests. Even with the scanning removed, the additional
- merging opportunities since commit 542b1105852568c3ebc712225ae78b8c8ba31a78
- ("malloc: Enable merging of remainders in memalign (bug 30723)")
- are sufficient to pass the existing large bins test.
- Link: https://sourceware.org/pipermail/libc-alpha/2023-August/150857.html
- ---
- malloc/malloc.c | 127 ++--------------------------------------
- malloc/tst-memalign-2.c | 7 ++-
- 2 files changed, 10 insertions(+), 124 deletions(-)
- diff --git a/malloc/malloc.c b/malloc/malloc.c
- index 948f9759af..9c2cab7a59 100644
- --- a/malloc/malloc.c
- +++ b/malloc/malloc.c
- @@ -5082,7 +5082,6 @@ _int_memalign (mstate av, size_t alignment, size_t bytes)
- mchunkptr remainder; /* spare room at end to split off */
- unsigned long remainder_size; /* its size */
- INTERNAL_SIZE_T size;
- - mchunkptr victim;
-
- nb = checked_request2size (bytes);
- if (nb == 0)
- @@ -5101,129 +5100,13 @@ _int_memalign (mstate av, size_t alignment, size_t bytes)
- we don't find anything in those bins, the common malloc code will
- scan starting at 2x. */
-
- - /* This will be set if we found a candidate chunk. */
- - victim = NULL;
- + /* Call malloc with worst case padding to hit alignment. */
- + m = (char *) (_int_malloc (av, nb + alignment + MINSIZE));
-
- - /* Fast bins are singly-linked, hard to remove a chunk from the middle
- - and unlikely to meet our alignment requirements. We have not done
- - any experimentation with searching for aligned fastbins. */
- + if (m == 0)
- + return 0; /* propagate failure */
-
- - if (av != NULL)
- - {
- - int first_bin_index;
- - int first_largebin_index;
- - int last_bin_index;
- -
- - if (in_smallbin_range (nb))
- - first_bin_index = smallbin_index (nb);
- - else
- - first_bin_index = largebin_index (nb);
- -
- - if (in_smallbin_range (nb * 2))
- - last_bin_index = smallbin_index (nb * 2);
- - else
- - last_bin_index = largebin_index (nb * 2);
- -
- - first_largebin_index = largebin_index (MIN_LARGE_SIZE);
- -
- - int victim_index; /* its bin index */
- -
- - for (victim_index = first_bin_index;
- - victim_index < last_bin_index;
- - victim_index ++)
- - {
- - victim = NULL;
- -
- - if (victim_index < first_largebin_index)
- - {
- - /* Check small bins. Small bin chunks are doubly-linked despite
- - being the same size. */
- -
- - mchunkptr fwd; /* misc temp for linking */
- - mchunkptr bck; /* misc temp for linking */
- -
- - bck = bin_at (av, victim_index);
- - fwd = bck->fd;
- - while (fwd != bck)
- - {
- - if (chunk_ok_for_memalign (fwd, alignment, nb) > 0)
- - {
- - victim = fwd;
- -
- - /* Unlink it */
- - victim->fd->bk = victim->bk;
- - victim->bk->fd = victim->fd;
- - break;
- - }
- -
- - fwd = fwd->fd;
- - }
- - }
- - else
- - {
- - /* Check large bins. */
- - mchunkptr fwd; /* misc temp for linking */
- - mchunkptr bck; /* misc temp for linking */
- - mchunkptr best = NULL;
- - size_t best_size = 0;
- -
- - bck = bin_at (av, victim_index);
- - fwd = bck->fd;
- -
- - while (fwd != bck)
- - {
- - int extra;
- -
- - if (chunksize (fwd) < nb)
- - break;
- - extra = chunk_ok_for_memalign (fwd, alignment, nb);
- - if (extra > 0
- - && (extra <= best_size || best == NULL))
- - {
- - best = fwd;
- - best_size = extra;
- - }
- -
- - fwd = fwd->fd;
- - }
- - victim = best;
- -
- - if (victim != NULL)
- - {
- - unlink_chunk (av, victim);
- - break;
- - }
- - }
- -
- - if (victim != NULL)
- - break;
- - }
- - }
- -
- - /* Strategy: find a spot within that chunk that meets the alignment
- - request, and then possibly free the leading and trailing space.
- - This strategy is incredibly costly and can lead to external
- - fragmentation if header and footer chunks are unused. */
- -
- - if (victim != NULL)
- - {
- - p = victim;
- - m = chunk2mem (p);
- - set_inuse (p);
- - if (av != &main_arena)
- - set_non_main_arena (p);
- - }
- - else
- - {
- - /* Call malloc with worst case padding to hit alignment. */
- -
- - m = (char *) (_int_malloc (av, nb + alignment + MINSIZE));
- -
- - if (m == 0)
- - return 0; /* propagate failure */
- -
- - p = mem2chunk (m);
- - }
- + p = mem2chunk (m);
-
- if ((((unsigned long) (m)) % alignment) != 0) /* misaligned */
- {
- diff --git a/malloc/tst-memalign-2.c b/malloc/tst-memalign-2.c
- index f229283dbf..ecd6fa249e 100644
- --- a/malloc/tst-memalign-2.c
- +++ b/malloc/tst-memalign-2.c
- @@ -86,7 +86,8 @@ do_test (void)
- TEST_VERIFY (tcache_allocs[i].ptr1 == tcache_allocs[i].ptr2);
- }
-
- - /* Test for non-head tcache hits. */
- + /* Test for non-head tcache hits. This exercises the memalign
- + scanning code to find matching allocations. */
- for (i = 0; i < array_length (ptr); ++ i)
- {
- if (i == 4)
- @@ -113,7 +114,9 @@ do_test (void)
- free (p);
- TEST_VERIFY (count > 0);
-
- - /* Large bins test. */
- + /* Large bins test. This verifies that the over-allocated parts
- + that memalign releases for future allocations can be reused by
- + memalign itself at least in some cases. */
-
- for (i = 0; i < LN; ++ i)
- {
- --
- 2.41.0
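- 
- With the bin scan gone, _int_memalign always falls back to
- over-allocating nb + alignment + MINSIZE bytes and carving an
- aligned chunk out of the result.  The sketch below checks that
- budget by brute force, mirroring the rule that a leading remainder
- must be zero or at least MINSIZE.  ALIGN_UP is a stand-in for
- glibc's internal PTR_ALIGN_UP, the MINSIZE value is illustrative,
- and align >= minsize is assumed (glibc rounds small alignments up
- to MINSIZE).
- 
-     #include <assert.h>
-     #include <stdint.h>
-     #include <stdio.h>
- 
-     /* Stand-in for PTR_ALIGN_UP; A must be a power of two.  */
-     #define ALIGN_UP(X, A) (((X) + (A) - 1) & ~((uintptr_t) (A) - 1))
- 
-     int main (void)
-     {
-       const uintptr_t minsize = 32;   /* illustrative MINSIZE */
-       const uintptr_t align = 64;     /* requested alignment */
-       const uintptr_t nb = 200;       /* chunk size to carve out */
- 
-       for (uintptr_t base = 1; base <= 4096; base++)
-         {
-           uintptr_t end = base + nb + align + minsize;
-           uintptr_t aligned = ALIGN_UP (base, align);
- 
-           /* A leading remainder must be zero or a whole chunk of
-              at least MINSIZE, so skip ahead when it is too small.  */
-           if (aligned != base && aligned - base < minsize)
-             aligned += align;
- 
-           assert (aligned % align == 0);
-           assert (aligned + nb <= end); /* the budget always suffices */
-         }
-       puts ("ok");
-       return 0;
-     }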
- From 26973f7b09c33e67f6bcbc79371796c8dd334528 Mon Sep 17 00:00:00 2001
- From: Xi Ruoyao <xry111@xry111.site>
- Date: Mon, 14 Aug 2023 11:05:18 +0800
- Subject: [PATCH 3/3] malloc: Remove unused functions and variables
- Remove the unused chunk_ok_for_memalign function and the unused local
- variables in _int_free.
- Signed-off-by: Xi Ruoyao <xry111@xry111.site>
- ---
- malloc/malloc.c | 42 ------------------------------------------
- 1 file changed, 42 deletions(-)
- diff --git a/malloc/malloc.c b/malloc/malloc.c
- index 9c2cab7a59..d0bbbf3710 100644
- --- a/malloc/malloc.c
- +++ b/malloc/malloc.c
- @@ -4488,12 +4488,6 @@ _int_free (mstate av, mchunkptr p, int have_lock)
- {
- INTERNAL_SIZE_T size; /* its size */
- mfastbinptr *fb; /* associated fastbin */
- - mchunkptr nextchunk; /* next contiguous chunk */
- - INTERNAL_SIZE_T nextsize; /* its size */
- - int nextinuse; /* true if nextchunk is used */
- - INTERNAL_SIZE_T prevsize; /* size of previous contiguous chunk */
- - mchunkptr bck; /* misc temp for linking */
- - mchunkptr fwd; /* misc temp for linking */
-
- size = chunksize (p);
-
- @@ -5032,42 +5026,6 @@ _int_realloc (mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize,
- ------------------------------ memalign ------------------------------
- */
-
- -/* Returns 0 if the chunk is not and does not contain the requested
- - aligned sub-chunk, else returns the amount of "waste" from
- - trimming. NB is the *chunk* byte size, not the user byte
- - size. */
- -static size_t
- -chunk_ok_for_memalign (mchunkptr p, size_t alignment, size_t nb)
- -{
- - void *m = chunk2mem (p);
- - INTERNAL_SIZE_T size = chunksize (p);
- - void *aligned_m = m;
- -
- - if (__glibc_unlikely (misaligned_chunk (p)))
- - malloc_printerr ("_int_memalign(): unaligned chunk detected");
- -
- - aligned_m = PTR_ALIGN_UP (m, alignment);
- -
- - INTERNAL_SIZE_T front_extra = (intptr_t) aligned_m - (intptr_t) m;
- -
- - /* We can't trim off the front as it's too small. */
- - if (front_extra > 0 && front_extra < MINSIZE)
- - return 0;
- -
- - /* If it's a perfect fit, it's an exception to the return value rule
- - (we would return zero waste, which looks like "not usable"), so
- - handle it here by returning a small non-zero value instead. */
- - if (size == nb && front_extra == 0)
- - return 1;
- -
- - /* If the block we need fits in the chunk, calculate total waste. */
- - if (size > nb + front_extra)
- - return size - nb;
- -
- - /* Can't use this chunk. */
- - return 0;
- -}
- -
- /* BYTES is user requested bytes, not requested chunksize bytes. */
- static void *
- _int_memalign (mstate av, size_t alignment, size_t bytes)
- --
- 2.41.0