fix a rare active defrag edge case bug leading to stagnation

There's a rare case which leads to stagnation in the defragger, causing
it to keep scanning the keyspace and do nothing (not moving any
allocation), this happens when all the allocator slabs of a certain bin
have the same % utilization, but the slab from which new allocations are
made have a lower utilization.

this commit fixes it by removing the current slab from the overall
average utilization of the bin, and also eliminate any precision loss in
the utilization calculation and move the decision about the defrag to
reside inside jemalloc.

and also add a test that consistently reproduce this issue.
This commit is contained in:
Oran Agra 2020-05-20 14:08:40 +03:00 committed by Yoav Steinberg
parent 908d3bdad9
commit c6a26519a1
2 changed files with 20 additions and 11 deletions

View File

@ -220,7 +220,7 @@ ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra,
}
JEMALLOC_ALWAYS_INLINE int
iget_defrag_hint(tsdn_t *tsdn, void* ptr, int *bin_util, int *run_util) {
iget_defrag_hint(tsdn_t *tsdn, void* ptr) {
int defrag = 0;
rtree_ctx_t rtree_ctx_fallback;
rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
@ -237,11 +237,22 @@ iget_defrag_hint(tsdn_t *tsdn, void* ptr, int *bin_util, int *run_util) {
malloc_mutex_lock(tsdn, &bin->lock);
/* don't bother moving allocations from the slab currently used for new allocations */
if (slab != bin->slabcur) {
const bin_info_t *bin_info = &bin_infos[binind];
size_t availregs = bin_info->nregs * bin->stats.curslabs;
*bin_util = ((long long)bin->stats.curregs<<16) / availregs;
*run_util = ((long long)(bin_info->nregs - extent_nfree_get(slab))<<16) / bin_info->nregs;
defrag = 1;
int free_in_slab = extent_nfree_get(slab);
if (free_in_slab) {
const bin_info_t *bin_info = &bin_infos[binind];
int curslabs = bin->stats.curslabs;
size_t curregs = bin->stats.curregs;
if (bin->slabcur) {
/* remove slabcur from the overall utilization */
curregs -= bin_info->nregs - extent_nfree_get(bin->slabcur);
curslabs -= 1;
}
/* Compare the utilization ratio of the slab in question to the total average,
* to avoid precision lost and division, we do that by extrapolating the usage
* of the slab as if all slabs have the same usage. If this slab is less used
* than the average, we'll prefer to evict the data to hopefully more used ones */
defrag = (bin_info->nregs - free_in_slab) * curslabs <= curregs;
}
}
malloc_mutex_unlock(tsdn, &bin->lock);
}

View File

@ -3922,12 +3922,10 @@ jemalloc_postfork_child(void) {
/******************************************************************************/
/* Helps the application decide if a pointer is worth re-allocating in order to reduce fragmentation.
* returns 0 if the allocation is in the currently active run,
* or when it is not causing any frag issue (large or huge bin)
* returns the bin utilization and run utilization both in fixed point 16:16.
* returns 1 if the allocation should be moved, and 0 if the allocation be kept.
* If the application decides to re-allocate it should use MALLOCX_TCACHE_NONE when doing so. */
JEMALLOC_EXPORT int JEMALLOC_NOTHROW
get_defrag_hint(void* ptr, int *bin_util, int *run_util) {
get_defrag_hint(void* ptr) {
assert(ptr != NULL);
return iget_defrag_hint(TSDN_NULL, ptr, bin_util, run_util);
return iget_defrag_hint(TSDN_NULL, ptr);
}