Shrink dict when deleting dictEntry (#12850)

When we insert entries into dict, it may autonomously expand if needed.
However, when we delete entries from dict, it doesn't shrink to the
proper size. If there are few entries in a very large dict, it may cause
huge waste of memory and inefficiency when iterating.

The main keyspace dicts (keys and expires) are shrunk by cron
(`tryResizeHashTables` calls `htNeedsResize` and `dictResize`),
And some data structures such as zset and hash also do that (call
`htNeedsResize`) right after a loop of calls to `dictDelete`,
But many other dicts are completely missing that call (they can only
expand).

In this PR, we provide the ability to automatically shrink the dict when
deleting. The conditions that trigger shrinking are the same ones
`htNeedsResize` used to check, i.e. we expand when we're over 100%
utilization, and shrink when we're below 10% utilization.

Additionally:
* Add `dictPauseAutoResize` so that flows that do mass deletions, will
only trigger shrinkage at the end.
* Rename `dictResize` to `dictShrinkToFit` (same logic as it used to
have, but better name describing it)
* Rename `_dictExpand` to `_dictResize` (same logic as it used to have,
but better name describing it)
 
related to discussion
https://github.com/redis/redis/pull/12819#discussion_r1409293878

---------

Co-authored-by: Oran Agra <oran@redislabs.com>
Co-authored-by: zhaozhao.zz <zhaozhao.zz@alibaba-inc.com>
This commit is contained in:
Yanqi Lv 2024-01-15 14:20:53 +08:00 committed by GitHub
parent bb2b6e2927
commit e2b7932b34
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 99 additions and 40 deletions

View File

@ -78,7 +78,8 @@ typedef struct {
/* -------------------------- private prototypes ---------------------------- */
static int _dictExpandIfNeeded(dict *d);
static void _dictExpandIfNeeded(dict *d);
static void _dictShrinkIfNeeded(dict *d);
static signed char _dictNextExp(unsigned long size);
static int _dictInit(dict *d, dictType *type);
static dictEntry *dictGetNext(const dictEntry *de);
@ -208,12 +209,13 @@ int _dictInit(dict *d, dictType *type)
d->type = type;
d->rehashidx = -1;
d->pauserehash = 0;
d->pauseAutoResize = 0;
return DICT_OK;
}
/* Resize the table to the minimal size that contains all the elements,
* but with the invariant of a USED/BUCKETS ratio near to <= 1 */
int dictResize(dict *d)
int dictShrinkToFit(dict *d)
{
unsigned long minimal;
@ -221,20 +223,18 @@ int dictResize(dict *d)
minimal = d->ht_used[0];
if (minimal < DICT_HT_INITIAL_SIZE)
minimal = DICT_HT_INITIAL_SIZE;
return dictExpand(d, minimal);
return dictShrink(d, minimal);
}
/* Expand or create the hash table,
/* Resize or create the hash table,
* when malloc_failed is non-NULL, it'll avoid panic if malloc fails (in which case it'll be set to 1).
* Returns DICT_OK if expand was performed, and DICT_ERR if skipped. */
int _dictExpand(dict *d, unsigned long size, int* malloc_failed)
* Returns DICT_OK if resize was performed, and DICT_ERR if skipped. */
int _dictResize(dict *d, unsigned long size, int* malloc_failed)
{
if (malloc_failed) *malloc_failed = 0;
/* the size is invalid if it is smaller than the number of
* elements already inside the hash table */
if (dictIsRehashing(d) || d->ht_used[0] > size)
return DICT_ERR;
/* We can't rehash twice if rehashing is ongoing. */
assert(!dictIsRehashing(d));
/* the new hash table */
dictEntry **new_ht_table;
@ -286,6 +286,14 @@ int _dictExpand(dict *d, unsigned long size, int* malloc_failed)
return DICT_OK;
}
/* Grow the hash table to at least `size` buckets by delegating to
 * _dictResize(). The request is rejected (DICT_ERR) while an incremental
 * rehash is in progress, when the table already holds more entries than
 * `size`, or when the current bucket count is already >= `size` (i.e. the
 * call would not actually be an expansion). */
int _dictExpand(dict *d, unsigned long size, int* malloc_failed) {
    /* Can't start a new rehash while one is ongoing. */
    if (dictIsRehashing(d)) return DICT_ERR;
    /* `size` must be able to hold every element already stored. */
    if (d->ht_used[0] > size) return DICT_ERR;
    /* Only proceed if this is a real expansion. */
    if (DICTHT_SIZE(d->ht_size_exp[0]) >= size) return DICT_ERR;
    return _dictResize(d, size, malloc_failed);
}
/* return DICT_ERR if expand was not performed */
int dictExpand(dict *d, unsigned long size) {
return _dictExpand(d, size, NULL);
@ -293,11 +301,20 @@ int dictExpand(dict *d, unsigned long size) {
/* return DICT_ERR if expand failed due to memory allocation failure */
int dictTryExpand(dict *d, unsigned long size) {
int malloc_failed;
int malloc_failed = 0;
_dictExpand(d, size, &malloc_failed);
return malloc_failed? DICT_ERR : DICT_OK;
}
/* Shrink the hash table down to `size` buckets via _dictResize().
 * Returns DICT_ERR (shrink not performed) while a rehash is in progress,
 * when `size` is too small to hold the entries currently stored, or when
 * the table is already no larger than `size`. */
int dictShrink(dict *d, unsigned long size) {
    /* Can't start a new rehash while one is ongoing. */
    if (dictIsRehashing(d)) return DICT_ERR;
    /* `size` must still fit every element already stored. */
    if (d->ht_used[0] > size) return DICT_ERR;
    /* Only proceed if this is a real shrink. */
    if (DICTHT_SIZE(d->ht_size_exp[0]) <= size) return DICT_ERR;
    return _dictResize(d, size, NULL);
}
/* Performs N steps of incremental rehashing. Returns 1 if there are still
* keys to move from the old to the new hash table, otherwise 0 is returned.
*
@ -588,6 +605,7 @@ static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
dictFreeUnlinkedEntry(d, he);
}
d->ht_used[table]--;
_dictShrinkIfNeeded(d);
return he;
}
prevHe = he;
@ -752,6 +770,7 @@ void dictTwoPhaseUnlinkFree(dict *d, dictEntry *he, dictEntry **plink, int table
dictFreeKey(d, he);
dictFreeVal(d, he);
if (!entryIsKey(he)) zfree(decodeMaskedPtr(he));
_dictShrinkIfNeeded(d);
dictResumeRehashing(d);
}
@ -1401,21 +1420,27 @@ unsigned long dictScanDefrag(dict *d,
/* Because we may need to allocate huge memory chunk at once when dict
* expands, we will check this allocation is allowed or not if the dict
* type has expandAllowed member function. */
static int dictTypeExpandAllowed(dict *d) {
if (d->type->expandAllowed == NULL) return 1;
return d->type->expandAllowed(
/* Ask the dict type's optional resizeAllowed callback whether a resize is
 * permitted right now. Returns 1 (allowed) when no callback is registered.
 * The callback receives the memory a table sized for ht_used[0]+1 entries
 * would need, and the current elements/buckets ratio of table 0. */
static int dictTypeResizeAllowed(dict *d) {
if (d->type->resizeAllowed == NULL) return 1;
return d->type->resizeAllowed(
DICTHT_SIZE(_dictNextExp(d->ht_used[0] + 1)) * sizeof(dictEntry*),
(double)d->ht_used[0] / DICTHT_SIZE(d->ht_size_exp[0]));
}
/* Expand the hash table if needed */
static int _dictExpandIfNeeded(dict *d)
static void _dictExpandIfNeeded(dict *d)
{
/* Automatic resizing is disallowed. Return */
if (d->pauseAutoResize > 0) return;
/* Incremental rehashing already in progress. Return. */
if (dictIsRehashing(d)) return DICT_OK;
if (dictIsRehashing(d)) return;
/* If the hash table is empty expand it to the initial size. */
if (DICTHT_SIZE(d->ht_size_exp[0]) == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);
if (DICTHT_SIZE(d->ht_size_exp[0]) == 0) {
dictExpand(d, DICT_HT_INITIAL_SIZE);
return;
}
/* If we reached the 1:1 ratio, and we are allowed to resize the hash
* table (global setting) or we should avoid it but the ratio between
@ -1426,11 +1451,35 @@ static int _dictExpandIfNeeded(dict *d)
(dict_can_resize != DICT_RESIZE_FORBID &&
d->ht_used[0] / DICTHT_SIZE(d->ht_size_exp[0]) > dict_force_resize_ratio))
{
if (!dictTypeExpandAllowed(d))
return DICT_OK;
return dictExpand(d, d->ht_used[0] + 1);
if (!dictTypeResizeAllowed(d))
return;
dictExpand(d, d->ht_used[0] + 1);
}
}
/* Shrink the hash table if its utilization dropped low enough.
 * Called after entry deletion; a no-op when auto-resize is paused, a rehash
 * is in progress, or the table is already at its minimal size.
 * Fix: removed a stray `return DICT_OK;` before the closing brace — this
 * function is declared void, so returning a value was a constraint
 * violation (leftover from the old int-returning variant). */
static void _dictShrinkIfNeeded(dict *d)
{
    /* Automatic resizing is disallowed. Return */
    if (d->pauseAutoResize > 0) return;

    /* Incremental rehashing already in progress. Return. */
    if (dictIsRehashing(d)) return;

    /* If the size of hash table is DICT_HT_INITIAL_SIZE, don't shrink it. */
    if (DICTHT_SIZE(d->ht_size_exp[0]) == DICT_HT_INITIAL_SIZE) return;

    /* If we reached below 1:10 elements/buckets ratio, and we are allowed to resize
     * the hash table (global setting) or we should avoid it but the ratio is below 1:50,
     * we'll trigger a resize of the hash table. */
    if ((dict_can_resize == DICT_RESIZE_ENABLE &&
         d->ht_used[0] * 100 / DICTHT_SIZE(d->ht_size_exp[0]) < HASHTABLE_MIN_FILL) ||
        (dict_can_resize != DICT_RESIZE_FORBID &&
         d->ht_used[0] * 100 / DICTHT_SIZE(d->ht_size_exp[0]) < HASHTABLE_MIN_FILL / dict_force_resize_ratio))
    {
        /* The dict type may veto the resize (e.g. for memory reasons). */
        if (!dictTypeResizeAllowed(d))
            return;
        dictShrink(d, d->ht_used[0]);
    }
}
/* Our hash table capability is a power of two */
@ -1454,8 +1503,7 @@ void *dictFindPositionForInsert(dict *d, const void *key, dictEntry **existing)
if (dictIsRehashing(d)) _dictRehashStep(d);
/* Expand the hash table if needed */
if (_dictExpandIfNeeded(d) == DICT_ERR)
return NULL;
_dictExpandIfNeeded(d);
for (table = 0; table <= 1; table++) {
idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
if (table == 0 && (long)idx < d->rehashidx) continue;
@ -1483,6 +1531,7 @@ void dictEmpty(dict *d, void(callback)(dict*)) {
_dictClear(d,1,callback);
d->rehashidx = -1;
d->pauserehash = 0;
d->pauseAutoResize = 0;
}
void dictSetResizeEnabled(dictResizeEnable enable) {

View File

@ -44,6 +44,9 @@
#define DICT_OK 0
#define DICT_ERR 1
/* Hash table parameters */
#define HASHTABLE_MIN_FILL 10 /* Minimal hash table fill 10% */
typedef struct dictEntry dictEntry; /* opaque */
typedef struct dict dict;
@ -54,7 +57,7 @@ typedef struct dictType {
int (*keyCompare)(dict *d, const void *key1, const void *key2);
void (*keyDestructor)(dict *d, void *key);
void (*valDestructor)(dict *d, void *obj);
int (*expandAllowed)(size_t moreMem, double usedRatio);
int (*resizeAllowed)(size_t moreMem, double usedRatio);
/* Invoked at the start of dict initialization/rehashing (old and new ht are already created) */
void (*rehashingStarted)(dict *d);
/* Invoked at the end of dict initialization/rehashing of all the entries from old to new ht. Both ht still exists
@ -91,6 +94,7 @@ struct dict {
/* Keep small vars at end for optimal (minimal) struct padding */
int16_t pauserehash; /* If >0 rehashing is paused (<0 indicates coding error) */
signed char ht_size_exp[2]; /* exponent of size. (size = 1<<exp) */
int16_t pauseAutoResize; /* If >0 automatic resizing is disallowed (<0 indicates coding error) */
void *metadata[];
};
@ -155,6 +159,8 @@ typedef struct {
#define dictIsRehashing(d) ((d)->rehashidx != -1)
#define dictPauseRehashing(d) ((d)->pauserehash++)
#define dictResumeRehashing(d) ((d)->pauserehash--)
#define dictPauseAutoResize(d) ((d)->pauseAutoResize++)
#define dictResumeAutoResize(d) ((d)->pauseAutoResize--)
/* If our unsigned long type can store a 64 bit number, use a 64 bit PRNG. */
#if ULONG_MAX >= 0xffffffffffffffff
@ -174,6 +180,7 @@ dict *dictCreate(dictType *type);
dict **dictCreateMultiple(dictType *type, int count);
int dictExpand(dict *d, unsigned long size);
int dictTryExpand(dict *d, unsigned long size);
int dictShrink(dict *d, unsigned long size);
int dictAdd(dict *d, void *key, void *val);
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing);
void *dictFindPositionForInsert(dict *d, const void *key, dictEntry **existing);
@ -188,7 +195,7 @@ void dictTwoPhaseUnlinkFree(dict *d, dictEntry *he, dictEntry **plink, int table
void dictRelease(dict *d);
dictEntry * dictFind(dict *d, const void *key);
void *dictFetchValue(dict *d, const void *key);
int dictResize(dict *d);
int dictShrinkToFit(dict *d);
void dictSetKey(dict *d, dictEntry* de, void *key);
void dictSetVal(dict *d, dictEntry *de, void *val);
void dictSetSignedIntegerVal(dictEntry *de, int64_t val);

View File

@ -428,7 +428,7 @@ uint64_t dictEncObjHash(const void *key) {
* provisionally if used memory will be over maxmemory after dict expands,
* but to guarantee the performance of redis, we still allow dict to expand
* if dict load factor exceeds HASHTABLE_MAX_LOAD_FACTOR. */
int dictExpandAllowed(size_t moreMem, double usedRatio) {
int dictResizeAllowed(size_t moreMem, double usedRatio) {
if (usedRatio <= HASHTABLE_MAX_LOAD_FACTOR) {
return !overMaxmemoryAfterAlloc(moreMem);
} else {
@ -547,7 +547,7 @@ dictType dbDictType = {
dictSdsKeyCompare, /* key compare */
dictSdsDestructor, /* key destructor */
dictObjectDestructor, /* val destructor */
dictExpandAllowed, /* allow to expand */
dictResizeAllowed, /* allow to resize */
dbDictRehashingStarted,
dbDictRehashingCompleted,
dbDictMetadataSize,
@ -561,7 +561,7 @@ dictType dbExpiresDictType = {
dictSdsKeyCompare, /* key compare */
NULL, /* key destructor */
NULL, /* val destructor */
dictExpandAllowed, /* allow to expand */
dictResizeAllowed, /* allow to resize */
dbExpiresRehashingStarted,
dbExpiresRehashingCompleted,
dbDictMetadataSize,
@ -693,7 +693,7 @@ dictType clientDictType = {
.no_value = 1 /* no values in this dict */
};
int htNeedsResize(dict *dict) {
int htNeedsShrink(dict *dict) {
long long size, used;
size = dictBuckets(dict);
@ -718,8 +718,8 @@ void tryResizeHashTables(int dbid) {
for (int i = 0; i < CRON_DBS_PER_CALL && db->sub_dict[subdict].resize_cursor != -1; i++) {
int slot = db->sub_dict[subdict].resize_cursor;
dict *d = (subdict == DB_MAIN ? db->dict[slot] : db->expires[slot]);
if (htNeedsResize(d))
dictResize(d);
if (htNeedsShrink(d))
dictShrinkToFit(d);
db->sub_dict[subdict].resize_cursor = dbGetNextNonEmptySlot(db, slot, subdict);
}
}

View File

@ -198,7 +198,6 @@ struct hdr_histogram;
extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT];
/* Hash table parameters */
#define HASHTABLE_MIN_FILL 10 /* Minimal hash table fill 10% */
#define HASHTABLE_MAX_LOAD_FACTOR 1.618 /* Maximum hash table load factor. */
/* Command flags. Please check the definition of struct redisCommand in this file
@ -3112,7 +3111,7 @@ void serverLogRaw(int level, const char *msg);
void serverLogRawFromHandler(int level, const char *msg);
void usage(void);
void updateDictResizePolicy(void);
int htNeedsResize(dict *dict);
int htNeedsShrink(dict *dict);
void populateCommandTable(void);
void resetCommandTableStats(dict* commands);
void resetErrorTableStats(void);

View File

@ -292,9 +292,6 @@ int hashTypeDelete(robj *o, sds field) {
} else if (o->encoding == OBJ_ENCODING_HT) {
if (dictDelete((dict*)o->ptr, field) == C_OK) {
deleted = 1;
/* Always check if the dictionary needs a resize after a delete. */
if (htNeedsResize(o->ptr)) dictResize(o->ptr);
}
} else {

View File

@ -256,7 +256,6 @@ int setTypeRemoveAux(robj *setobj, char *str, size_t len, int64_t llval, int str
if (setobj->encoding == OBJ_ENCODING_HT) {
sds sdsval = str_is_sds ? (sds)str : sdsnewlen(str, len);
int deleted = (dictDelete(setobj->ptr, sdsval) == DICT_OK);
if (deleted && htNeedsResize(setobj->ptr)) dictResize(setobj->ptr);
if (sdsval != str) sdsfree(sdsval); /* free temp copy */
return deleted;
} else if (setobj->encoding == OBJ_ENCODING_LISTPACK) {

View File

@ -1596,7 +1596,6 @@ int zsetDel(robj *zobj, sds ele) {
} else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
zset *zs = zobj->ptr;
if (zsetRemoveFromSkiplist(zs, ele)) {
if (htNeedsResize(zs->dict)) dictResize(zs->dict);
return 1;
}
} else {
@ -2011,6 +2010,7 @@ void zremrangeGenericCommand(client *c, zrange_type rangetype) {
}
} else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
zset *zs = zobj->ptr;
dictPauseAutoResize(zs->dict);
switch(rangetype) {
case ZRANGE_AUTO:
case ZRANGE_RANK:
@ -2023,7 +2023,8 @@ void zremrangeGenericCommand(client *c, zrange_type rangetype) {
deleted = zslDeleteRangeByLex(zs->zsl,&lexrange,zs->dict);
break;
}
if (htNeedsResize(zs->dict)) dictResize(zs->dict);
dictResumeAutoResize(zs->dict);
if (htNeedsShrink(zs->dict)) dictShrinkToFit(zs->dict);
if (dictSize(zs->dict) == 0) {
dbDelete(c->db,key);
keyremoved = 1;
@ -2535,10 +2536,12 @@ static void zdiffAlgorithm2(zsetopsrc *src, long setnum, zset *dstzset, size_t *
dictAdd(dstzset->dict,tmp,&znode->score);
cardinality++;
} else {
dictPauseAutoResize(dstzset->dict);
tmp = zuiSdsFromValue(&zval);
if (zsetRemoveFromSkiplist(dstzset, tmp)) {
cardinality--;
}
dictResumeAutoResize(dstzset->dict);
}
/* Exit if result set is empty as any additional removal
@ -2551,7 +2554,7 @@ static void zdiffAlgorithm2(zsetopsrc *src, long setnum, zset *dstzset, size_t *
}
/* Resize dict if needed after removing multiple elements */
if (htNeedsResize(dstzset->dict)) dictResize(dstzset->dict);
if (htNeedsShrink(dstzset->dict)) dictShrinkToFit(dstzset->dict);
/* Using this algorithm, we can't calculate the max element as we go,
* we have to iterate through all elements to find the max one after. */

View File

@ -1101,6 +1101,11 @@ foreach type {single multiple single_multiple} {
assert_equal [r scard myset] 30
assert {[is_rehashing myset]}
# Wait for the hash set rehashing to finish.
while {[is_rehashing myset]} {
r srandmember myset 10
}
# Now that we have a hash set with only one long chain bucket.
set htstats [r debug HTSTATS-KEY myset full]
assert {[regexp {different slots: ([0-9]+)} $htstats - different_slots]}