Reduce performance impact of dict rehashing and make it shorter. (#12899)

#### Problem Statement:
For any read/update operation during rehashing, we do ~10+ random
DRAM lookups just to make rehashing progress, because we use `rehashidx`
to rehash 10 buckets whose dict entries are most likely not in the CPU
cache and are unrelated to the bucket we are operating on. If those
random buckets are empty, the rehashing work for that command execution
is skipped.
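
For context, here is a condensed sketch of the `rehashidx`-driven step (adapted from the `dictRehash` loop shown in the diff below; trimmed, not a standalone-compilable unit). Every read/update calls `_dictRehashStep`, i.e. `dictRehash(d, 1)`:

```c
/* Condensed from dictRehash() (see the diff below). The buckets visited are
 * whatever rehashidx points at, which is unrelated to the key the command is
 * accessing and therefore unlikely to be in the CPU cache. */
while (n-- && d->ht_used[0] != 0) {
    /* Skip empty buckets, but only up to empty_visits of them. */
    while (d->ht_table[0][d->rehashidx] == NULL) {
        d->rehashidx++;
        if (--empty_visits == 0) return 1; /* give up: nothing rehashed this call */
    }
    /* Move every entry in bucket rehashidx from ht_table[0] to ht_table[1]. */
    rehashEntriesInBucketAtIndex(d, d->rehashidx);
    d->rehashidx++;
}
```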

#### Implementation:
To reduce the performance regression while the dict is rehashing, we
determine the index at which the key would be stored in the 0th HT and
check whether that index has already been rehashed. If it has not, we
rehash the bucket containing the key, moving that bucket from the 0th HT
to the 1st HT.

If the key's bucket has already been rehashed, we perform the random
bucket rehash (using `rehashidx`) instead, then verify whether rehashing
is still ongoing and look up the key in the appropriate HT.

This ensures rehashing is never skipped during a command call: every
call rehashes either the key's own bucket or a random bucket.
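
A condensed sketch of the resulting lookup-path logic (this mirrors the pattern added to `dictFind`, `dictGenericDelete` and `dictFindPositionForInsert` in the diff below; trimmed, not a standalone-compilable unit):

```c
h = dictHashKey(d, key);
idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[0]);
if (dictIsRehashing(d)) {
    if ((long)idx >= d->rehashidx && d->ht_table[0][idx]) {
        /* The key's bucket in ht0 has not been rehashed yet: rehash exactly
         * that bucket (CPU cache friendly, we are about to read it anyway). */
        _dictBucketRehash(d, idx);
    } else {
        /* The key's bucket was already rehashed (or is empty): fall back to
         * the rehashidx-driven step (not CPU cache friendly). */
        _dictRehashStep(d);
    }
}
/* ...then check whether rehashing is still ongoing and look the key up in
 * ht_table[0] and, if needed, ht_table[1]... */
```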

#### Changes in this PR:
- Added a new method `dictBucketRehash` to perform rehash on a single
bucket.
- Added the helper `rehashEntriesInBucketAtIndex`, used by `dictRehash` and
`dictBucketRehash`, to move all the keys in a bucket from the old HT to
the new HT.
- Added the helper `dictCheckRehashingCompleted`, used by `dictRehash` and
`dictBucketRehash`, to check whether we have already rehashed the whole
table and whether more rehashing is required.

#### Benchmark:
- This PR still shows a **~13%** improvement in latency during
rehashing.

- Rehashing itself is **~2%** faster with this PR compared to unstable.

---------

Co-authored-by: Oran Agra <oran@redislabs.com>
Co-authored-by: Madelyn Olson <34459052+madolson@users.noreply.github.com>
Author: Roshan Khatri, 2024-01-27 01:11:53 -08:00 (committed by GitHub)
commit 5358bd7cdd (parent 98881f7558)
2 changed files with 143 additions and 76 deletions


@@ -318,6 +318,72 @@ int dictShrink(dict *d, unsigned long size) {
return _dictResize(d, size, NULL);
}
/* Helper function for `dictRehash` and `dictBucketRehash` which rehashes all the keys
* in a bucket at index `idx` from the old to the new hash HT. */
static void rehashEntriesInBucketAtIndex(dict *d, uint64_t idx) {
dictEntry *de = d->ht_table[0][idx];
uint64_t h;
dictEntry *nextde;
while (de) {
nextde = dictGetNext(de);
void *key = dictGetKey(de);
/* Get the index in the new hash table */
if (d->ht_size_exp[1] > d->ht_size_exp[0]) {
h = dictHashKey(d, key) & DICTHT_SIZE_MASK(d->ht_size_exp[1]);
} else {
/* We're shrinking the table. The tables sizes are powers of
* two, so we simply mask the bucket index in the larger table
* to get the bucket index in the smaller table. */
h = idx & DICTHT_SIZE_MASK(d->ht_size_exp[1]);
}
if (d->type->no_value) {
if (d->type->keys_are_odd && !d->ht_table[1][h]) {
/* Destination bucket is empty and we can store the key
* directly without an allocated entry. Free the old entry
* if it's an allocated entry.
*
* TODO: Add a flag 'keys_are_even' and if set, we can use
* this optimization for these dicts too. We can set the LSB
* bit when stored as a dict entry and clear it again when
* we need the key back. */
assert(entryIsKey(key));
if (!entryIsKey(de)) zfree(decodeMaskedPtr(de));
de = key;
} else if (entryIsKey(de)) {
/* We don't have an allocated entry but we need one. */
de = createEntryNoValue(key, d->ht_table[1][h]);
} else {
/* Just move the existing entry to the destination table and
* update the 'next' field. */
assert(entryIsNoValue(de));
dictSetNext(de, d->ht_table[1][h]);
}
} else {
dictSetNext(de, d->ht_table[1][h]);
}
d->ht_table[1][h] = de;
d->ht_used[0]--;
d->ht_used[1]++;
de = nextde;
}
d->ht_table[0][idx] = NULL;
}
/* This checks if we already rehashed the whole table and if more rehashing is required */
static int dictCheckRehashingCompleted(dict *d) {
if (d->ht_used[0] != 0) return 0;
if (d->type->rehashingCompleted) d->type->rehashingCompleted(d);
zfree(d->ht_table[0]);
/* Copy the new ht onto the old one */
d->ht_table[0] = d->ht_table[1];
d->ht_used[0] = d->ht_used[1];
d->ht_size_exp[0] = d->ht_size_exp[1];
_dictReset(d, 1);
d->rehashidx = -1;
return 1;
}
/* Performs N steps of incremental rehashing. Returns 1 if there are still
* keys to move from the old to the new hash table, otherwise 0 is returned.
*
@@ -343,8 +409,6 @@ int dictRehash(dict *d, int n) {
}
while(n-- && d->ht_used[0] != 0) {
dictEntry *de, *nextde;
/* Note that rehashidx can't overflow as we are sure there are more
* elements because ht[0].used != 0 */
assert(DICTHT_SIZE(d->ht_size_exp[0]) > (unsigned long)d->rehashidx);
@@ -352,71 +416,12 @@ int dictRehash(dict *d, int n) {
d->rehashidx++;
if (--empty_visits == 0) return 1;
}
de = d->ht_table[0][d->rehashidx];
/* Move all the keys in this bucket from the old to the new hash HT */
while(de) {
uint64_t h;
nextde = dictGetNext(de);
void *key = dictGetKey(de);
/* Get the index in the new hash table */
if (d->ht_size_exp[1] > d->ht_size_exp[0]) {
h = dictHashKey(d, key) & DICTHT_SIZE_MASK(d->ht_size_exp[1]);
} else {
/* We're shrinking the table. The tables sizes are powers of
* two, so we simply mask the bucket index in the larger table
* to get the bucket index in the smaller table. */
h = d->rehashidx & DICTHT_SIZE_MASK(d->ht_size_exp[1]);
}
if (d->type->no_value) {
if (d->type->keys_are_odd && !d->ht_table[1][h]) {
/* Destination bucket is empty and we can store the key
* directly without an allocated entry. Free the old entry
* if it's an allocated entry.
*
* TODO: Add a flag 'keys_are_even' and if set, we can use
* this optimization for these dicts too. We can set the LSB
* bit when stored as a dict entry and clear it again when
* we need the key back. */
assert(entryIsKey(key));
if (!entryIsKey(de)) zfree(decodeMaskedPtr(de));
de = key;
} else if (entryIsKey(de)) {
/* We don't have an allocated entry but we need one. */
de = createEntryNoValue(key, d->ht_table[1][h]);
} else {
/* Just move the existing entry to the destination table and
* update the 'next' field. */
assert(entryIsNoValue(de));
dictSetNext(de, d->ht_table[1][h]);
}
} else {
dictSetNext(de, d->ht_table[1][h]);
}
d->ht_table[1][h] = de;
d->ht_used[0]--;
d->ht_used[1]++;
de = nextde;
}
d->ht_table[0][d->rehashidx] = NULL;
rehashEntriesInBucketAtIndex(d, d->rehashidx);
d->rehashidx++;
}
/* Check if we already rehashed the whole table... */
if (d->ht_used[0] == 0) {
if (d->type->rehashingCompleted) d->type->rehashingCompleted(d);
zfree(d->ht_table[0]);
/* Copy the new ht onto the old one */
d->ht_table[0] = d->ht_table[1];
d->ht_used[0] = d->ht_used[1];
d->ht_size_exp[0] = d->ht_size_exp[1];
_dictReset(d, 1);
d->rehashidx = -1;
return 0;
}
/* More to rehash... */
return 1;
return !dictCheckRehashingCompleted(d);
}
long long timeInMilliseconds(void) {
@@ -455,6 +460,26 @@ static void _dictRehashStep(dict *d) {
if (d->pauserehash == 0) dictRehash(d,1);
}
/* Performs rehashing on a single bucket. */
int _dictBucketRehash(dict *d, uint64_t idx) {
if (d->pauserehash != 0) return 0;
unsigned long s0 = DICTHT_SIZE(d->ht_size_exp[0]);
unsigned long s1 = DICTHT_SIZE(d->ht_size_exp[1]);
if (dict_can_resize == DICT_RESIZE_FORBID || !dictIsRehashing(d)) return 0;
/* If dict_can_resize is DICT_RESIZE_AVOID, we want to avoid rehashing.
* - If expanding, the threshold is dict_force_resize_ratio which is 4.
* - If shrinking, the threshold is 1 / (HASHTABLE_MIN_FILL * dict_force_resize_ratio) which is 1/32. */
if (dict_can_resize == DICT_RESIZE_AVOID &&
((s1 > s0 && s1 < dict_force_resize_ratio * s0) ||
(s1 < s0 && s0 < HASHTABLE_MIN_FILL * dict_force_resize_ratio * s1)))
{
return 0;
}
rehashEntriesInBucketAtIndex(d, idx);
dictCheckRehashingCompleted(d);
return 1;
}
/* Add an element to the target hash table */
int dictAdd(dict *d, void *key, void *val)
{
@@ -591,12 +616,24 @@ static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
/* dict is empty */
if (dictSize(d) == 0) return NULL;
if (dictIsRehashing(d)) _dictRehashStep(d);
h = dictHashKey(d, key);
idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[0]);
if (dictIsRehashing(d)) {
if ((long)idx >= d->rehashidx && d->ht_table[0][idx]) {
/* If we have a valid hash entry at `idx` in ht0, we perform
* rehash on the bucket at `idx` (being more CPU cache friendly) */
_dictBucketRehash(d, idx);
} else {
/* If the hash entry is not in ht0, we rehash the buckets based
* on the rehashidx (not CPU cache friendly). */
_dictRehashStep(d);
}
}
for (table = 0; table <= 1; table++) {
idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
if (table == 0 && (long)idx < d->rehashidx) continue;
idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
he = d->ht_table[table][idx];
prevHe = NULL;
while(he) {
@@ -703,11 +740,25 @@ dictEntry *dictFind(dict *d, const void *key)
uint64_t h, idx, table;
if (dictSize(d) == 0) return NULL; /* dict is empty */
if (dictIsRehashing(d)) _dictRehashStep(d);
h = dictHashKey(d, key);
idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[0]);
if (dictIsRehashing(d)) {
if ((long)idx >= d->rehashidx && d->ht_table[0][idx]) {
/* If we have a valid hash entry at `idx` in ht0, we perform
* rehash on the bucket at `idx` (being more CPU cache friendly) */
_dictBucketRehash(d, idx);
} else {
/* If the hash entry is not in ht0, we rehash the buckets based
* on the rehashidx (not CPU cache friendly). */
_dictRehashStep(d);
}
}
for (table = 0; table <= 1; table++) {
if (table == 0 && (long)idx < d->rehashidx) continue;
idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
if (table == 0 && (long)idx < d->rehashidx) continue;
he = d->ht_table[table][idx];
while(he) {
void *he_key = dictGetKey(he);
@@ -1504,15 +1555,27 @@ static signed char _dictNextExp(unsigned long size)
void *dictFindPositionForInsert(dict *d, const void *key, dictEntry **existing) {
unsigned long idx, table;
dictEntry *he;
uint64_t hash = dictHashKey(d, key);
if (existing) *existing = NULL;
if (dictIsRehashing(d)) _dictRehashStep(d);
uint64_t hash = dictHashKey(d, key);
idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[0]);
if (dictIsRehashing(d)) {
if ((long)idx >= d->rehashidx && d->ht_table[0][idx]) {
/* If we have a valid hash entry at `idx` in ht0, we perform
* rehash on the bucket at `idx` (being more CPU cache friendly) */
_dictBucketRehash(d, idx);
} else {
/* If the hash entry is not in ht0, we rehash the buckets based
* on the rehashidx (not CPU cache friendly). */
_dictRehashStep(d);
}
}
/* Expand the hash table if needed */
_dictExpandIfNeeded(d);
for (table = 0; table <= 1; table++) {
idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
if (table == 0 && (long)idx < d->rehashidx) continue;
idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
/* Search if this slot does not already contain the given key */
he = d->ht_table[table][idx];
while(he) {


@@ -1029,6 +1029,12 @@ foreach type {single multiple single_multiple} {
r srem $myset {*}$members
}
proc verify_rehashing_completed_key {myset table_size keys} {
set htstats [r debug HTSTATS-KEY $myset]
assert {![string match {*rehashing target*} $htstats]}
return {[string match {*table size: $table_size*number of elements: $keys*} $htstats]}
}
test "SRANDMEMBER with a dict containing long chain" {
set origin_save [config_get_set save ""]
set origin_max_lp [config_get_set set-max-listpack-entries 0]
@@ -1099,12 +1105,10 @@ foreach type {single multiple single_multiple} {
# otherwise we would need more iterations.
rem_hash_set_top_N myset [expr {[r scard myset] - 30}]
assert_equal [r scard myset] 30
assert {[is_rehashing myset]}
# Wait for the hash set rehashing to finish.
while {[is_rehashing myset]} {
r srandmember myset 10
}
# Hash set rehashing would be completed while removing members from the `myset`
# We also check the size and members in the hash table.
verify_rehashing_completed_key myset 64 30
# Now that we have a hash set with only one long chain bucket.
set htstats [r debug HTSTATS-KEY myset full]