Fix rehashingStarted miscalculating bucket_count in dict initialization (#12846)

In the old dictRehashingInfo implementation, for the initialization
scenario,
it mistakenly directly set to_size to DICTHT_SIZE(DICT_HT_INITIAL_EXP),
which
is 4 in our code by default.

In scenarios where dictExpand directly passes the target size as
initialization,
the code will calculate bucket_count incorrectly. For example, in DEBUG
POPULATE
or RDB load scenarios, it will cause the final bucket_count to be
initialized to
65536 (16384 * 4), see:
```
before:
DB 0: 10000000 keys (0 volatile) in 65536 slots HT.

it should be:
DB 0: 10000000 keys (0 volatile) in 16777216 slots HT.
```

In PR, new ht will also be initialized before calling rehashingStarted
in
_dictExpand, so that the calls in dictRehashingInfo can be unified.

Bug was introduced in #12697.
This commit is contained in:
Binbin 2023-12-10 16:55:30 +08:00 committed by GitHub
parent a3ae2ed37b
commit e6423b7a7e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 12 additions and 23 deletions

View File

@ -238,7 +238,7 @@ int _dictExpand(dict *d, unsigned long size, int* malloc_failed)
signed char new_ht_size_exp = _dictNextExp(size);
/* Detect overflows */
size_t newsize = 1ul<<new_ht_size_exp;
size_t newsize = DICTHT_SIZE(new_ht_size_exp);
if (newsize < size || newsize * sizeof(dictEntry*) < newsize)
return DICT_ERR;
@ -256,27 +256,21 @@ int _dictExpand(dict *d, unsigned long size, int* malloc_failed)
new_ht_used = 0;
/* Is this the first initialization? If so it's not really a rehashing
* we just set the first hash table so that it can accept keys. */
if (d->ht_table[0] == NULL) {
if (d->type->rehashingStarted) d->type->rehashingStarted(d);
if (d->type->rehashingCompleted) d->type->rehashingCompleted(d);
d->ht_size_exp[0] = new_ht_size_exp;
d->ht_used[0] = new_ht_used;
d->ht_table[0] = new_ht_table;
return DICT_OK;
}
/* Prepare a second hash table for incremental rehashing */
/* Prepare a second hash table for incremental rehashing.
* We do this even for the first initialization, so that we can trigger the
* rehashingStarted more conveniently, we will clean it up right after. */
d->ht_size_exp[1] = new_ht_size_exp;
d->ht_used[1] = new_ht_used;
d->ht_table[1] = new_ht_table;
d->rehashidx = 0;
if (d->type->rehashingStarted) d->type->rehashingStarted(d);
/* If the dict is empty, we finish rehashing ASAP.*/
if (d->ht_used[0] == 0) {
/* Is this the first initialization or is the first hash table empty? If so
* it's not really a rehashing, we can just set the first hash table so that
* it can accept keys. */
if (d->ht_table[0] == NULL || d->ht_used[0] == 0) {
if (d->type->rehashingCompleted) d->type->rehashingCompleted(d);
zfree(d->ht_table[0]);
if (d->ht_table[0]) zfree(d->ht_table[0]);
d->ht_size_exp[0] = new_ht_size_exp;
d->ht_used[0] = new_ht_used;
d->ht_table[0] = new_ht_table;
@ -284,6 +278,7 @@ int _dictExpand(dict *d, unsigned long size, int* malloc_failed)
d->rehashidx = -1;
return DICT_OK;
}
return DICT_OK;
}
@ -1521,12 +1516,6 @@ dictEntry *dictFindEntryByPtrAndHash(dict *d, const void *oldptr, uint64_t hash)
/* Provides the old and new ht size for a given dictionary during rehashing. This method
* should only be invoked during initialization/rehashing. */
void dictRehashingInfo(dict *d, unsigned long long *from_size, unsigned long long *to_size) {
/* Expansion during initialization. */
if (d->ht_size_exp[0] == -1) {
*from_size = DICTHT_SIZE(d->ht_size_exp[0]);
*to_size = DICTHT_SIZE(DICT_HT_INITIAL_EXP);
return;
}
/* Invalid method usage if rehashing isn't ongoing. */
assert(dictIsRehashing(d));
*from_size = DICTHT_SIZE(d->ht_size_exp[0]);

View File

@ -222,7 +222,7 @@ unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, void *pri
unsigned long dictScanDefrag(dict *d, unsigned long v, dictScanFunction *fn, dictDefragFunctions *defragfns, void *privdata);
uint64_t dictGetHash(dict *d, const void *key);
dictEntry *dictFindEntryByPtrAndHash(dict *d, const void *oldptr, uint64_t hash);
void dictRehashingInfo(dict *d, unsigned long long *from, unsigned long long *to);
void dictRehashingInfo(dict *d, unsigned long long *from_size, unsigned long long *to_size);
size_t dictGetStatsMsg(char *buf, size_t bufsize, dictStats *stats, int full);
dictStats* dictGetStatsHt(dict *d, int htidx, int full);