Optimize resizing hash table to resize not only non-empty dicts. (#12819)

The function `tryResizeHashTables` only attempts to shrink the dicts
that have keys (a change from #11695). This was a serious problem until
the change in #12850, since it meant that if all keys were deleted, we
would never shrink the dict.
But still, both dictShrink and dictExpand may be blocked by a forked child
process; therefore the cron job needs to perform both dictShrink and
dictExpand, and on all dicts in the DBs, not just the non-empty ones.

What this PR does:

1. Try to resize all dicts in DBs (not just non-empty ones, as it was
since #12850)
2. handle both shrink and expand (not just shrink, as it was since
forever)
3. Refactor some APIs about dict resizing (get rid of `htNeedsShrink`
and `dictShrinkToFit`, and expose `dictShrinkIfNeeded` and
`dictExpandIfNeeded`, which already contain all the code of the
functions we got rid of, to make the APIs neater)
4. In the `Don't rehash if redis has child process` test, now that cron
would do resizing, we no longer need to write to DB after the child
process got killed, and can wait for the cron to expand the hash table.
This commit is contained in:
Chen Tianjie 2024-01-30 03:02:07 +08:00 committed by GitHub
parent c5273cae18
commit af7ceeb765
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 89 additions and 60 deletions

View File

@ -692,7 +692,7 @@ long long emptyDbStructure(redisDb *dbarray, int dbnum, int async,
for (dbKeyType subdict = DB_MAIN; subdict <= DB_EXPIRES; subdict++) {
dbarray[j].sub_dict[subdict].non_empty_slots = 0;
dbarray[j].sub_dict[subdict].key_count = 0;
dbarray[j].sub_dict[subdict].resize_cursor = -1;
dbarray[j].sub_dict[subdict].resize_cursor = 0;
if (server.cluster_enabled) {
dbarray[j].sub_dict[subdict].bucket_count = 0;
unsigned long long *slot_size_index = dbarray[j].sub_dict[subdict].slot_size_index;

View File

@ -216,19 +216,6 @@ int _dictInit(dict *d, dictType *type)
return DICT_OK;
}
/* Resize the table to the minimal size that contains all the elements,
 * but with the invariant of a USED/BUCKETS ratio near to <= 1.
 *
 * Returns DICT_ERR (leaving the table untouched) when automatic resizing
 * is not fully enabled or an incremental rehash is already in progress;
 * otherwise returns the result of dictShrink(). */
int dictShrinkToFit(dict *d)
{
unsigned long minimal;
/* Only shrink when resizing is globally enabled and no rehash is underway. */
if (dict_can_resize != DICT_RESIZE_ENABLE || dictIsRehashing(d)) return DICT_ERR;
minimal = d->ht_used[0];
/* Never shrink below the dict's minimum table size. */
if (minimal < DICT_HT_INITIAL_SIZE)
minimal = DICT_HT_INITIAL_SIZE;
return dictShrink(d, minimal);
}
/* Resize or create the hash table,
* when malloc_failed is non-NULL, it'll avoid panic if malloc fails (in which case it'll be set to 1).
* Returns DICT_OK if resize was performed, and DICT_ERR if skipped. */
@ -1484,19 +1471,17 @@ static int dictTypeResizeAllowed(dict *d, size_t size) {
(double)d->ht_used[0] / DICTHT_SIZE(d->ht_size_exp[0]));
}
/* Expand the hash table if needed */
static void _dictExpandIfNeeded(dict *d)
{
/* Automatic resizing is disallowed. Return */
if (d->pauseAutoResize > 0) return;
/* Returning DICT_OK indicates a successful expand or the dictionary is undergoing rehashing,
* and there is nothing else we need to do about this dictionary currently. While DICT_ERR indicates
* that expand has not been triggered (may be try shrinking?)*/
int dictExpandIfNeeded(dict *d) {
/* Incremental rehashing already in progress. Return. */
if (dictIsRehashing(d)) return;
if (dictIsRehashing(d)) return DICT_OK;
/* If the hash table is empty expand it to the initial size. */
if (DICTHT_SIZE(d->ht_size_exp[0]) == 0) {
dictExpand(d, DICT_HT_INITIAL_SIZE);
return;
return DICT_OK;
}
/* If we reached the 1:1 ratio, and we are allowed to resize the hash
@ -1508,22 +1493,30 @@ static void _dictExpandIfNeeded(dict *d)
(dict_can_resize != DICT_RESIZE_FORBID &&
d->ht_used[0] >= dict_force_resize_ratio * DICTHT_SIZE(d->ht_size_exp[0])))
{
if (!dictTypeResizeAllowed(d, d->ht_used[0] + 1))
return;
dictExpand(d, d->ht_used[0] + 1);
if (dictTypeResizeAllowed(d, d->ht_used[0] + 1))
dictExpand(d, d->ht_used[0] + 1);
return DICT_OK;
}
return DICT_ERR;
}
static void _dictShrinkIfNeeded(dict *d)
{
/* Expand the hash table if needed.
 *
 * Thin internal wrapper around dictExpandIfNeeded() that additionally
 * honors the pauseAutoResize counter, so the automatic (insert-path)
 * callers never resize while auto-resizing is paused. */
static void _dictExpandIfNeeded(dict *d) {
/* Automatic resizing is disallowed. Return */
if (d->pauseAutoResize > 0) return;
dictExpandIfNeeded(d);
}
/* Returning DICT_OK indicates a successful shrinking or the dictionary is undergoing rehashing,
* and there is nothing else we need to do about this dictionary currently. While DICT_ERR indicates
* that shrinking has not been triggered (may be try expanding?)*/
int dictShrinkIfNeeded(dict *d) {
/* Incremental rehashing already in progress. Return. */
if (dictIsRehashing(d)) return;
if (dictIsRehashing(d)) return DICT_OK;
/* If the size of hash table is DICT_HT_INITIAL_SIZE, don't shrink it. */
if (DICTHT_SIZE(d->ht_size_exp[0]) == DICT_HT_INITIAL_SIZE) return;
if (DICTHT_SIZE(d->ht_size_exp[0]) <= DICT_HT_INITIAL_SIZE) return DICT_OK;
/* If we reached below 1:8 elements/buckets ratio, and we are allowed to resize
* the hash table (global setting) or we should avoid it but the ratio is below 1:32,
@ -1533,10 +1526,19 @@ static void _dictShrinkIfNeeded(dict *d)
(dict_can_resize != DICT_RESIZE_FORBID &&
d->ht_used[0] * HASHTABLE_MIN_FILL * dict_force_resize_ratio <= DICTHT_SIZE(d->ht_size_exp[0])))
{
if (!dictTypeResizeAllowed(d, d->ht_used[0]))
return;
dictShrink(d, d->ht_used[0]);
if (dictTypeResizeAllowed(d, d->ht_used[0]))
dictShrink(d, d->ht_used[0]);
return DICT_OK;
}
return DICT_ERR;
}
/* Shrink the hash table if needed.
 *
 * Thin internal wrapper around dictShrinkIfNeeded() that additionally
 * honors the pauseAutoResize counter, so the automatic (delete-path)
 * callers never resize while auto-resizing is paused. */
static void _dictShrinkIfNeeded(dict *d)
{
/* Automatic resizing is disallowed. Return */
if (d->pauseAutoResize > 0) return;
dictShrinkIfNeeded(d);
}
/* Our hash table capability is a power of two */

View File

@ -195,7 +195,8 @@ void dictTwoPhaseUnlinkFree(dict *d, dictEntry *he, dictEntry **plink, int table
void dictRelease(dict *d);
dictEntry * dictFind(dict *d, const void *key);
void *dictFetchValue(dict *d, const void *key);
int dictShrinkToFit(dict *d);
int dictShrinkIfNeeded(dict *d);
int dictExpandIfNeeded(dict *d);
void dictSetKey(dict *d, dictEntry* de, void *key);
void dictSetVal(dict *d, dictEntry *de, void *val);
void dictSetSignedIntegerVal(dictEntry *de, int64_t val);

View File

@ -693,34 +693,23 @@ dictType clientDictType = {
.no_value = 1 /* no values in this dict */
};
/* Return non-zero when the dict's hash table is worth shrinking: it is
 * above the initial size and its fill ratio (used/buckets) has dropped
 * to 1/HASHTABLE_MIN_FILL or below. */
int htNeedsShrink(dict *dict) {
long long size, used;
size = dictBuckets(dict);
used = dictSize(dict);
return (size > DICT_HT_INITIAL_SIZE &&
(used * HASHTABLE_MIN_FILL <= size));
}
/* In cluster-enabled setup, this method traverses through all main/expires dictionaries (CLUSTER_SLOTS)
* and triggers a resize if the percentage of used buckets in the HT reaches (100 / HASHTABLE_MIN_FILL)
* we resize the hash table to save memory.
* we shrink the hash table to save memory, or expand the hash when the percentage of used buckets reached
* 100.
*
* In non cluster-enabled setup, it resize main/expires dictionary based on the same condition described above. */
void tryResizeHashTables(int dbid) {
redisDb *db = &server.db[dbid];
int dicts_per_call = min(CRON_DICTS_PER_DB, db->dict_count);
for (dbKeyType subdict = DB_MAIN; subdict <= DB_EXPIRES; subdict++) {
if (dbSize(db, subdict) == 0) continue;
if (db->sub_dict[subdict].resize_cursor == -1)
db->sub_dict[subdict].resize_cursor = findSlotByKeyIndex(db, 1, subdict);
for (int i = 0; i < CRON_DBS_PER_CALL && db->sub_dict[subdict].resize_cursor != -1; i++) {
for (int i = 0; i < dicts_per_call; i++) {
int slot = db->sub_dict[subdict].resize_cursor;
dict *d = (subdict == DB_MAIN ? db->dict[slot] : db->expires[slot]);
if (htNeedsShrink(d))
dictShrinkToFit(d);
db->sub_dict[subdict].resize_cursor = dbGetNextNonEmptySlot(db, slot, subdict);
if (dictShrinkIfNeeded(d) == DICT_ERR) {
dictExpandIfNeeded(d);
}
db->sub_dict[subdict].resize_cursor = (slot + 1) % db->dict_count;
}
}
}
@ -2685,7 +2674,7 @@ void initDbState(redisDb *db){
for (dbKeyType subdict = DB_MAIN; subdict <= DB_EXPIRES; subdict++) {
db->sub_dict[subdict].non_empty_slots = 0;
db->sub_dict[subdict].key_count = 0;
db->sub_dict[subdict].resize_cursor = -1;
db->sub_dict[subdict].resize_cursor = 0;
db->sub_dict[subdict].slot_size_index = server.cluster_enabled ? zcalloc(sizeof(unsigned long long) * (CLUSTER_SLOTS + 1)) : NULL;
db->sub_dict[subdict].bucket_count = 0;
}

View File

@ -115,6 +115,7 @@ struct hdr_histogram;
#define CONFIG_MAX_HZ 500
#define MAX_CLIENTS_PER_CLOCK_TICK 200 /* HZ is adapted based on that. */
#define CRON_DBS_PER_CALL 16
#define CRON_DICTS_PER_DB 16
#define NET_MAX_WRITES_PER_EVENT (1024*64)
#define PROTO_SHARED_SELECT_CMDS 10
#define OBJ_SHARED_INTEGERS 10000
@ -970,7 +971,7 @@ typedef struct replBufBlock {
/* When adding fields, please check the swap db related logic. */
typedef struct dbDictState {
int resize_cursor; /* Cron job uses this cursor to gradually resize dictionaries (only used for cluster-enabled). */
int resize_cursor; /* Cron job uses this cursor to gradually resize all dictionaries. */
int non_empty_slots; /* The number of non-empty slots. */
unsigned long long key_count; /* Total number of keys in this DB. */
unsigned long long bucket_count; /* Total number of buckets in this DB across dictionaries (only used for cluster-enabled). */
@ -3111,7 +3112,6 @@ void serverLogRaw(int level, const char *msg);
void serverLogRawFromHandler(int level, const char *msg);
void usage(void);
void updateDictResizePolicy(void);
int htNeedsShrink(dict *dict);
void populateCommandTable(void);
void resetCommandTableStats(dict* commands);
void resetErrorTableStats(void);

View File

@ -2024,7 +2024,7 @@ void zremrangeGenericCommand(client *c, zrange_type rangetype) {
break;
}
dictResumeAutoResize(zs->dict);
if (htNeedsShrink(zs->dict)) dictShrinkToFit(zs->dict);
dictShrinkIfNeeded(zs->dict);
if (dictSize(zs->dict) == 0) {
dbDelete(c->db,key);
keyremoved = 1;
@ -2554,7 +2554,7 @@ static void zdiffAlgorithm2(zsetopsrc *src, long setnum, zset *dstzset, size_t *
}
/* Resize dict if needed after removing multiple elements */
if (htNeedsShrink(dstzset->dict)) dictShrinkToFit(dstzset->dict);
dictShrinkIfNeeded(dstzset->dict);
/* Using this algorithm, we can't calculate the max element as we go,
* we have to iterate through all elements to find the max one after. */

View File

@ -373,12 +373,14 @@ start_server {tags {"other external:skip"}} {
assert_no_match "*table size: 8192*" [r debug HTSTATS 9]
exec kill -9 [get_child_pid 0]
waitForBgsave r
after 200 ;# waiting for serverCron
# Hash table should rehash since there is no child process,
# size is power of two and over 4098, so it is 8192
r set k3 v3
assert_match "*table size: 8192*" [r debug HTSTATS 9]
wait_for_condition 50 100 {
[string match "*table size: 8192*" [r debug HTSTATS 9]]
} else {
fail "hash table did not rehash after child process killed"
}
} {} {needs:debug needs:local-process}
}
@ -487,3 +489,38 @@ start_cluster 1 0 {tags {"other external:skip cluster slow"}} {
assert_match "*table size: 16*" [r debug HTSTATS 0]
} {} {needs:debug}
}
# Parse the output of MEMORY STATS and return the value reported for the
# overhead.hashtable.main field, or 0 if the field is absent.
proc get_overhead_hashtable_main {} {
set main 0
set stats [r memory stats]
set list_stats [split $stats " "]
# MEMORY STATS is a flat name/value list; scan for the field name and
# take the element immediately following it.
for {set j 0} {$j < [llength $list_stats]} {incr j} {
if {[string equal -nocase "\{overhead.hashtable.main" [lindex $list_stats $j]]} {
set main [lindex $list_stats [expr $j+1]]
break
}
}
return $main
}
start_server {tags {"other external:skip"}} {
test "Redis can resize empty dict" {
# Write and then delete 128 keys, creating an empty dict
r flushall
for {set j 1} {$j <= 128} {incr j} {
r set $j{b} a
}
for {set j 1} {$j <= 128} {incr j} {
r del $j{b}
}
# Set a key to enable overhead display of db 0
r set a b
# The dict containing 128 keys must have expanded,
# its hash table itself takes a lot more than 200 bytes
# NOTE(review): the shrink of the now-empty dict happens asynchronously
# in serverCron, hence the polling wait below rather than a direct assert.
wait_for_condition 100 50 {
[get_overhead_hashtable_main] < 200
} else {
fail "dict did not resize in time"
}
}
}