Add new DEBUG dict-resizing command to disable the dict resize (#13043)

The test fails intermittently:
```
*** [err]: expire scan should skip dictionaries with lot's of empty buckets in tests/unit/expire.tcl
scan didn't handle slot skipping logic.
```

There are two cases:
1. When the test passes: we use a child process to avoid the dict
resize, but that cannot completely prevent it, since in dictDelete we
still have a chance to trigger a resize (by hitting the force ratio).
The reason our test passed before is that the expire dict was still
rehashing, so in dictDelete, dictShrinkIfNeeded could not trigger the
resize.

2. When the test fails: the expire dict has finished rehashing, so on
the last dictDelete, dictShrinkIfNeeded triggers the dict resize
because it hits the force ratio, and the slot-skipping logic fails.

This PR adds a new DEBUG command to disable dict resizing.
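For context, a rough sketch of why a live child process alone cannot freeze shrinking: the child only puts the dict into an "avoid resizing" mode, and an extremely sparse table still passes the force-ratio check. The constants and names below (HASHTABLE_MIN_FILL, DICT_FORCE_RESIZE_RATIO, the policy enum) are illustrative stand-ins, not the exact Redis source:

```c
/* Illustrative sketch only -- simplified from how Redis-style dicts
 * decide to shrink; constants and names here are assumptions. */
#include <stdio.h>

#define HASHTABLE_MIN_FILL      8  /* assumed: shrink below 1/8 fill */
#define DICT_FORCE_RESIZE_RATIO 4  /* assumed: overrides AVOID past this */

typedef enum { RESIZE_ENABLE, RESIZE_AVOID, RESIZE_FORBID } resize_policy;

/* AVOID (a child process is alive) only raises the bar; it does not
 * forbid shrinking outright, which is why the old test was flaky. */
static int should_shrink(resize_policy p, unsigned long used,
                         unsigned long size) {
    if (p == RESIZE_FORBID) return 0;
    if (p == RESIZE_ENABLE)
        return used * HASHTABLE_MIN_FILL < size;
    /* RESIZE_AVOID: shrink anyway once the table is extremely sparse. */
    return used * HASHTABLE_MIN_FILL * DICT_FORCE_RESIZE_RATIO < size;
}

int main(void) {
    /* 1 key left in 128 buckets: 1*8*4 = 32 < 128, so the "avoided"
     * shrink fires anyway -- the resize the test did not expect. */
    printf("%d\n", should_shrink(RESIZE_AVOID, 1, 128)); /* prints 1 */
    return 0;
}
```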
Binbin committed 2024-02-08 22:39:58 +08:00 (via GitHub)
parent 813327b231
commit 493e31e3ad
5 changed files with 24 additions and 30 deletions

src/debug.c

```diff
@@ -496,6 +496,8 @@ void debugCommand(client *c) {
 "    In case RESET is provided the peak reset time will be restored to the default value",
 "REPLYBUFFER RESIZING <0|1>",
 "    Enable or disable the reply buffer resize cron job",
+"DICT-RESIZING <0|1>",
+"    Enable or disable the main dict and expire dict resizing.",
 NULL
 };
 addExtendedReplyHelp(c, help, clusterDebugCommandExtendedHelp());
@@ -1021,6 +1023,9 @@ NULL
             return;
         }
         addReply(c, shared.ok);
+    } else if (!strcasecmp(c->argv[1]->ptr, "dict-resizing") && c->argc == 3) {
+        server.dict_resizing = atoi(c->argv[2]->ptr);
+        addReply(c, shared.ok);
     } else if(!handleDebugClusterCommand(c)) {
         addReplySubcommandSyntaxError(c);
         return;
```
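With this change, the tests can toggle resizing directly: `r debug dict-resizing 0` before the deletions, and `r debug dict-resizing 1` once the assertions are done, as the test diffs below show. Note that the handler parses its argument with atoi(), so 0 disables resizing and any non-zero integer re-enables it.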

src/server.c

```diff
@@ -429,6 +429,9 @@ uint64_t dictEncObjHash(const void *key) {
  * but to guarantee the performance of redis, we still allow dict to expand
  * if dict load factor exceeds HASHTABLE_MAX_LOAD_FACTOR. */
 int dictResizeAllowed(size_t moreMem, double usedRatio) {
+    /* for debug purposes: dict is not allowed to be resized. */
+    if (!server.dict_resizing) return 0;
+
     if (usedRatio <= HASHTABLE_MAX_LOAD_FACTOR) {
         return !overMaxmemoryAfterAlloc(moreMem);
     } else {
@@ -2079,6 +2082,7 @@ void initServerConfig(void) {
     server.next_client_id = 1; /* Client IDs, start from 1 .*/
     server.page_size = sysconf(_SC_PAGESIZE);
     server.pause_cron = 0;
+    server.dict_resizing = 1;
     server.latency_tracking_info_percentiles_len = 3;
     server.latency_tracking_info_percentiles = zmalloc(sizeof(double)*(server.latency_tracking_info_percentiles_len));
```
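To see the guard's effect in isolation, here is a minimal, self-contained sketch of the dictResizeAllowed logic above with the new veto in front; the stand-in server struct, the stubbed memory check, and the 1.618 bound are illustrative assumptions:

```c
/* Minimal sketch of the guard added above: when the debug flag is off,
 * dictResizeAllowed-style logic rejects every resize before any other
 * check. Struct and constant are stand-ins, not the real source. */
#include <stdbool.h>
#include <stdio.h>

#define HASHTABLE_MAX_LOAD_FACTOR 1.618 /* assumed bound, illustrative */

static struct { int dict_resizing; } server = { .dict_resizing = 1 };

static bool over_maxmemory_after_alloc(size_t more_mem) {
    (void)more_mem;
    return false; /* pretend we always have memory to spare */
}

static int dict_resize_allowed(size_t more_mem, double used_ratio) {
    if (!server.dict_resizing) return 0; /* debug veto wins first */
    if (used_ratio <= HASHTABLE_MAX_LOAD_FACTOR)
        return !over_maxmemory_after_alloc(more_mem);
    return 1; /* past the max load factor, expand regardless of memory */
}

int main(void) {
    server.dict_resizing = 0;                     /* DEBUG DICT-RESIZING 0 */
    printf("%d\n", dict_resize_allowed(64, 0.1)); /* 0: resize vetoed */
    server.dict_resizing = 1;                     /* DEBUG DICT-RESIZING 1 */
    printf("%d\n", dict_resize_allowed(64, 0.1)); /* 1: allowed again */
    return 0;
}
```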

src/server.h

```diff
@@ -1754,6 +1754,7 @@ struct redisServer {
     char *proc_title_template; /* Process title template format */
     clientBufferLimitsConfig client_obuf_limits[CLIENT_TYPE_OBUF_COUNT];
     int pause_cron; /* Don't run cron tasks (debug) */
+    int dict_resizing; /* Whether to allow main dict and expired dict to be resized (debug) */
     int latency_tracking_enabled; /* 1 if extended latency tracking is enabled, 0 otherwise. */
     double *latency_tracking_info_percentiles; /* Extended latency tracking info output percentile list configuration. */
     int latency_tracking_info_percentiles_len;
```

tests/unit/expire.tcl

```diff
@@ -853,9 +853,9 @@ start_cluster 1 0 {tags {"expire external:skip cluster slow"}} {
         # hashslot(key) is 12539
         r psetex key 500 val

-        # disable resizing
-        r config set rdb-key-save-delay 10000000
-        r bgsave
+        # disable resizing, the reason for not using slow bgsave is because
+        # it will hit the dict_force_resize_ratio.
+        r debug dict-resizing 0

         # delete data to have lot's (99%) of empty buckets (slot 12182 should be skipped)
         for {set j 1} {$j <= 99} {incr j} {
@@ -872,20 +872,16 @@ start_cluster 1 0 {tags {"expire external:skip cluster slow"}} {
             [r dbsize] eq 1
         } else {
             if {[r dbsize] eq 0} {
+                puts [r debug htstats 0]
                 fail "scan didn't handle slot skipping logic."
             } else {
+                puts [r debug htstats 0]
                 fail "scan didn't process all valid slots."
             }
         }

         # Enable resizing
-        r config set rdb-key-save-delay 0
-        catch {exec kill -9 [get_child_pid 0]}
-        wait_for_condition 1000 10 {
-            [s rdb_bgsave_in_progress] eq 0
-        } else {
-            fail "bgsave did not stop in time."
-        }
+        r debug dict-resizing 1

         # put some data into slot 12182 and trigger the resize
         r psetex "{foo}0" 500 a
```

tests/unit/other.tcl

```diff
@@ -438,9 +438,9 @@ start_cluster 1 0 {tags {"other external:skip cluster slow"}} {
         }
         assert_match "*table size: 128*" [r debug HTSTATS 0]

-        # disable resizing
-        r config set rdb-key-save-delay 10000000
-        r bgsave
+        # disable resizing, the reason for not using slow bgsave is because
+        # it will hit the dict_force_resize_ratio.
+        r debug dict-resizing 0

         # delete data to have lot's (96%) of empty buckets
         for {set j 1} {$j <= 123} {incr j} {
@@ -449,13 +449,7 @@ start_cluster 1 0 {tags {"other external:skip cluster slow"}} {

         assert_match "*table size: 128*" [r debug HTSTATS 0]
         # enable resizing
-        r config set rdb-key-save-delay 0
-        catch {exec kill -9 [get_child_pid 0]}
-        wait_for_condition 1000 10 {
-            [s rdb_bgsave_in_progress] eq 0
-        } else {
-            fail "bgsave did not stop in time."
-        }
+        r debug dict-resizing 1

         # waiting for serverCron to resize the tables
         wait_for_condition 1000 10 {
@@ -474,22 +468,16 @@ start_cluster 1 0 {tags {"other external:skip cluster slow"}} {
             r set "{alice}$j" a
         }

-        # disable resizing
-        r config set rdb-key-save-delay 10000000
-        r bgsave
+        # disable resizing, the reason for not using slow bgsave is because
+        # it will hit the dict_force_resize_ratio.
+        r debug dict-resizing 0

         for {set j 1} {$j <= 123} {incr j} {
             r del "{alice}$j"
         }

         # enable resizing
-        r config set rdb-key-save-delay 0
-        catch {exec kill -9 [get_child_pid 0]}
-        wait_for_condition 1000 10 {
-            [s rdb_bgsave_in_progress] eq 0
-        } else {
-            fail "bgsave did not stop in time."
-        }
+        r debug dict-resizing 1

         # waiting for serverCron to resize the tables
         wait_for_condition 1000 10 {
```