Introduce debug command to disable reply buffer resizing (#10360)

In order to resolve some flaky tests which hard rely on examine memory footprint.
we introduce the following fixes:

# Fix in client-eviction test - by @yoav-steinberg 
Sometime the libc allocator can use different size client struct allocations.
this may cause unexpected memory calculations to fail the test.

# Introduce new DEBUG command for disabling reply buffer resizing
In order to eliminate reply buffer resizing during specific tests.
we introduced the ability to disable (and enable) the resizing cron job

Co-authored-by: yoav-steinberg yoav@redislabs.com
This commit is contained in:
ranshid 2022-03-01 14:40:29 +02:00 committed by GitHub
parent 4a45386e3c
commit 9b15dd288e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 48 additions and 25 deletions

View File

@ -482,10 +482,12 @@ void debugCommand(client *c) {
" Show low level client eviction pools info (maxmemory-clients).",
"PAUSE-CRON <0|1>",
" Stop periodic cron job processing.",
"REPLYBUFFER-PEAK-RESET-TIME <NEVER||RESET|time>",
"REPLYBUFFER PEAK-RESET-TIME <NEVER||RESET|time>",
" Sets the time (in milliseconds) to wait between client reply buffer peak resets.",
" In case NEVER is provided the last observed peak will never be reset",
" In case RESET is provided the peak reset time will be restored to the default value",
"REPLYBUFFER RESIZING <0|1>",
" Enable or disable the replay buffer resize cron job",
NULL
};
addReplyHelp(c, help);
@ -962,14 +964,21 @@ NULL
{
server.pause_cron = atoi(c->argv[2]->ptr);
addReply(c,shared.ok);
} else if (!strcasecmp(c->argv[1]->ptr,"replybuffer-peak-reset-time") && c->argc == 3 ) {
if (!strcasecmp(c->argv[2]->ptr, "never")) {
server.reply_buffer_peak_reset_time = -1;
} else if(!strcasecmp(c->argv[2]->ptr, "reset")) {
server.reply_buffer_peak_reset_time = REPLY_BUFFER_DEFAULT_PEAK_RESET_TIME;
} else if (!strcasecmp(c->argv[1]->ptr,"replybuffer") && c->argc == 4 ) {
if(!strcasecmp(c->argv[2]->ptr, "peak-reset-time")) {
if (!strcasecmp(c->argv[3]->ptr, "never")) {
server.reply_buffer_peak_reset_time = -1;
} else if(!strcasecmp(c->argv[3]->ptr, "reset")) {
server.reply_buffer_peak_reset_time = REPLY_BUFFER_DEFAULT_PEAK_RESET_TIME;
} else {
if (getLongFromObjectOrReply(c, c->argv[3], &server.reply_buffer_peak_reset_time, NULL) != C_OK)
return;
}
} else if(!strcasecmp(c->argv[2]->ptr,"resizing")) {
server.reply_buffer_resizing_enabled = atoi(c->argv[3]->ptr);
} else {
if (getLongFromObjectOrReply(c, c->argv[2], &server.reply_buffer_peak_reset_time, NULL) != C_OK)
return;
addReplySubcommandSyntaxError(c);
return;
}
addReply(c, shared.ok);
} else {

View File

@ -744,6 +744,10 @@ int clientsCronResizeOutputBuffer(client *c, mstime_t now_ms) {
const size_t buffer_target_shrink_size = c->buf_usable_size/2;
const size_t buffer_target_expand_size = c->buf_usable_size*2;
/* in case the resizing is disabled return immediately */
if(!server.reply_buffer_resizing_enabled)
return 0;
if (buffer_target_shrink_size >= PROTO_REPLY_MIN_BYTES &&
c->buf_peak < buffer_target_shrink_size )
{
@ -2429,6 +2433,7 @@ void initServer(void) {
server.thp_enabled = 0;
server.cluster_drop_packet_filter = -1;
server.reply_buffer_peak_reset_time = REPLY_BUFFER_DEFAULT_PEAK_RESET_TIME;
server.reply_buffer_resizing_enabled = 1;
resetReplicationBuffer();
if ((server.tls_port || server.tls_replication || server.tls_cluster)

View File

@ -1914,6 +1914,7 @@ struct redisServer {
int cluster_allow_pubsubshard_when_down; /* Is pubsubshard allowed when the cluster
is down, doesn't affect pubsub global. */
long reply_buffer_peak_reset_time; /* The amount of time (in milliseconds) to wait between reply buffer peak resets */
int reply_buffer_resizing_enabled; /* Is reply buffer resizing enabled (1 by default) */
};
#define MAX_KEYS_BUFFER 256

View File

@ -395,13 +395,14 @@ start_server {} {
test "evict clients only until below limit" {
set client_count 10
set client_mem [mb 1]
r debug replybuffer-peak-reset-time never
r debug replybuffer resizing 0
r config set maxmemory-clients 0
r client setname control
r client no-evict on
# Make multiple clients consume together roughly 1mb less than maxmemory_clients
set total_client_mem 0
set max_client_mem 0
set rrs {}
for {set j 0} {$j < $client_count} {incr j} {
set rr [redis_client]
@ -414,20 +415,27 @@ start_server {} {
} else {
fail "Failed to fill qbuf for test"
}
incr total_client_mem [client_field client$j tot-mem]
# In theory all these clients should use the same amount of memory (~1mb). But in practice
# some allocators (libc) can return different allocation sizes for the same malloc argument causing
# some clients to use slightly more memory than others. We find the largest client and make sure
# all clients are roughly the same size (+-1%). Then we can safely set the client eviction limit and
# expect consistent results in the test.
set cmem [client_field client$j tot-mem]
if {$max_client_mem > 0} {
set size_ratio [expr $max_client_mem.0/$cmem.0]
assert_range $size_ratio 0.99 1.01
}
if {$cmem > $max_client_mem} {
set max_client_mem $cmem
}
}
set client_actual_mem [expr $total_client_mem / $client_count]
# Make sure client_acutal_mem is more or equal to what we intended
assert {$client_actual_mem >= $client_mem}
# Make sure all clients are still connected
set connected_clients [llength [lsearch -all [split [string trim [r client list]] "\r\n"] *name=client*]]
assert {$connected_clients == $client_count}
# Set maxmemory-clients to accommodate half our clients (taking into account the control client)
set maxmemory_clients [expr ($client_actual_mem * $client_count) / 2 + [client_field control tot-mem]]
set maxmemory_clients [expr ($max_client_mem * $client_count) / 2 + [client_field control tot-mem]]
r config set maxmemory-clients $maxmemory_clients
# Make sure total used memory is below maxmemory_clients
@ -438,8 +446,8 @@ start_server {} {
set connected_clients [llength [lsearch -all [split [string trim [r client list]] "\r\n"] *name=client*]]
assert {$connected_clients == [expr $client_count / 2]}
# Restore the peak reset time to default
r debug replybuffer-peak-reset-time reset
# Restore the reply buffer resize to default
r debug replybuffer resizing 1
foreach rr $rrs {$rr close}
} {} {needs:debug}
@ -454,7 +462,7 @@ start_server {} {
r client setname control
r client no-evict on
r config set maxmemory-clients 0
r debug replybuffer-peak-reset-time never
r debug replybuffer resizing 0
# Run over all sizes and create some clients using up that size
set total_client_mem 0
@ -505,8 +513,8 @@ start_server {} {
}
}
# Restore the peak reset time to default
r debug replybuffer-peak-reset-time reset
# Restore the reply buffer resize to default
r debug replybuffer resizing 1
foreach rr $rrs {$rr close}
} {} {needs:debug}

View File

@ -3,7 +3,7 @@ proc get_reply_buffer_size {cname} {
set clients [split [string trim [r client list]] "\r\n"]
set c [lsearch -inline $clients *name=$cname*]
if {![regexp rbs=(\[a-zA-Z0-9-\]+) $c - rbufsize]} {
error "field rbus not found in $c"
error "field rbs not found in $c"
}
return $rbufsize
}
@ -12,7 +12,7 @@ start_server {tags {"replybufsize"}} {
test {verify reply buffer limits} {
# In order to reduce test time we can set the peak reset time very low
r debug replybuffer-peak-reset-time 100
r debug replybuffer peak-reset-time 100
# Create a simple idle test client
variable tc [redis_client]
@ -29,7 +29,7 @@ start_server {tags {"replybufsize"}} {
r set bigval [string repeat x 32768]
# In order to reduce test time we can set the peak reset time very low
r debug replybuffer-peak-reset-time never
r debug replybuffer peak-reset-time never
wait_for_condition 10 100 {
[$tc get bigval ; get_reply_buffer_size test_client] >= 16384 && [get_reply_buffer_size test_client] < 32768
@ -39,7 +39,7 @@ start_server {tags {"replybufsize"}} {
}
# Restore the peak reset time to default
r debug replybuffer-peak-reset-time reset
r debug replybuffer peak-reset-time reset
$tc close
} {0} {needs:debug}