flushSlavesOutputBuffers should not write to replicas scheduled to drop (#12242)

This will increase the size of an already large COB (one already passed
the threshold for disconnection)

This could also mean that we'll attempt to write that data to the socket
and the replica will manage to read it, which will result in an
undesired partial sync (undesired for the test)
This commit is contained in:
Oran Agra 2023-06-12 12:05:34 +01:00 committed by GitHub
parent 0bfb6d5582
commit f228ec1ea5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 25 additions and 11 deletions

View File

@ -3947,6 +3947,7 @@ void flushSlavesOutputBuffers(void) {
* 3. Obviously if the slave is not ONLINE.
*/
if (slave->replstate == SLAVE_STATE_ONLINE &&
!(slave->flags & CLIENT_CLOSE_ASAP) &&
can_receive_writes &&
!slave->repl_start_cmd_stream_on_ack &&
clientHasPendingReplies(slave))

View File

@ -248,6 +248,7 @@ test {Replica client-output-buffer size is limited to backlog_limit/16 when no r
set master [srv 0 client]
set master_host [srv 0 host]
set master_port [srv 0 port]
$master config set maxmemory-policy allkeys-lru
$master config set repl-backlog-size 16384
$master config set client-output-buffer-limit "replica 32768 32768 60"
@ -262,13 +263,21 @@ test {Replica client-output-buffer size is limited to backlog_limit/16 when no r
fail "Can't turn the instance into a replica"
}
# Write a big key that is gonna breach the obuf limit and cause the replica to disconnect,
# then in the same event loop, add at least 16 more keys, and enable eviction, so that the
# eviction code has a chance to call flushSlavesOutputBuffers, and then run PING to trigger the eviction code
set _v [prepare_value $keysize]
$master set key $_v
$master write "[format_command mset key $_v k1 1 k2 2 k3 3 k4 4 k5 5 k6 6 k7 7 k8 8 k9 9 ka a kb b kc c kd d ke e kf f kg g kh h]config set maxmemory 1\r\nping\r\n"
$master flush
$master read
$master read
$master read
wait_for_ofs_sync $master $replica
# Write another key to force the test to wait for another event loop iteration
# to give the serverCron a chance to disconnect replicas with COB size exceeding the limits
$master set key1 "1"
# Write another key to force the test to wait for another event loop iteration so that we
# give the serverCron a chance to disconnect replicas with COB size exceeding the limits
$master config set maxmemory 0
$master set key1 1
wait_for_ofs_sync $master $replica
assert {[status $master connected_slaves] == 1}
@ -279,6 +288,8 @@ test {Replica client-output-buffer size is limited to backlog_limit/16 when no r
fail "replica client-output-buffer usage is higher than expected."
}
# now we expect the replica to re-connect but fail partial sync (it doesn't have large
# enough COB limit and must result in a full-sync)
assert {[status $master sync_partial_ok] == 0}
# Before this fix (#11905), the test would trigger an assertion in 'o->used >= c->ref_block_pos'

View File

@ -1106,3 +1106,12 @@ proc lmap args {
}
set temp
}
proc format_command {args} {
set cmd "*[llength $args]\r\n"
foreach a $args {
append cmd "$[string length $a]\r\n$a\r\n"
}
set _ $cmd
}

View File

@ -1,11 +1,4 @@
start_server {tags {"quit"}} {
proc format_command {args} {
set cmd "*[llength $args]\r\n"
foreach a $args {
append cmd "$[string length $a]\r\n$a\r\n"
}
set _ $cmd
}
test "QUIT returns OK" {
reconnect