Kill disk-based fork child when all replicas drop and 'save' is not enabled (#7819)

When all replicas waiting for a bgsave get disconnected (possibly due to output buffer limit),
It may be good to kill the bgsave child. in diskless replication it already happens, but in
disk-based, the child may still serve some purpose (for persistence).

By killing the child, we prevent it from eating COW memory in vain, and we also allow a new child fork sooner for the next full synchronization or bgsave.
We do that only if rdb persistence wasn't enabled in the configuration.

Btw, now, rdbRemoveTempFile in killRDBChild won't block server, so we can killRDBChild safely.
This commit is contained in:
Wang Yuan 2020-09-22 14:47:58 +08:00 committed by GitHub
parent 23b50bcccc
commit 1bb5794a1f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 95 additions and 0 deletions

View File

@ -1063,6 +1063,25 @@ void disconnectSlaves(void) {
}
}
/* Check if there is any other slave waiting dumping RDB finished expect me.
* This function is useful to judge current dumping RDB can be used for full
* synchronization or not. */
int anyOtherSlaveWaitRdb(client *except_me) {
listIter li;
listNode *ln;
listRewind(server.slaves, &li);
while((ln = listNext(&li))) {
client *slave = ln->value;
if (slave != except_me &&
slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END)
{
return 1;
}
}
return 0;
}
/* Remove the specified client from global lists where the client could
* be referenced, not including the Pub/Sub channels.
* This is used by freeClient() and replicationCacheMaster(). */
@ -1213,6 +1232,21 @@ void freeClient(client *c) {
/* Master/slave cleanup Case 1:
* we lost the connection with a slave. */
if (c->flags & CLIENT_SLAVE) {
/* If there is no any other slave waiting dumping RDB finished, the
* current child process need not continue to dump RDB, then we kill it.
* So child process won't use more memory, and we also can fork a new
* child process asap to dump rdb for next full synchronization or bgsave.
* But we also need to check if users enable 'save' RDB, if enable, we
* should not remove directly since that means RDB is important for users
* to keep data safe and we may delay configured 'save' for full sync. */
if (server.saveparamslen == 0 &&
c->replstate == SLAVE_STATE_WAIT_BGSAVE_END &&
server.rdb_child_pid != -1 &&
server.rdb_child_type == RDB_CHILD_TYPE_DISK &&
anyOtherSlaveWaitRdb(c) == 0)
{
killRDBChild();
}
if (c->replstate == SLAVE_STATE_SEND_BULK) {
if (c->repldbfd != -1) close(c->repldbfd);
if (c->replpreamble) sdsfree(c->replpreamble);

View File

@ -752,3 +752,64 @@ test {replicaof right after disconnection} {
}
}
}
test {Kill rdb child process if its dumping RDB is not useful} {
start_server {tags {"repl"}} {
set slave1 [srv 0 client]
start_server {} {
set slave2 [srv 0 client]
start_server {} {
set master [srv 0 client]
set master_host [srv 0 host]
set master_port [srv 0 port]
for {set i 0} {$i < 10} {incr i} {
$master set $i $i
}
# Generating RDB will cost 10s(10 * 1s)
$master config set rdb-key-save-delay 1000000
$master config set repl-diskless-sync no
$master config set save ""
$slave1 slaveof $master_host $master_port
$slave2 slaveof $master_host $master_port
# Wait for starting child
wait_for_condition 50 100 {
[s 0 rdb_bgsave_in_progress] == 1
} else {
fail "rdb child didn't start"
}
# Slave1 disconnect with master
$slave1 slaveof no one
# Shouldn't kill child since another slave wait for rdb
after 100
assert {[s 0 rdb_bgsave_in_progress] == 1}
# Slave2 disconnect with master
$slave2 slaveof no one
# Should kill child
wait_for_condition 20 10 {
[s 0 rdb_bgsave_in_progress] eq 0
} else {
fail "can't kill rdb child"
}
# If have save parameters, won't kill child
$master config set save "900 1"
$slave1 slaveof $master_host $master_port
$slave2 slaveof $master_host $master_port
wait_for_condition 50 100 {
[s 0 rdb_bgsave_in_progress] == 1
} else {
fail "rdb child didn't start"
}
$slave1 slaveof no one
$slave2 slaveof no one
after 200
assert {[s 0 rdb_bgsave_in_progress] == 1}
catch {$master shutdown nosave}
}
}
}
}