From 1bb5794a1fa1527ea9ce181269f2c8c1714fc045 Mon Sep 17 00:00:00 2001
From: Wang Yuan <wangyuan21@baidu.com>
Date: Tue, 22 Sep 2020 14:47:58 +0800
Subject: [PATCH] Kill disk-based fork child when all replicas drop and 'save'
 is not enabled (#7819)

When all replicas waiting for a bgsave get disconnected (possibly due to output buffer limit),
It may be good to kill the bgsave child. in diskless replication it already happens, but in
disk-based, the child may still serve some purpose (for persistence).

By killing the child, we prevent it from eating COW memory in vain, and we also allow a new child fork sooner for the next full synchronization or bgsave.
We do that only if rdb persistence wasn't enabled in the configuration.

Btw, now, rdbRemoveTempFile in killRDBChild won't block server, so we can killRDBChild safely.
---
 src/networking.c                  | 34 +++++++++++++++++
 tests/integration/replication.tcl | 61 +++++++++++++++++++++++++++++++
 2 files changed, 95 insertions(+)

diff --git a/src/networking.c b/src/networking.c
index d3b0cd241..7ac8ed1c4 100644
--- a/src/networking.c
+++ b/src/networking.c
@@ -1063,6 +1063,25 @@ void disconnectSlaves(void) {
     }
 }
 
+/* Check if there is any other slave waiting dumping RDB finished expect me.
+ * This function is useful to judge current dumping RDB can be used for full
+ * synchronization or not. */
+int anyOtherSlaveWaitRdb(client *except_me) {
+    listIter li;
+    listNode *ln;
+
+    listRewind(server.slaves, &li);
+    while((ln = listNext(&li))) {
+        client *slave = ln->value;
+        if (slave != except_me &&
+            slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END)
+        {
+            return 1;
+        }
+    }
+    return 0;
+}
+
 /* Remove the specified client from global lists where the client could
  * be referenced, not including the Pub/Sub channels.
  * This is used by freeClient() and replicationCacheMaster(). */
@@ -1213,6 +1232,21 @@ void freeClient(client *c) {
     /* Master/slave cleanup Case 1:
      * we lost the connection with a slave. */
     if (c->flags & CLIENT_SLAVE) {
+        /* If there is no any other slave waiting dumping RDB finished, the
+         * current child process need not continue to dump RDB, then we kill it.
+         * So child process won't use more memory, and we also can fork a new
+         * child process asap to dump rdb for next full synchronization or bgsave.
+         * But we also need to check if users enable 'save' RDB, if enable, we
+         * should not remove directly since that means RDB is important for users
+         * to keep data safe and we may delay configured 'save' for full sync. */
+        if (server.saveparamslen == 0 &&
+            c->replstate == SLAVE_STATE_WAIT_BGSAVE_END &&
+            server.rdb_child_pid != -1 &&
+            server.rdb_child_type == RDB_CHILD_TYPE_DISK &&
+            anyOtherSlaveWaitRdb(c) == 0)
+        {
+            killRDBChild();
+        }
         if (c->replstate == SLAVE_STATE_SEND_BULK) {
             if (c->repldbfd != -1) close(c->repldbfd);
             if (c->replpreamble) sdsfree(c->replpreamble);
diff --git a/tests/integration/replication.tcl b/tests/integration/replication.tcl
index 5438b0eba..d2c8b8e56 100644
--- a/tests/integration/replication.tcl
+++ b/tests/integration/replication.tcl
@@ -752,3 +752,64 @@ test {replicaof right after disconnection} {
         }
     }
 }
+
+test {Kill rdb child process if its dumping RDB is not useful} {
+    start_server {tags {"repl"}} {
+        set slave1 [srv 0 client]
+        start_server {} {
+            set slave2 [srv 0 client]
+            start_server {} {
+                set master [srv 0 client]
+                set master_host [srv 0 host]
+                set master_port [srv 0 port]
+                for {set i 0} {$i < 10} {incr i} {
+                    $master set $i $i
+                }
+                # Generating RDB will cost 10s(10 * 1s)
+                $master config set rdb-key-save-delay 1000000
+                $master config set repl-diskless-sync no
+                $master config set save ""
+
+                $slave1 slaveof $master_host $master_port
+                $slave2 slaveof $master_host $master_port
+
+                # Wait for starting child
+                wait_for_condition 50 100 {
+                    [s 0 rdb_bgsave_in_progress] == 1
+                } else {
+                    fail "rdb child didn't start"
+                }
+
+                # Slave1 disconnect with master
+                $slave1 slaveof no one
+                # Shouldn't kill child since another slave wait for rdb
+                after 100
+                assert {[s 0 rdb_bgsave_in_progress] == 1}
+
+                # Slave2 disconnect with master
+                $slave2 slaveof no one
+                # Should kill child
+                wait_for_condition 20 10 {
+                    [s 0 rdb_bgsave_in_progress] eq 0
+                } else {
+                    fail "can't kill rdb child"
+                }
+
+                # If have save parameters, won't kill child
+                $master config set save "900 1"
+                $slave1 slaveof $master_host $master_port
+                $slave2 slaveof $master_host $master_port
+                wait_for_condition 50 100 {
+                    [s 0 rdb_bgsave_in_progress] == 1
+                } else {
+                    fail "rdb child didn't start"
+                }
+                $slave1 slaveof no one
+                $slave2 slaveof no one
+                after 200
+                assert {[s 0 rdb_bgsave_in_progress] == 1}
+                catch {$master shutdown nosave}
+            }
+        }
+    }
+}