From 8ef4f1dbad3c40dc90920c4aaa8a2f9f72637786 Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Tue, 14 Jun 2022 21:09:50 +0300 Subject: [PATCH] Script that made modification will not break with unexpected NOREPLICAS error (#10855) If a script made a modification and then was interrupted for taking too long. there's a chance redis will detect that a replica dropped and would like to reject write commands with NOREPLICAS due to insufficient good replicas. returning an error on a command in this case breaks the script atomicity. The same could in theory happen with READONLY, MISCONF, but i don't think these state changes can happen during script execution. --- src/script.c | 5 +++++ tests/unit/scripting.tcl | 46 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/src/script.c b/src/script.c index f12f3c8c6..a6d2e1a0c 100644 --- a/src/script.c +++ b/src/script.c @@ -363,6 +363,11 @@ static int scriptVerifyWriteCommandAllow(scriptRunCtx *run_ctx, char **err) { if (!(run_ctx->c->cmd->flags & CMD_WRITE)) return C_OK; + /* If the script already made a modification to the dataset, we can't + * fail it on unpredictable error state. */ + if ((run_ctx->flags & SCRIPT_WRITE_DIRTY)) + return C_OK; + /* Write commands are forbidden against read-only slaves, or if a * command marked as non-deterministic was already called in the context * of this script. */ diff --git a/tests/unit/scripting.tcl b/tests/unit/scripting.tcl index a91eef7e9..f1c05a4d4 100644 --- a/tests/unit/scripting.tcl +++ b/tests/unit/scripting.tcl @@ -1570,6 +1570,52 @@ start_server {tags {"scripting"}} { r config set min-replicas-to-write 0 } + test "not enough good replicas state change during long script" { + r set x "pre-script value" + r config set min-replicas-to-write 1 + r config set lua-time-limit 10 + start_server {tags {"external:skip"}} { + # add a replica and wait for the master to recognize it's online + r slaveof [srv -1 host] [srv -1 port] + wait_replica_online [srv -1 client] + + # run a slow script that does one write, then waits for INFO to indicate + # that the replica dropped, and then runs another write + set rd [redis_deferring_client -1] + $rd eval { + redis.call('set','x',"script value") + while true do + local info = redis.call('info','replication') + if (string.match(info, "connected_slaves:0")) then + redis.call('set','x',info) + break + end + end + return 1 + } 1 x + + # wait for the script to time out and yield + wait_for_condition 100 100 { + [catch {r -1 ping} e] == 1 + } else { + fail "Can't wait for script to start running" + } + catch {r -1 ping} e + assert_match {BUSY*} $e + + # cause the replica to disconnect (triggering the busy script to exit) + r slaveof no one + + # make sure the script was able to write after the replica dropped + assert_equal [$rd read] 1 + assert_match {*connected_slaves:0*} [r -1 get x] + + $rd close + } + r config set min-replicas-to-write 0 + r config set lua-time-limit 5000 + } {OK} {external:skip needs:repl} + test "allow-stale shebang flag" { r config set replica-serve-stale-data no r replicaof 127.0.0.1 1