Script that made modification will not break with unexpected NOREPLICAS error (#10855)

If a script made a modification and then was interrupted for taking too long.
there's a chance redis will detect that a replica dropped and would like to reject
write commands with NOREPLICAS due to insufficient good replicas.
returning an error on a command in this case breaks the script atomicity.

The same could in theory happen with READONLY, MISCONF, but i don't think
these state changes can happen during script execution.
This commit is contained in:
Oran Agra 2022-06-14 21:09:50 +03:00 committed by GitHub
parent ffa0077041
commit 8ef4f1dbad
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 51 additions and 0 deletions

View File

@ -363,6 +363,11 @@ static int scriptVerifyWriteCommandAllow(scriptRunCtx *run_ctx, char **err) {
if (!(run_ctx->c->cmd->flags & CMD_WRITE))
return C_OK;
/* If the script already made a modification to the dataset, we can't
* fail it on unpredictable error state. */
if ((run_ctx->flags & SCRIPT_WRITE_DIRTY))
return C_OK;
/* Write commands are forbidden against read-only slaves, or if a
* command marked as non-deterministic was already called in the context
* of this script. */

View File

@ -1570,6 +1570,52 @@ start_server {tags {"scripting"}} {
r config set min-replicas-to-write 0
}
test "not enough good replicas state change during long script" {
r set x "pre-script value"
r config set min-replicas-to-write 1
r config set lua-time-limit 10
start_server {tags {"external:skip"}} {
# add a replica and wait for the master to recognize it's online
r slaveof [srv -1 host] [srv -1 port]
wait_replica_online [srv -1 client]
# run a slow script that does one write, then waits for INFO to indicate
# that the replica dropped, and then runs another write
set rd [redis_deferring_client -1]
$rd eval {
redis.call('set','x',"script value")
while true do
local info = redis.call('info','replication')
if (string.match(info, "connected_slaves:0")) then
redis.call('set','x',info)
break
end
end
return 1
} 1 x
# wait for the script to time out and yield
wait_for_condition 100 100 {
[catch {r -1 ping} e] == 1
} else {
fail "Can't wait for script to start running"
}
catch {r -1 ping} e
assert_match {BUSY*} $e
# cause the replica to disconnect (triggering the busy script to exit)
r slaveof no one
# make sure the script was able to write after the replica dropped
assert_equal [$rd read] 1
assert_match {*connected_slaves:0*} [r -1 get x]
$rd close
}
r config set min-replicas-to-write 0
r config set lua-time-limit 5000
} {OK} {external:skip needs:repl}
test "allow-stale shebang flag" {
r config set replica-serve-stale-data no
r replicaof 127.0.0.1 1