WAITAOF: Update fsynced_reploff_pending even if there's nothing to fsync (#12622)

The problem is that WAITAOF could hang in case commands were
propagated only to replicas.
This can happen if a module uses RM_Call with the REDISMODULE_ARGV_NO_AOF flag.
In that case, master_repl_offset would increase, but there would be nothing to fsync, so
in the absence of other traffic, fsynced_reploff_pending would stay static, and WAITAOF could hang.

This commit updates fsynced_reploff_pending to the latest offset in flushAppendOnlyFile in case
there's nothing to fsync, i.e. if it's behind because of the above-mentioned case, it'll be refreshed
and release the WAITAOF.

Other changes:
Fix a race in wait.tcl (client getting blocked vs. the fsync thread)
This commit is contained in:
guybe7 2023-09-28 16:19:20 +02:00 committed by GitHub
parent bfa3931a04
commit c2a4b78491
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 40 additions and 1 deletions

View File

@ -1087,6 +1087,13 @@ void flushAppendOnlyFile(int force) {
{
goto try_fsync;
} else {
/* All data is fsync'd already: Update fsynced_reploff_pending just in case.
* This is needed to avoid a WAITAOF hang in case a module used RM_Call with the NO_AOF flag,
* in which case master_repl_offset will increase but fsynced_reploff_pending won't be updated
* (because there's no reason, from the AOF POV, to call fsync) and then WAITAOF may wait on
* the higher offset (which contains data that was only propagated to replicas, and not to AOF) */
if (!sync_in_progress && server.aof_fsync != AOF_FSYNC_NO)
atomicSet(server.fsynced_reploff_pending, server.master_repl_offset);
return;
}
}

View File

@ -133,4 +133,36 @@ start_server {tags {"modules usercall"}} {
assert_equal [dict get $entry reason] {command}
assert_match {*cmd=usercall.call_with_user_flag*} [dict get $entry client-info]
}
# Regression test: WAITAOF must not hang after a module command is run via
# usercall.call_with_user_flag on the master. A second server is started here;
# this inner server (srv 0) is used as the replica and the enclosing server
# (srv -1) is used as the master.
start_server {tags {"wait aof network external:skip"}} {
# Client handle and connection details of the inner server (srv 0), the replica.
set slave [srv 0 client]
set slave_host [srv 0 host]
set slave_port [srv 0 port]
set slave_pid [srv 0 pid]
# Client handle and address of the enclosing server (srv -1), the master.
set master [srv -1 client]
set master_host [srv -1 host]
set master_port [srv -1 port]
# Turn on AOF with the everysec fsync policy on both servers.
$master config set appendonly yes
$master config set appendfsync everysec
$slave config set appendonly yes
$slave config set appendfsync everysec
test {Setup slave} {
# Attach the replica to the master, then poll master_link_status of server 0
# until it reports "up"; fail the test otherwise.
# NOTE(review): 50 and 100 are presumably max retries and delay in ms - confirm
# against the wait_for_condition helper.
$slave slaveof $master_host $master_port
wait_for_condition 50 100 {
[s 0 master_link_status] eq {up}
} else {
fail "Replication not started."
}
}
test {test module replicate only to replicas and WAITAOF} {
# Baseline: after a regular write, WAITAOF (1 local, 1 replica, 10000 timeout)
# must report one local and one replica acknowledgement.
$master set x 1
assert_equal [$master waitaof 1 1 10000] {1 1}
# Run a command through the usercall test module.
# NOTE(review): presumably the "A!" flags make the module's RM_Call propagate
# to replicas but not to the AOF (the scenario named in the test title) -
# confirm against the usercall module source.
$master usercall.call_with_user_flag A! config set loglevel notice
# Make sure WAITAOF doesn't hang
assert_equal [$master waitaof 1 1 10000] {1 1}
}
}
}

View File

@ -121,10 +121,10 @@ tags {"wait aof network external:skip"} {
r config set appendfsync always
$master incr foo
assert_equal [$master waitaof 1 0 0] {1 0}
r config set appendfsync everysec
}
test {WAITAOF local wait and then stop aof} {
r config set appendfsync no
set rd [redis_deferring_client]
$rd incr foo
$rd read