Fix slot data persistency when advancing physical replication slots

Advancing a physical replication slot with pg_replication_slot_advance()
did not mark the slot as dirty if any advancing was done, preventing the
follow-up checkpoint to flush the slot data to disk.  This caused the
advancing to be lost even on clean restarts.  This does not happen for
logical slots as any advancing marked the slot as dirty.  Per
discussion, the original feature has been implemented so as in the event
of a crash the slot may move backwards to a past LSN.  This property is
kept and more documentation is added about that.

This commit adds some new TAP tests to check the persistency of physical
and logical slots after advancing across clean restarts.

Author: Alexey Kondratov, Michael Paquier
Reviewed-by: Andres Freund, Kyotaro Horiguchi, Craig Ringer
Discussion: https://postgr.es/m/059cc53a-8b14-653a-a24d-5f867503b0ee@postgrespro.ru
Backpatch-through: 11
This commit is contained in:
Michael Paquier 2020-01-30 11:14:02 +09:00
parent 26a81bb815
commit b0afdcad21
4 changed files with 67 additions and 16 deletions

View File

@ -20470,8 +20470,11 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup());
<entry>
Advances the current confirmed position of a replication slot named
<parameter>slot_name</parameter>. The slot will not be moved backwards,
and it will not be moved beyond the current insert location. Returns
name of the slot and real position to which it was advanced to.
and it will not be moved beyond the current insert location. Returns
the name of the slot and the real position to which it was advanced to.
The information of the updated slot is written out at the follow-up
checkpoint if any advancing is done. In the event of a crash, the
slot may return to an earlier position.
</entry>
</row>

View File

@ -370,6 +370,14 @@ pg_physical_replication_slot_advance(XLogRecPtr moveto)
MyReplicationSlot->data.restart_lsn = moveto;
SpinLockRelease(&MyReplicationSlot->mutex);
retlsn = moveto;
/*
* Dirty the slot so as it is written out at the next checkpoint.
* Note that the LSN position advanced may still be lost in the
* event of a crash, but this makes the data consistent after a
* clean shutdown.
*/
ReplicationSlotMarkDirty();
}
return retlsn;
@ -467,9 +475,9 @@ pg_logical_replication_slot_advance(XLogRecPtr moveto)
* keep track of their progress, so we should make more of an
* effort to save it for them.
*
* Dirty the slot so it's written out at the next checkpoint.
* We'll still lose its position on crash, as documented, but it's
* better than always losing the position even on clean restart.
* Dirty the slot so it is written out at the next checkpoint.
* The LSN position advanced to may still be lost on a crash
* but this makes the data consistent after a clean shutdown.
*/
ReplicationSlotMarkDirty();
}
@ -566,15 +574,6 @@ pg_replication_slot_advance(PG_FUNCTION_ARGS)
values[0] = NameGetDatum(&MyReplicationSlot->data.name);
nulls[0] = false;
/* Update the on disk state when lsn was updated. */
if (XLogRecPtrIsInvalid(endlsn))
{
ReplicationSlotMarkDirty();
ReplicationSlotsComputeRequiredXmin(false);
ReplicationSlotsComputeRequiredLSN();
ReplicationSlotSave();
}
ReplicationSlotRelease();
/* Return the reached position. */

View File

@ -3,7 +3,7 @@ use strict;
use warnings;
use PostgresNode;
use TestLib;
use Test::More tests => 32;
use Test::More tests => 34;
# Initialize master node
my $node_master = get_new_node('master');
@ -344,3 +344,28 @@ is($catalog_xmin, '',
is($xmin, '', 'xmin of cascaded slot null with hs feedback reset');
is($catalog_xmin, '',
'catalog xmin of cascaded slot still null with hs_feedback reset');
# Test physical slot advancing and its durability. Create a new slot on
# the primary, not used by any of the standbys. This reserves WAL at creation.
my $phys_slot = 'phys_slot';
$node_master->safe_psql('postgres',
"SELECT pg_create_physical_replication_slot('$phys_slot', true);");
$node_master->psql('postgres', "
CREATE TABLE tab_phys_slot (a int);
INSERT INTO tab_phys_slot VALUES (generate_series(1,10));");
my $current_lsn = $node_master->safe_psql('postgres',
"SELECT pg_current_wal_lsn();");
chomp($current_lsn);
my $psql_rc = $node_master->psql('postgres',
"SELECT pg_replication_slot_advance('$phys_slot', '$current_lsn'::pg_lsn);");
is($psql_rc, '0', 'slot advancing with physical slot');
my $phys_restart_lsn_pre = $node_master->safe_psql('postgres',
"SELECT restart_lsn from pg_replication_slots WHERE slot_name = '$phys_slot';");
chomp($phys_restart_lsn_pre);
# Slot advance should persist across clean restarts.
$node_master->restart;
my $phys_restart_lsn_post = $node_master->safe_psql('postgres',
"SELECT restart_lsn from pg_replication_slots WHERE slot_name = '$phys_slot';");
chomp($phys_restart_lsn_post);
ok(($phys_restart_lsn_pre cmp $phys_restart_lsn_post) == 0,
"physical slot advance persists across restarts");

View File

@ -7,7 +7,7 @@ use strict;
use warnings;
use PostgresNode;
use TestLib;
use Test::More tests => 10;
use Test::More tests => 12;
use Config;
# Initialize master node
@ -135,5 +135,29 @@ is($node_master->psql('postgres', 'DROP DATABASE otherdb'),
is($node_master->slot('otherdb_slot')->{'slot_name'},
undef, 'logical slot was actually dropped with DB');
# Test logical slot advancing and its durability.
my $logical_slot = 'logical_slot';
$node_master->safe_psql('postgres',
"SELECT pg_create_logical_replication_slot('$logical_slot', 'test_decoding', false);");
$node_master->psql('postgres', "
CREATE TABLE tab_logical_slot (a int);
INSERT INTO tab_logical_slot VALUES (generate_series(1,10));");
my $current_lsn = $node_master->safe_psql('postgres',
"SELECT pg_current_wal_lsn();");
chomp($current_lsn);
my $psql_rc = $node_master->psql('postgres',
"SELECT pg_replication_slot_advance('$logical_slot', '$current_lsn'::pg_lsn);");
is($psql_rc, '0', 'slot advancing with logical slot');
my $logical_restart_lsn_pre = $node_master->safe_psql('postgres',
"SELECT restart_lsn from pg_replication_slots WHERE slot_name = '$logical_slot';");
chomp($logical_restart_lsn_pre);
# Slot advance should persists across clean restarts.
$node_master->restart;
my $logical_restart_lsn_post = $node_master->safe_psql('postgres',
"SELECT restart_lsn from pg_replication_slots WHERE slot_name = '$logical_slot';");
chomp($logical_restart_lsn_post);
ok(($logical_restart_lsn_pre cmp $logical_restart_lsn_post) == 0,
"logical slot advance persists across restarts");
# done with the node
$node_master->stop;