Fix false reports in pg_visibility

Currently, pg_visibility computes its xid horizon using the
GetOldestNonRemovableTransactionId().  The problem is that this horizon can
sometimes go backward.  That can lead to reporting false errors.

In order to fix that, this commit implements a new function
GetStrictOldestNonRemovableTransactionId().  This function computes the xid
horizon, which would be guaranteed to be newer or equal to any xid horizon
computed before.

We have to do the following to achieve this.

1. Ignore processes xmin's, because they consider connection to other databases
   that were ignored before.
2. Ignore KnownAssignedXids, because they are not database-aware. At the same
   time, the primary could compute its horizons database-aware.
3. Ignore walsender xmin, because it could go backward if some replication
   connections don't use replication slots.

As a result, we're using only currently running xids to compute the horizon.
Surely these would significantly sacrifice accuracy.  But we have to do so to
avoid reporting false errors.

Inspired by earlier patch by Daniel Shelepanov and the following discussion
with Robert Haas and Tom Lane.

Discussion: https://postgr.es/m/1649062270.289865713%40f403.i.mail.ru
Reviewed-by: Alexander Lakhin, Dmitry Koval
This commit is contained in:
Alexander Korotkov 2024-03-14 13:08:53 +02:00
parent cc6e64afda
commit e85662df44
6 changed files with 129 additions and 6 deletions

View File

@ -11,6 +11,7 @@ DATA = pg_visibility--1.1.sql pg_visibility--1.1--1.2.sql \
PGFILEDESC = "pg_visibility - page visibility information"
REGRESS = pg_visibility
TAP_TESTS = 1
ifdef USE_PGXS
PG_CONFIG = pg_config

View File

@ -32,5 +32,9 @@ tests += {
'sql': [
'pg_visibility',
],
'tap': {
'tests': [
't/001_concurrent_transaction.pl',
],
},
}

View File

@ -19,6 +19,7 @@
#include "funcapi.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "storage/smgr.h"
#include "utils/rel.h"
@ -532,6 +533,63 @@ collect_visibility_data(Oid relid, bool include_pd)
return info;
}
/*
* The "strict" version of GetOldestNonRemovableTransactionId(). The
* pg_visibility check can tolerate false positives (don't report some of the
* errors), but can't tolerate false negatives (report false errors). Normally,
* horizons move forwards, but there are cases when it could move backward
* (see comment for ComputeXidHorizons()).
*
* This is why we have to implement our own function for xid horizon, which
* would be guaranteed to be newer or equal to any xid horizon computed before.
* We have to do the following to achieve this.
*
* 1. Ignore processes xmin's, because they consider connection to other
* databases that were ignored before.
* 2. Ignore KnownAssignedXids, because they are not database-aware. At the
* same time, the primary could compute its horizons database-aware.
* 3. Ignore walsender xmin, because it could go backward if some replication
* connections don't use replication slots.
*
* As a result, we're using only currently running xids to compute the horizon.
* Surely these would significantly sacrifice accuracy. But we have to do so
* to avoid reporting false errors.
*/
static TransactionId
GetStrictOldestNonRemovableTransactionId(Relation rel)
{
RunningTransactions runningTransactions;
if (rel == NULL || rel->rd_rel->relisshared || RecoveryInProgress())
{
/* Shared relation: take into account all running xids */
runningTransactions = GetRunningTransactionData();
LWLockRelease(ProcArrayLock);
LWLockRelease(XidGenLock);
return runningTransactions->oldestRunningXid;
}
else if (!RELATION_IS_LOCAL(rel))
{
/*
* Normal relation: take into account xids running within the current
* database
*/
runningTransactions = GetRunningTransactionData();
LWLockRelease(ProcArrayLock);
LWLockRelease(XidGenLock);
return runningTransactions->oldestDatabaseRunningXid;
}
else
{
/*
* For temporary relations, ComputeXidHorizons() uses only
* TransamVariables->latestCompletedXid and MyProc->xid. These two
* shouldn't go backwards. So we're fine with this horizon.
*/
return GetOldestNonRemovableTransactionId(rel);
}
}
/*
* Returns a list of items whose visibility map information does not match
* the status of the tuples on the page.
@ -563,7 +621,7 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
check_relation_relkind(rel);
if (all_visible)
OldestXmin = GetOldestNonRemovableTransactionId(rel);
OldestXmin = GetStrictOldestNonRemovableTransactionId(rel);
nblocks = RelationGetNumberOfBlocks(rel);
@ -671,11 +729,11 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
* retake ProcArrayLock here while we're holding the buffer
* exclusively locked, but it should be safe against
* deadlocks, because surely
* GetOldestNonRemovableTransactionId() should never take a
* buffer lock. And this shouldn't happen often, so it's worth
* being careful so as to avoid false positives.
* GetStrictOldestNonRemovableTransactionId() should never
* take a buffer lock. And this shouldn't happen often, so
* it's worth being careful so as to avoid false positives.
*/
RecomputedOldestXmin = GetOldestNonRemovableTransactionId(rel);
RecomputedOldestXmin = GetStrictOldestNonRemovableTransactionId(rel);
if (!TransactionIdPrecedes(OldestXmin, RecomputedOldestXmin))
record_corrupt_item(items, &tuple.t_self);

View File

@ -0,0 +1,47 @@
# Copyright (c) 2021-2024, PostgreSQL Global Development Group
# Check that a concurrent transaction doesn't cause false negatives in
# pg_check_visible() function
use strict;
use warnings FATAL => 'all';
use PostgreSQL::Test::Cluster;
use PostgreSQL::Test::Utils;
use Test::More;
my $node = PostgreSQL::Test::Cluster->new('main');
$node->init;
$node->start;
# Setup another database
$node->safe_psql("postgres", "CREATE DATABASE other_database;\n");
my $bsession = $node->background_psql('other_database');
# Run a concurrent transaction
$bsession->query_safe(
qq[
BEGIN;
SELECT txid_current();
]);
# Create a sample table and run vacuum
$node->safe_psql("postgres",
"CREATE EXTENSION pg_visibility;\n"
. "CREATE TABLE vacuum_test AS SELECT 42 i;\n"
. "VACUUM (disable_page_skipping) vacuum_test;");
# Run pg_check_visible()
my $result = $node->safe_psql("postgres",
"SELECT * FROM pg_check_visible('vacuum_test');");
# There should be no false negatives
ok($result eq "", "pg_check_visible() detects no errors");
# Shutdown
$bsession->query_safe("COMMIT;");
$bsession->quit;
$node->stop;
done_testing();

View File

@ -2688,6 +2688,7 @@ GetRunningTransactionData(void)
RunningTransactions CurrentRunningXacts = &CurrentRunningXactsData;
TransactionId latestCompletedXid;
TransactionId oldestRunningXid;
TransactionId oldestDatabaseRunningXid;
TransactionId *xids;
int index;
int count;
@ -2732,7 +2733,7 @@ GetRunningTransactionData(void)
latestCompletedXid =
XidFromFullTransactionId(TransamVariables->latestCompletedXid);
oldestRunningXid =
oldestDatabaseRunningXid = oldestRunningXid =
XidFromFullTransactionId(TransamVariables->nextXid);
/*
@ -2740,6 +2741,8 @@ GetRunningTransactionData(void)
*/
for (index = 0; index < arrayP->numProcs; index++)
{
int pgprocno = arrayP->pgprocnos[index];
PGPROC *proc = &allProcs[pgprocno];
TransactionId xid;
/* Fetch xid just once - see GetNewTransactionId */
@ -2760,6 +2763,13 @@ GetRunningTransactionData(void)
if (TransactionIdPrecedes(xid, oldestRunningXid))
oldestRunningXid = xid;
/*
* Also, update the oldest running xid within the current database.
*/
if (proc->databaseId == MyDatabaseId &&
TransactionIdPrecedes(xid, oldestRunningXid))
oldestDatabaseRunningXid = xid;
if (ProcGlobal->subxidStates[index].overflowed)
suboverflowed = true;
@ -2826,6 +2836,7 @@ GetRunningTransactionData(void)
CurrentRunningXacts->subxid_overflow = suboverflowed;
CurrentRunningXacts->nextXid = XidFromFullTransactionId(TransamVariables->nextXid);
CurrentRunningXacts->oldestRunningXid = oldestRunningXid;
CurrentRunningXacts->oldestDatabaseRunningXid = oldestDatabaseRunningXid;
CurrentRunningXacts->latestCompletedXid = latestCompletedXid;
Assert(TransactionIdIsValid(CurrentRunningXacts->nextXid));

View File

@ -82,6 +82,8 @@ typedef struct RunningTransactionsData
bool subxid_overflow; /* snapshot overflowed, subxids missing */
TransactionId nextXid; /* xid from TransamVariables->nextXid */
TransactionId oldestRunningXid; /* *not* oldestXmin */
TransactionId oldestDatabaseRunningXid; /* same as above, but within the
* current database */
TransactionId latestCompletedXid; /* so we can set xmax */
TransactionId *xids; /* array of (sub)xids still running */