Refactor heap_prune_chain()

Keep track of the number of deleted tuples in PruneState and record this
information when recording a tuple as dead, unused, or redirected. This
removes a special case from the traversal and chain processing logic, as
well as setting a precedent of recording the impact of prune actions in
the record functions themselves. This paradigm will be used in future
commits, which will move tracking of additional statistics on pruning
actions from lazy_scan_prune() to heap_prune_chain().

Simplify heap_prune_chain()'s chain traversal logic by handling each
case explicitly. That is, do not attempt to share code when processing
different types of chains. For each category of chain, process it
specifically and procedurally: first handling the root, then any
intervening tuples, and, finally, the end of the chain.

While we are at it, add a few new comments to heap_prune_chain()
clarifying some special cases involving RECENTLY_DEAD tuples.

Author: Melanie Plageman <melanieplageman@gmail.com>
Discussion: https://www.postgresql.org/message-id/20240330055710.kqg6ii2cdojsxgje@liskov
This commit is contained in:
Heikki Linnakangas 2024-04-01 13:28:44 +03:00
parent 9917e79d99
commit 7aa00f1360
1 changed file with 120 additions and 95 deletions

View File

@ -51,20 +51,22 @@ typedef struct
* 1. Otherwise every access would need to subtract 1.
*/
bool marked[MaxHeapTuplesPerPage + 1];
int ndeleted; /* Number of tuples deleted from the page */
} PruneState;
/* Local functions */
static HTSV_Result heap_prune_satisfies_vacuum(PruneState *prstate,
HeapTuple tup,
Buffer buffer);
static int heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
static void heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
OffsetNumber rootoffnum, int8 *htsv, PruneState *prstate);
static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid);
static void heap_prune_record_redirect(PruneState *prstate,
OffsetNumber offnum, OffsetNumber rdoffnum);
static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum);
static void heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum);
static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum);
OffsetNumber offnum, OffsetNumber rdoffnum, bool was_normal);
static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum, bool was_normal);
static void heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum, bool was_normal);
static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum, bool was_normal);
static void page_verify_redirects(Page page);
@ -241,6 +243,7 @@ heap_page_prune(Relation relation, Buffer buffer,
prstate.snapshotConflictHorizon = InvalidTransactionId;
prstate.nredirected = prstate.ndead = prstate.nunused = 0;
memset(prstate.marked, 0, sizeof(prstate.marked));
prstate.ndeleted = 0;
/*
* presult->htsv is not initialized here because all ntuple spots in the
@ -321,8 +324,8 @@ heap_page_prune(Relation relation, Buffer buffer,
continue;
/* Process this item or chain of items */
presult->ndeleted += heap_prune_chain(page, blockno, maxoff, offnum,
presult->htsv, &prstate);
heap_prune_chain(page, blockno, maxoff,
offnum, presult->htsv, &prstate);
}
/* Clear the offset information once we have processed the given page. */
@ -394,8 +397,9 @@ heap_page_prune(Relation relation, Buffer buffer,
END_CRIT_SECTION();
/* Record number of newly-set-LP_DEAD items for caller */
/* Copy information back for caller */
presult->nnewlpdead = prstate.ndead;
presult->ndeleted = prstate.ndeleted;
}
@ -444,22 +448,23 @@ heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
* to the redirected[] array (two entries per redirection); items to be set to
* LP_DEAD state are added to nowdead[]; and items to be set to LP_UNUSED
* state are added to nowunused[].
*
* Returns the number of tuples (to be) deleted from the page.
*/
static int
static void
heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
OffsetNumber rootoffnum, int8 *htsv, PruneState *prstate)
{
int ndeleted = 0;
TransactionId priorXmax = InvalidTransactionId;
ItemId rootlp;
HeapTupleHeader htup;
OffsetNumber latestdead = InvalidOffsetNumber,
offnum;
OffsetNumber offnum;
OffsetNumber chainitems[MaxHeapTuplesPerPage];
int nchain = 0,
i;
/*
* After traversing the HOT chain, ndeadchain is the index in chainitems
* of the first live successor after the last dead item.
*/
int ndeadchain = 0,
nchain = 0;
rootlp = PageGetItemId(page, rootoffnum);
@ -494,14 +499,12 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
if (htsv[rootoffnum] == HEAPTUPLE_DEAD &&
!HeapTupleHeaderIsHotUpdated(htup))
{
heap_prune_record_unused(prstate, rootoffnum);
heap_prune_record_unused(prstate, rootoffnum, true);
HeapTupleHeaderAdvanceConflictHorizon(htup,
&prstate->snapshotConflictHorizon);
ndeleted++;
}
/* Nothing more to do */
return ndeleted;
return;
}
}
@ -512,8 +515,6 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
for (;;)
{
ItemId lp;
bool tupdead,
recent_dead;
/* Sanity check (pure paranoia) */
if (offnum < FirstOffsetNumber)
@ -563,7 +564,7 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
* the LP was already marked dead.
*/
if (unlikely(prstate->mark_unused_now))
heap_prune_record_unused(prstate, offnum);
heap_prune_record_unused(prstate, offnum, false);
break;
}
@ -586,23 +587,41 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
/*
* Check tuple's visibility status.
*/
tupdead = recent_dead = false;
switch (htsv_get_valid_status(htsv[offnum]))
{
case HEAPTUPLE_DEAD:
tupdead = true;
/* Remember the last DEAD tuple seen */
ndeadchain = nchain;
HeapTupleHeaderAdvanceConflictHorizon(htup,
&prstate->snapshotConflictHorizon);
/* Advance to next chain member */
break;
case HEAPTUPLE_RECENTLY_DEAD:
recent_dead = true;
/*
* This tuple may soon become DEAD. Update the hint field so
* that the page is reconsidered for pruning in future.
*
* We don't need to advance the conflict horizon for
* RECENTLY_DEAD tuples, even if we are removing them. This
* is because we only remove RECENTLY_DEAD tuples if they
* precede a DEAD tuple, and the DEAD tuple must have been
* inserted by a newer transaction than the RECENTLY_DEAD
* tuple by virtue of being later in the chain. We will have
* advanced the conflict horizon for the DEAD tuple.
*/
heap_prune_record_prunable(prstate,
HeapTupleHeaderGetUpdateXid(htup));
/*
* Advance past RECENTLY_DEAD tuples just in case there's a
* DEAD one after them. We have to make sure that we don't
* miss any DEAD tuples, since DEAD tuples that still have
* tuple storage after pruning will confuse VACUUM.
*/
break;
case HEAPTUPLE_DELETE_IN_PROGRESS:
@ -613,7 +632,7 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
*/
heap_prune_record_prunable(prstate,
HeapTupleHeaderGetUpdateXid(htup));
break;
goto process_chain;
case HEAPTUPLE_LIVE:
case HEAPTUPLE_INSERT_IN_PROGRESS:
@ -624,35 +643,19 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
* But we don't. See related decisions about when to mark the
* page prunable in heapam.c.
*/
break;
goto process_chain;
default:
elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
break;
goto process_chain;
}
/*
* Remember the last DEAD tuple seen. We will advance past
* RECENTLY_DEAD tuples just in case there's a DEAD one after them;
* but we can't advance past anything else. We have to make sure that
* we don't miss any DEAD tuples, since DEAD tuples that still have
* tuple storage after pruning will confuse VACUUM.
*/
if (tupdead)
{
latestdead = offnum;
HeapTupleHeaderAdvanceConflictHorizon(htup,
&prstate->snapshotConflictHorizon);
}
else if (!recent_dead)
break;
/*
* If the tuple is not HOT-updated, then we are at the end of this
* HOT-update chain.
*/
if (!HeapTupleHeaderIsHotUpdated(htup))
break;
goto process_chain;
/* HOT implies it can't have moved to different partition */
Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
@ -665,57 +668,52 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
priorXmax = HeapTupleHeaderGetUpdateXid(htup);
}
/*
* If we found a DEAD tuple in the chain, adjust the HOT chain so that all
* the DEAD tuples at the start of the chain are removed and the root line
* pointer is appropriately redirected.
*/
if (OffsetNumberIsValid(latestdead))
{
/*
* Mark as unused each intermediate item that we are able to remove
* from the chain.
*
* When the previous item is the last dead tuple seen, we are at the
* right candidate for redirection.
*/
for (i = 1; (i < nchain) && (chainitems[i - 1] != latestdead); i++)
{
heap_prune_record_unused(prstate, chainitems[i]);
ndeleted++;
}
/*
* If the root entry had been a normal tuple, we are deleting it, so
* count it in the result. But changing a redirect (even to DEAD
* state) doesn't count.
*/
if (ItemIdIsNormal(rootlp))
ndeleted++;
/*
* If the DEAD tuple is at the end of the chain, the entire chain is
* dead and the root line pointer can be marked dead. Otherwise just
* redirect the root to the correct chain member.
*/
if (i >= nchain)
heap_prune_record_dead_or_unused(prstate, rootoffnum);
else
heap_prune_record_redirect(prstate, rootoffnum, chainitems[i]);
}
else if (nchain < 2 && ItemIdIsRedirected(rootlp))
if (ItemIdIsRedirected(rootlp) && nchain < 2)
{
/*
* We found a redirect item that doesn't point to a valid follow-on
* item. This can happen if the loop in heap_page_prune caused us to
* visit the dead successor of a redirect item before visiting the
* redirect item. We can clean up by setting the redirect item to
* DEAD state or LP_UNUSED if the caller indicated.
* LP_DEAD state or LP_UNUSED if the caller indicated.
*/
heap_prune_record_dead_or_unused(prstate, rootoffnum);
heap_prune_record_dead_or_unused(prstate, rootoffnum, false);
return;
}
return ndeleted;
process_chain:
if (ndeadchain == 0)
{
/*
* No DEAD tuple was found, so the chain is entirely composed of
* normal, unchanged tuples. Leave it alone.
*/
}
else if (ndeadchain == nchain)
{
/*
* The entire chain is dead. Mark the root line pointer LP_DEAD, and
* fully remove the other tuples in the chain.
*/
heap_prune_record_dead_or_unused(prstate, rootoffnum, ItemIdIsNormal(rootlp));
for (int i = 1; i < nchain; i++)
heap_prune_record_unused(prstate, chainitems[i], true);
}
else
{
/*
* We found a DEAD tuple in the chain. Redirect the root line pointer
* to the first non-DEAD tuple, and mark as unused each intermediate
* item that we are able to remove from the chain.
*/
heap_prune_record_redirect(prstate, rootoffnum, chainitems[ndeadchain],
ItemIdIsNormal(rootlp));
for (int i = 1; i < ndeadchain; i++)
heap_prune_record_unused(prstate, chainitems[i], true);
/* the rest of tuples in the chain are normal, unchanged tuples */
}
}
/* Record lowest soon-prunable XID */
@ -735,7 +733,8 @@ heap_prune_record_prunable(PruneState *prstate, TransactionId xid)
/* Record line pointer to be redirected */
static void
heap_prune_record_redirect(PruneState *prstate,
OffsetNumber offnum, OffsetNumber rdoffnum)
OffsetNumber offnum, OffsetNumber rdoffnum,
bool was_normal)
{
Assert(prstate->nredirected < MaxHeapTuplesPerPage);
prstate->redirected[prstate->nredirected * 2] = offnum;
@ -745,17 +744,34 @@ heap_prune_record_redirect(PruneState *prstate,
prstate->marked[offnum] = true;
Assert(!prstate->marked[rdoffnum]);
prstate->marked[rdoffnum] = true;
/*
* If the root entry had been a normal tuple, we are deleting it, so count
* it in the result. But changing a redirect (even to DEAD state) doesn't
* count.
*/
if (was_normal)
prstate->ndeleted++;
}
/* Record line pointer to be marked dead */
static void
heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum)
heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum,
bool was_normal)
{
Assert(prstate->ndead < MaxHeapTuplesPerPage);
prstate->nowdead[prstate->ndead] = offnum;
prstate->ndead++;
Assert(!prstate->marked[offnum]);
prstate->marked[offnum] = true;
/*
* If the root entry had been a normal tuple, we are deleting it, so count
* it in the result. But changing a redirect (even to DEAD state) doesn't
* count.
*/
if (was_normal)
prstate->ndeleted++;
}
/*
@ -765,7 +781,8 @@ heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum)
* pointers LP_DEAD if mark_unused_now is true.
*/
static void
heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum)
heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum,
bool was_normal)
{
/*
* If the caller set mark_unused_now to true, we can remove dead tuples
@ -774,20 +791,28 @@ heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum)
* likely.
*/
if (unlikely(prstate->mark_unused_now))
heap_prune_record_unused(prstate, offnum);
heap_prune_record_unused(prstate, offnum, was_normal);
else
heap_prune_record_dead(prstate, offnum);
heap_prune_record_dead(prstate, offnum, was_normal);
}
/* Record line pointer to be marked unused */
static void
heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum)
heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum, bool was_normal)
{
Assert(prstate->nunused < MaxHeapTuplesPerPage);
prstate->nowunused[prstate->nunused] = offnum;
prstate->nunused++;
Assert(!prstate->marked[offnum]);
prstate->marked[offnum] = true;
/*
* If the root entry had been a normal tuple, we are deleting it, so count
* it in the result. But changing a redirect (even to DEAD state) doesn't
* count.
*/
if (was_normal)
prstate->ndeleted++;
}