Allow locking updated tuples in tuple_update() and tuple_delete()

Currently, in read committed transaction isolation mode (default), we have the
following sequence of actions when tuple_update()/tuple_delete() finds
the tuple updated by the concurrent transaction.

1. Attempt to update/delete tuple with tuple_update()/tuple_delete(), which
   returns TM_Updated.
2. Lock tuple with tuple_lock().
3. Re-evaluate plan qual (recheck if we still need to update/delete and
   calculate the new tuple for update).
4. Second attempt to update/delete tuple with tuple_update()/tuple_delete().
   This attempt should be successful, since the tuple was previously locked.

This commit eliminates step 2 by taking the lock during the first
tuple_update()/tuple_delete() call.  The heap table access method saves some
effort by checking the updated tuple once instead of twice.  Future
undo-based table access methods, which will start from the latest row version,
can immediately place a lock there.

Also, this commit makes tuple_update()/tuple_delete() optionally save the old
tuple into the dedicated slot.  That saves the effort of re-fetching tuples in
certain cases.

The code in nodeModifyTable.c is simplified by removing the nested switch/case.

Discussion: https://postgr.es/m/CAPpHfdua-YFw3XTprfutzGp28xXLigFtzNbuFY8yPhqeq6X5kg%40mail.gmail.com
Reviewed-by: Aleksander Alekseev, Pavel Borisov, Vignesh C, Mason Sharp
Reviewed-by: Andres Freund, Chris Travers
This commit is contained in:
Alexander Korotkov 2024-03-26 01:27:56 +02:00
parent c7076ba6ad
commit 87985cc925
9 changed files with 502 additions and 346 deletions

View File

@ -2496,10 +2496,11 @@ xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask)
}
/*
* heap_delete - delete a tuple
* heap_delete - delete a tuple, optionally fetching it into a slot
*
* See table_tuple_delete() for an explanation of the parameters, except that
* this routine directly takes a tuple rather than a slot.
* this routine directly takes a tuple rather than a slot. Also, we don't
* place a lock on the tuple in this function, just fetch the existing version.
*
* In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
* t_xmax (resolving a possible MultiXact, if necessary), and t_cmax (the last
@ -2508,8 +2509,9 @@ xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask)
*/
TM_Result
heap_delete(Relation relation, ItemPointer tid,
CommandId cid, Snapshot crosscheck, bool wait,
TM_FailureData *tmfd, bool changingPart)
CommandId cid, Snapshot crosscheck, int options,
TM_FailureData *tmfd, bool changingPart,
TupleTableSlot *oldSlot)
{
TM_Result result;
TransactionId xid = GetCurrentTransactionId();
@ -2587,7 +2589,7 @@ l1:
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("attempted to delete invisible tuple")));
}
else if (result == TM_BeingModified && wait)
else if (result == TM_BeingModified && (options & TABLE_MODIFY_WAIT))
{
TransactionId xwait;
uint16 infomask;
@ -2728,7 +2730,30 @@ l1:
tmfd->cmax = HeapTupleHeaderGetCmax(tp.t_data);
else
tmfd->cmax = InvalidCommandId;
UnlockReleaseBuffer(buffer);
/*
* If we're asked to lock the updated tuple, we just fetch the
* existing tuple. That lets the caller save some resources on
* placing the lock.
*/
if (result == TM_Updated &&
(options & TABLE_MODIFY_LOCK_UPDATED))
{
BufferHeapTupleTableSlot *bslot;
Assert(TTS_IS_BUFFERTUPLE(oldSlot));
bslot = (BufferHeapTupleTableSlot *) oldSlot;
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
bslot->base.tupdata = tp;
ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata,
oldSlot,
buffer);
}
else
{
UnlockReleaseBuffer(buffer);
}
if (have_tuple_lock)
UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
if (vmbuffer != InvalidBuffer)
@ -2902,8 +2927,24 @@ l1:
*/
CacheInvalidateHeapTuple(relation, &tp, NULL);
/* Now we can release the buffer */
ReleaseBuffer(buffer);
/* Fetch the old tuple version if we're asked for that. */
if (options & TABLE_MODIFY_FETCH_OLD_TUPLE)
{
BufferHeapTupleTableSlot *bslot;
Assert(TTS_IS_BUFFERTUPLE(oldSlot));
bslot = (BufferHeapTupleTableSlot *) oldSlot;
bslot->base.tupdata = tp;
ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata,
oldSlot,
buffer);
}
else
{
/* Now we can release the buffer */
ReleaseBuffer(buffer);
}
/*
* Release the lmgr tuple lock, if we had it.
@ -2935,8 +2976,8 @@ simple_heap_delete(Relation relation, ItemPointer tid)
result = heap_delete(relation, tid,
GetCurrentCommandId(true), InvalidSnapshot,
true /* wait for commit */ ,
&tmfd, false /* changingPart */ );
TABLE_MODIFY_WAIT /* wait for commit */ ,
&tmfd, false /* changingPart */ , NULL);
switch (result)
{
case TM_SelfModified:
@ -2963,10 +3004,11 @@ simple_heap_delete(Relation relation, ItemPointer tid)
}
/*
* heap_update - replace a tuple
* heap_update - replace a tuple, optionally fetching it into a slot
*
* See table_tuple_update() for an explanation of the parameters, except that
* this routine directly takes a tuple rather than a slot.
* this routine directly takes a tuple rather than a slot. Also, we don't
* place a lock on the tuple in this function, just fetch the existing version.
*
* In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
* t_xmax (resolving a possible MultiXact, if necessary), and t_cmax (the last
@ -2975,9 +3017,9 @@ simple_heap_delete(Relation relation, ItemPointer tid)
*/
TM_Result
heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
CommandId cid, Snapshot crosscheck, bool wait,
CommandId cid, Snapshot crosscheck, int options,
TM_FailureData *tmfd, LockTupleMode *lockmode,
TU_UpdateIndexes *update_indexes)
TU_UpdateIndexes *update_indexes, TupleTableSlot *oldSlot)
{
TM_Result result;
TransactionId xid = GetCurrentTransactionId();
@ -3154,7 +3196,7 @@ l2:
result = HeapTupleSatisfiesUpdate(&oldtup, cid, buffer);
/* see below about the "no wait" case */
Assert(result != TM_BeingModified || wait);
Assert(result != TM_BeingModified || (options & TABLE_MODIFY_WAIT));
if (result == TM_Invisible)
{
@ -3163,7 +3205,7 @@ l2:
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("attempted to update invisible tuple")));
}
else if (result == TM_BeingModified && wait)
else if (result == TM_BeingModified && (options & TABLE_MODIFY_WAIT))
{
TransactionId xwait;
uint16 infomask;
@ -3367,7 +3409,30 @@ l2:
tmfd->cmax = HeapTupleHeaderGetCmax(oldtup.t_data);
else
tmfd->cmax = InvalidCommandId;
UnlockReleaseBuffer(buffer);
/*
* If we're asked to lock the updated tuple, we just fetch the
* existing tuple. That lets the caller save some resources on
* placing the lock.
*/
if (result == TM_Updated &&
(options & TABLE_MODIFY_LOCK_UPDATED))
{
BufferHeapTupleTableSlot *bslot;
Assert(TTS_IS_BUFFERTUPLE(oldSlot));
bslot = (BufferHeapTupleTableSlot *) oldSlot;
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
bslot->base.tupdata = oldtup;
ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata,
oldSlot,
buffer);
}
else
{
UnlockReleaseBuffer(buffer);
}
if (have_tuple_lock)
UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
if (vmbuffer != InvalidBuffer)
@ -3846,7 +3911,26 @@ l2:
/* Now we can release the buffer(s) */
if (newbuf != buffer)
ReleaseBuffer(newbuf);
ReleaseBuffer(buffer);
/* Fetch the old tuple version if we're asked for that. */
if (options & TABLE_MODIFY_FETCH_OLD_TUPLE)
{
BufferHeapTupleTableSlot *bslot;
Assert(TTS_IS_BUFFERTUPLE(oldSlot));
bslot = (BufferHeapTupleTableSlot *) oldSlot;
bslot->base.tupdata = oldtup;
ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata,
oldSlot,
buffer);
}
else
{
/* Now we can release the buffer */
ReleaseBuffer(buffer);
}
if (BufferIsValid(vmbuffer_new))
ReleaseBuffer(vmbuffer_new);
if (BufferIsValid(vmbuffer))
@ -4054,8 +4138,8 @@ simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup,
result = heap_update(relation, otid, tup,
GetCurrentCommandId(true), InvalidSnapshot,
true /* wait for commit */ ,
&tmfd, &lockmode, update_indexes);
TABLE_MODIFY_WAIT /* wait for commit */ ,
&tmfd, &lockmode, update_indexes, NULL);
switch (result)
{
case TM_SelfModified:
@ -4118,12 +4202,14 @@ get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
* tuples.
*
* Output parameters:
* *tuple: all fields filled in
* *buffer: set to buffer holding tuple (pinned but not locked at exit)
* *slot: BufferHeapTupleTableSlot filled with tuple
* *tmfd: filled in failure cases (see below)
*
* Function results are the same as the ones for table_tuple_lock().
*
* If *slot already contains the target tuple, it takes advantage of that by
* skipping the ReadBuffer() call.
*
* In the failure cases other than TM_Invisible, the routine fills
* *tmfd with the tuple's t_ctid, t_xmax (resolving a possible MultiXact,
* if necessary), and t_cmax (the last only for TM_SelfModified,
@ -4134,15 +4220,14 @@ get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
* See README.tuplock for a thorough explanation of this mechanism.
*/
TM_Result
heap_lock_tuple(Relation relation, HeapTuple tuple,
heap_lock_tuple(Relation relation, ItemPointer tid, TupleTableSlot *slot,
CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy,
bool follow_updates,
Buffer *buffer, TM_FailureData *tmfd)
bool follow_updates, TM_FailureData *tmfd)
{
TM_Result result;
ItemPointer tid = &(tuple->t_self);
ItemId lp;
Page page;
Buffer buffer;
Buffer vmbuffer = InvalidBuffer;
BlockNumber block;
TransactionId xid,
@ -4154,8 +4239,24 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
bool skip_tuple_lock = false;
bool have_tuple_lock = false;
bool cleared_all_frozen = false;
BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
HeapTuple tuple = &bslot->base.tupdata;
*buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
Assert(TTS_IS_BUFFERTUPLE(slot));
/* Take advantage if slot already contains the relevant tuple */
if (!TTS_EMPTY(slot) &&
slot->tts_tableOid == relation->rd_id &&
ItemPointerCompare(&slot->tts_tid, tid) == 0 &&
BufferIsValid(bslot->buffer))
{
buffer = bslot->buffer;
IncrBufferRefCount(buffer);
}
else
{
buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
}
block = ItemPointerGetBlockNumber(tid);
/*
@ -4164,21 +4265,22 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
* in the middle of changing this, so we'll need to recheck after we have
* the lock.
*/
if (PageIsAllVisible(BufferGetPage(*buffer)))
if (PageIsAllVisible(BufferGetPage(buffer)))
visibilitymap_pin(relation, block, &vmbuffer);
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
page = BufferGetPage(*buffer);
page = BufferGetPage(buffer);
lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
Assert(ItemIdIsNormal(lp));
tuple->t_self = *tid;
tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
tuple->t_len = ItemIdGetLength(lp);
tuple->t_tableOid = RelationGetRelid(relation);
l3:
result = HeapTupleSatisfiesUpdate(tuple, cid, *buffer);
result = HeapTupleSatisfiesUpdate(tuple, cid, buffer);
if (result == TM_Invisible)
{
@ -4207,7 +4309,7 @@ l3:
infomask2 = tuple->t_data->t_infomask2;
ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid);
LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
/*
* If any subtransaction of the current top transaction already holds
@ -4359,12 +4461,12 @@ l3:
{
result = res;
/* recovery code expects to have buffer lock held */
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
goto failed;
}
}
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
/*
* Make sure it's still an appropriate lock, else start over.
@ -4399,7 +4501,7 @@ l3:
if (HEAP_XMAX_IS_LOCKED_ONLY(infomask) &&
!HEAP_XMAX_IS_EXCL_LOCKED(infomask))
{
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
/*
* Make sure it's still an appropriate lock, else start over.
@ -4427,7 +4529,7 @@ l3:
* No conflict, but if the xmax changed under us in the
* meantime, start over.
*/
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
!TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
xwait))
@ -4439,7 +4541,7 @@ l3:
}
else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask))
{
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
/* if the xmax changed in the meantime, start over */
if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
@ -4467,7 +4569,7 @@ l3:
TransactionIdIsCurrentTransactionId(xwait))
{
/* ... but if the xmax changed in the meantime, start over */
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
!TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
xwait))
@ -4489,7 +4591,7 @@ l3:
*/
if (require_sleep && (result == TM_Updated || result == TM_Deleted))
{
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
goto failed;
}
else if (require_sleep)
@ -4514,7 +4616,7 @@ l3:
*/
result = TM_WouldBlock;
/* recovery code expects to have buffer lock held */
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
goto failed;
}
@ -4540,7 +4642,7 @@ l3:
{
result = TM_WouldBlock;
/* recovery code expects to have buffer lock held */
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
goto failed;
}
break;
@ -4580,7 +4682,7 @@ l3:
{
result = TM_WouldBlock;
/* recovery code expects to have buffer lock held */
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
goto failed;
}
break;
@ -4606,12 +4708,12 @@ l3:
{
result = res;
/* recovery code expects to have buffer lock held */
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
goto failed;
}
}
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
/*
* xwait is done, but if xwait had just locked the tuple then some
@ -4633,7 +4735,7 @@ l3:
* don't check for this in the multixact case, because some
* locker transactions might still be running.
*/
UpdateXmaxHintBits(tuple->t_data, *buffer, xwait);
UpdateXmaxHintBits(tuple->t_data, buffer, xwait);
}
}
@ -4692,9 +4794,9 @@ failed:
*/
if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
{
LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
visibilitymap_pin(relation, block, &vmbuffer);
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
goto l3;
}
@ -4757,7 +4859,7 @@ failed:
cleared_all_frozen = true;
MarkBufferDirty(*buffer);
MarkBufferDirty(buffer);
/*
* XLOG stuff. You might think that we don't need an XLOG record because
@ -4777,7 +4879,7 @@ failed:
XLogRecPtr recptr;
XLogBeginInsert();
XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD);
XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
xlrec.xmax = xid;
@ -4798,7 +4900,7 @@ failed:
result = TM_Ok;
out_locked:
LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
out_unlocked:
if (BufferIsValid(vmbuffer))
@ -4816,6 +4918,9 @@ out_unlocked:
if (have_tuple_lock)
UnlockTupleTuplock(relation, tid, mode);
/* Put the target tuple to the slot */
ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
return result;
}

View File

@ -45,6 +45,12 @@
#include "utils/builtins.h"
#include "utils/rel.h"
static TM_Result heapam_tuple_lock(Relation relation, ItemPointer tid,
Snapshot snapshot, TupleTableSlot *slot,
CommandId cid, LockTupleMode mode,
LockWaitPolicy wait_policy, uint8 flags,
TM_FailureData *tmfd);
static void reform_and_rewrite_tuple(HeapTuple tuple,
Relation OldHeap, Relation NewHeap,
Datum *values, bool *isnull, RewriteState rwstate);
@ -300,23 +306,55 @@ heapam_tuple_complete_speculative(Relation relation, TupleTableSlot *slot,
static TM_Result
heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid,
Snapshot snapshot, Snapshot crosscheck, bool wait,
TM_FailureData *tmfd, bool changingPart)
Snapshot snapshot, Snapshot crosscheck, int options,
TM_FailureData *tmfd, bool changingPart,
TupleTableSlot *oldSlot)
{
TM_Result result;
/*
* Currently, deletion of index tuples is handled at vacuum.  If the
* storage itself cleans up the dead tuples, that is the time to call
* the index tuple deletion as well.
*/
return heap_delete(relation, tid, cid, crosscheck, wait, tmfd, changingPart);
result = heap_delete(relation, tid, cid, crosscheck, options,
tmfd, changingPart, oldSlot);
/*
* If the tuple has been concurrently updated, then get the lock on it.
* (Do this only if the caller asked for it by setting the
* TABLE_MODIFY_LOCK_UPDATED option.)  With the lock held, a retry of the
* delete should succeed even if there are more concurrent update
* attempts.
*/
if (result == TM_Updated && (options & TABLE_MODIFY_LOCK_UPDATED))
{
/*
* heapam_tuple_lock() will take advantage of tuple loaded into
* oldSlot by heap_delete().
*/
result = heapam_tuple_lock(relation, tid, snapshot,
oldSlot, cid, LockTupleExclusive,
(options & TABLE_MODIFY_WAIT) ?
LockWaitBlock :
LockWaitSkip,
TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
tmfd);
if (result == TM_Ok)
return TM_Updated;
}
return result;
}
static TM_Result
heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
CommandId cid, Snapshot snapshot, Snapshot crosscheck,
bool wait, TM_FailureData *tmfd,
LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
int options, TM_FailureData *tmfd,
LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes,
TupleTableSlot *oldSlot)
{
bool shouldFree = true;
HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
@ -326,8 +364,8 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
slot->tts_tableOid = RelationGetRelid(relation);
tuple->t_tableOid = slot->tts_tableOid;
result = heap_update(relation, otid, tuple, cid, crosscheck, wait,
tmfd, lockmode, update_indexes);
result = heap_update(relation, otid, tuple, cid, crosscheck, options,
tmfd, lockmode, update_indexes, oldSlot);
ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
/*
@ -354,6 +392,31 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
if (shouldFree)
pfree(tuple);
/*
* If the tuple has been concurrently updated, then get the lock on it.
* (Do this only if the caller asked for it by setting the
* TABLE_MODIFY_LOCK_UPDATED option.)  With the lock held, a retry of the
* update should succeed even if there are more concurrent update
* attempts.
*/
if (result == TM_Updated && (options & TABLE_MODIFY_LOCK_UPDATED))
{
/*
* heapam_tuple_lock() will take advantage of tuple loaded into
* oldSlot by heap_update().
*/
result = heapam_tuple_lock(relation, otid, snapshot,
oldSlot, cid, *lockmode,
(options & TABLE_MODIFY_WAIT) ?
LockWaitBlock :
LockWaitSkip,
TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
tmfd);
if (result == TM_Ok)
return TM_Updated;
}
return result;
}
@ -365,7 +428,6 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
{
BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
TM_Result result;
Buffer buffer;
HeapTuple tuple = &bslot->base.tupdata;
bool follow_updates;
@ -375,9 +437,8 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
Assert(TTS_IS_BUFFERTUPLE(slot));
tuple_lock_retry:
tuple->t_self = *tid;
result = heap_lock_tuple(relation, tuple, cid, mode, wait_policy,
follow_updates, &buffer, tmfd);
result = heap_lock_tuple(relation, tid, slot, cid, mode, wait_policy,
follow_updates, tmfd);
if (result == TM_Updated &&
(flags & TUPLE_LOCK_FLAG_FIND_LAST_VERSION))
@ -385,8 +446,6 @@ tuple_lock_retry:
/* Should not encounter speculative tuple on recheck */
Assert(!HeapTupleHeaderIsSpeculative(tuple->t_data));
ReleaseBuffer(buffer);
if (!ItemPointerEquals(&tmfd->ctid, &tuple->t_self))
{
SnapshotData SnapshotDirty;
@ -408,6 +467,8 @@ tuple_lock_retry:
InitDirtySnapshot(SnapshotDirty);
for (;;)
{
Buffer buffer = InvalidBuffer;
if (ItemPointerIndicatesMovedPartitions(tid))
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
@ -502,7 +563,7 @@ tuple_lock_retry:
/*
* This is a live tuple, so try to lock it again.
*/
ReleaseBuffer(buffer);
ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
goto tuple_lock_retry;
}
@ -513,7 +574,7 @@ tuple_lock_retry:
*/
if (tuple->t_data == NULL)
{
Assert(!BufferIsValid(buffer));
ReleaseBuffer(buffer);
return TM_Deleted;
}
@ -566,9 +627,6 @@ tuple_lock_retry:
slot->tts_tableOid = RelationGetRelid(relation);
tuple->t_tableOid = slot->tts_tableOid;
/* store in slot, transferring existing pin */
ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
return result;
}

View File

@ -287,16 +287,23 @@ simple_table_tuple_insert(Relation rel, TupleTableSlot *slot)
* via ereport().
*/
void
simple_table_tuple_delete(Relation rel, ItemPointer tid, Snapshot snapshot)
simple_table_tuple_delete(Relation rel, ItemPointer tid, Snapshot snapshot,
TupleTableSlot *oldSlot)
{
TM_Result result;
TM_FailureData tmfd;
int options = TABLE_MODIFY_WAIT; /* wait for commit */
/* Fetch old tuple if the relevant slot is provided */
if (oldSlot)
options |= TABLE_MODIFY_FETCH_OLD_TUPLE;
result = table_tuple_delete(rel, tid,
GetCurrentCommandId(true),
snapshot, InvalidSnapshot,
true /* wait for commit */ ,
&tmfd, false /* changingPart */ );
options,
&tmfd, false /* changingPart */ ,
oldSlot);
switch (result)
{
@ -335,17 +342,24 @@ void
simple_table_tuple_update(Relation rel, ItemPointer otid,
TupleTableSlot *slot,
Snapshot snapshot,
TU_UpdateIndexes *update_indexes)
TU_UpdateIndexes *update_indexes,
TupleTableSlot *oldSlot)
{
TM_Result result;
TM_FailureData tmfd;
LockTupleMode lockmode;
int options = TABLE_MODIFY_WAIT; /* wait for commit */
/* Fetch old tuple if the relevant slot is provided */
if (oldSlot)
options |= TABLE_MODIFY_FETCH_OLD_TUPLE;
result = table_tuple_update(rel, otid, slot,
GetCurrentCommandId(true),
snapshot, InvalidSnapshot,
true /* wait for commit */ ,
&tmfd, &lockmode, update_indexes);
options,
&tmfd, &lockmode, update_indexes,
oldSlot);
switch (result)
{

View File

@ -2773,8 +2773,8 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
void
ExecARDeleteTriggers(EState *estate,
ResultRelInfo *relinfo,
ItemPointer tupleid,
HeapTuple fdw_trigtuple,
TupleTableSlot *slot,
TransitionCaptureState *transition_capture,
bool is_crosspart_update)
{
@ -2783,20 +2783,11 @@ ExecARDeleteTriggers(EState *estate,
if ((trigdesc && trigdesc->trig_delete_after_row) ||
(transition_capture && transition_capture->tcs_delete_old_table))
{
TupleTableSlot *slot = ExecGetTriggerOldSlot(estate, relinfo);
Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid));
if (fdw_trigtuple == NULL)
GetTupleForTrigger(estate,
NULL,
relinfo,
tupleid,
LockTupleExclusive,
slot,
NULL,
NULL,
NULL);
else
/*
* Put the FDW old tuple to the slot. Otherwise, caller is expected
* to have the old tuple already fetched into the slot.
*/
if (fdw_trigtuple != NULL)
ExecForceStoreHeapTuple(fdw_trigtuple, slot, false);
AfterTriggerSaveEvent(estate, relinfo, NULL, NULL,
@ -3087,18 +3078,17 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
* Note: 'src_partinfo' and 'dst_partinfo', when non-NULL, refer to the source
* and destination partitions, respectively, of a cross-partition update of
* the root partitioned table mentioned in the query, given by 'relinfo'.
* 'tupleid' in that case refers to the ctid of the "old" tuple in the source
* partition, and 'newslot' contains the "new" tuple in the destination
* partition. This interface allows to support the requirements of
* ExecCrossPartitionUpdateForeignKey(); is_crosspart_update must be true in
* that case.
* 'oldslot' contains the "old" tuple in the source partition, and 'newslot'
* contains the "new" tuple in the destination partition. This interface
* allows to support the requirements of ExecCrossPartitionUpdateForeignKey();
* is_crosspart_update must be true in that case.
*/
void
ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
ResultRelInfo *src_partinfo,
ResultRelInfo *dst_partinfo,
ItemPointer tupleid,
HeapTuple fdw_trigtuple,
TupleTableSlot *oldslot,
TupleTableSlot *newslot,
List *recheckIndexes,
TransitionCaptureState *transition_capture,
@ -3117,29 +3107,14 @@ ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
* separately for DELETE and INSERT to capture transition table rows.
* In such case, either old tuple or new tuple can be NULL.
*/
TupleTableSlot *oldslot;
ResultRelInfo *tupsrc;
Assert((src_partinfo != NULL && dst_partinfo != NULL) ||
!is_crosspart_update);
tupsrc = src_partinfo ? src_partinfo : relinfo;
oldslot = ExecGetTriggerOldSlot(estate, tupsrc);
if (fdw_trigtuple == NULL && ItemPointerIsValid(tupleid))
GetTupleForTrigger(estate,
NULL,
tupsrc,
tupleid,
LockTupleExclusive,
oldslot,
NULL,
NULL,
NULL);
else if (fdw_trigtuple != NULL)
if (fdw_trigtuple != NULL)
{
Assert(oldslot);
ExecForceStoreHeapTuple(fdw_trigtuple, oldslot, false);
else
ExecClearTuple(oldslot);
}
AfterTriggerSaveEvent(estate, relinfo,
src_partinfo, dst_partinfo,

View File

@ -577,6 +577,7 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
{
List *recheckIndexes = NIL;
TU_UpdateIndexes update_indexes;
TupleTableSlot *oldSlot = NULL;
/* Compute stored generated columns */
if (rel->rd_att->constr &&
@ -590,8 +591,12 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
if (rel->rd_rel->relispartition)
ExecPartitionCheck(resultRelInfo, slot, estate, true);
if (resultRelInfo->ri_TrigDesc &&
resultRelInfo->ri_TrigDesc->trig_update_after_row)
oldSlot = ExecGetTriggerOldSlot(estate, resultRelInfo);
simple_table_tuple_update(rel, tid, slot, estate->es_snapshot,
&update_indexes);
&update_indexes, oldSlot);
if (resultRelInfo->ri_NumIndices > 0 && (update_indexes != TU_None))
recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
@ -602,7 +607,7 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
/* AFTER ROW UPDATE Triggers */
ExecARUpdateTriggers(estate, resultRelInfo,
NULL, NULL,
tid, NULL, slot,
NULL, oldSlot, slot,
recheckIndexes, NULL, false);
list_free(recheckIndexes);
@ -636,12 +641,18 @@ ExecSimpleRelationDelete(ResultRelInfo *resultRelInfo,
if (!skip_tuple)
{
TupleTableSlot *oldSlot = NULL;
if (resultRelInfo->ri_TrigDesc &&
resultRelInfo->ri_TrigDesc->trig_delete_after_row)
oldSlot = ExecGetTriggerOldSlot(estate, resultRelInfo);
/* OK, delete the tuple */
simple_table_tuple_delete(rel, tid, estate->es_snapshot);
simple_table_tuple_delete(rel, tid, estate->es_snapshot, oldSlot);
/* AFTER ROW DELETE Triggers */
ExecARDeleteTriggers(estate, resultRelInfo,
tid, NULL, NULL, false);
NULL, oldSlot, NULL, false);
}
}

View File

@ -566,6 +566,15 @@ ExecInitInsertProjection(ModifyTableState *mtstate,
table_slot_create(resultRelInfo->ri_RelationDesc,
&estate->es_tupleTable);
/*
* In the ON CONFLICT UPDATE case, we will also need a slot for the old
* tuple to calculate the updated tuple on its base.
*/
if (node->onConflictAction == ONCONFLICT_UPDATE)
resultRelInfo->ri_oldTupleSlot =
table_slot_create(resultRelInfo->ri_RelationDesc,
&estate->es_tupleTable);
/* Build ProjectionInfo if needed (it probably isn't). */
if (need_projection)
{
@ -1154,7 +1163,7 @@ ExecInsert(ModifyTableContext *context,
ExecARUpdateTriggers(estate, resultRelInfo,
NULL, NULL,
NULL,
NULL,
resultRelInfo->ri_oldTupleSlot,
slot,
NULL,
mtstate->mt_transition_capture,
@ -1334,7 +1343,8 @@ ExecDeletePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
*/
static TM_Result
ExecDeleteAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
ItemPointer tupleid, bool changingPart)
ItemPointer tupleid, bool changingPart, int options,
TupleTableSlot *oldSlot)
{
EState *estate = context->estate;
@ -1342,9 +1352,10 @@ ExecDeleteAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
estate->es_output_cid,
estate->es_snapshot,
estate->es_crosscheck_snapshot,
true /* wait for commit */ ,
options,
&context->tmfd,
changingPart);
changingPart,
oldSlot);
}
/*
@ -1353,10 +1364,15 @@ ExecDeleteAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
* Closing steps of tuple deletion; this invokes AFTER FOR EACH ROW triggers,
* including the UPDATE triggers if the deletion is being done as part of a
* cross-partition tuple move.
*
* The old tuple is already fetched into slot for regular tables. For FDW,
* the old tuple is given as 'oldtuple' and is to be stored in 'slot' when
* needed.
*/
static void
ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
ItemPointer tupleid, HeapTuple oldtuple, bool changingPart)
ItemPointer tupleid, HeapTuple oldtuple,
TupleTableSlot *slot, bool changingPart)
{
ModifyTableState *mtstate = context->mtstate;
EState *estate = context->estate;
@ -1374,8 +1390,8 @@ ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
{
ExecARUpdateTriggers(estate, resultRelInfo,
NULL, NULL,
tupleid, oldtuple,
NULL, NULL, mtstate->mt_transition_capture,
oldtuple,
slot, NULL, NULL, mtstate->mt_transition_capture,
false);
/*
@ -1386,10 +1402,30 @@ ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
}
/* AFTER ROW DELETE Triggers */
ExecARDeleteTriggers(estate, resultRelInfo, tupleid, oldtuple,
ExecARDeleteTriggers(estate, resultRelInfo, oldtuple, slot,
ar_delete_trig_tcs, changingPart);
}
/*
 * ExecInitDeleteTupleSlot
 *		Prepare the old-tuple slot of a ResultRelInfo for a DELETE action.
 *
 * A slot is created with table_slot_create() for the result relation,
 * passing the executor state's es_tupleTable, and is saved in
 * ri_oldTupleSlot.
 *
 * ri_projectNewInfoValid is set afterwards even though no projection is
 * built by this function.  The flag is asserted to be unset on entry, so
 * a second call for the same ResultRelInfo trips the assertion.
 */
static void
ExecInitDeleteTupleSlot(ModifyTableState *mtstate,
						ResultRelInfo *resultRelInfo)
{
	EState	   *execState = mtstate->ps.state;

	/* Initialization for this result relation must not have happened yet. */
	Assert(!resultRelInfo->ri_projectNewInfoValid);

	resultRelInfo->ri_oldTupleSlot = table_slot_create(resultRelInfo->ri_RelationDesc,
													   &execState->es_tupleTable);

	/* Record that the slot is ready; projections are not set up here. */
	resultRelInfo->ri_projectNewInfoValid = true;
}
/* ----------------------------------------------------------------
* ExecDelete
*
@ -1409,7 +1445,8 @@ ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
* part of an UPDATE of partition-key, then the slot returned by
* EvalPlanQual() is passed back using output parameter epqreturnslot.
*
* Returns RETURNING result if any, otherwise NULL.
* Returns RETURNING result if any, otherwise NULL.  The deleted tuple
* is to be stored into oldslot independently of that.
* ----------------------------------------------------------------
*/
static TupleTableSlot *
@ -1417,6 +1454,7 @@ ExecDelete(ModifyTableContext *context,
ResultRelInfo *resultRelInfo,
ItemPointer tupleid,
HeapTuple oldtuple,
TupleTableSlot *oldslot,
bool processReturning,
bool changingPart,
bool canSetTag,
@ -1480,6 +1518,15 @@ ExecDelete(ModifyTableContext *context,
}
else
{
int options = TABLE_MODIFY_WAIT | TABLE_MODIFY_FETCH_OLD_TUPLE;
/*
* Specify that we need to lock and fetch the last tuple version for
* EPQ on appropriate transaction isolation levels.
*/
if (!IsolationUsesXactSnapshot())
options |= TABLE_MODIFY_LOCK_UPDATED;
/*
* delete the tuple
*
@ -1490,7 +1537,8 @@ ExecDelete(ModifyTableContext *context,
* transaction-snapshot mode transactions.
*/
ldelete:
result = ExecDeleteAct(context, resultRelInfo, tupleid, changingPart);
result = ExecDeleteAct(context, resultRelInfo, tupleid, changingPart,
options, oldslot);
if (tmresult)
*tmresult = result;
@ -1537,7 +1585,6 @@ ldelete:
case TM_Updated:
{
TupleTableSlot *inputslot;
TupleTableSlot *epqslot;
if (IsolationUsesXactSnapshot())
@ -1546,87 +1593,29 @@ ldelete:
errmsg("could not serialize access due to concurrent update")));
/*
* Already know that we're going to need to do EPQ, so
* fetch tuple directly into the right slot.
* We need to do EPQ. The latest tuple is already found
* and locked as a result of TABLE_MODIFY_LOCK_UPDATED.
*/
EvalPlanQualBegin(context->epqstate);
inputslot = EvalPlanQualSlot(context->epqstate, resultRelationDesc,
resultRelInfo->ri_RangeTableIndex);
Assert(context->tmfd.traversed);
epqslot = EvalPlanQual(context->epqstate,
resultRelationDesc,
resultRelInfo->ri_RangeTableIndex,
oldslot);
if (TupIsNull(epqslot))
/* Tuple not passing quals anymore, exiting... */
return NULL;
result = table_tuple_lock(resultRelationDesc, tupleid,
estate->es_snapshot,
inputslot, estate->es_output_cid,
LockTupleExclusive, LockWaitBlock,
TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
&context->tmfd);
switch (result)
/*
* If requested, skip delete and pass back the updated
* row.
*/
if (epqreturnslot)
{
case TM_Ok:
Assert(context->tmfd.traversed);
epqslot = EvalPlanQual(context->epqstate,
resultRelationDesc,
resultRelInfo->ri_RangeTableIndex,
inputslot);
if (TupIsNull(epqslot))
/* Tuple not passing quals anymore, exiting... */
return NULL;
/*
* If requested, skip delete and pass back the
* updated row.
*/
if (epqreturnslot)
{
*epqreturnslot = epqslot;
return NULL;
}
else
goto ldelete;
case TM_SelfModified:
/*
* This can be reached when following an update
* chain from a tuple updated by another session,
* reaching a tuple that was already updated in
* this transaction. If previously updated by this
* command, ignore the delete, otherwise error
* out.
*
* See also TM_SelfModified response to
* table_tuple_delete() above.
*/
if (context->tmfd.cmax != estate->es_output_cid)
ereport(ERROR,
(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
errmsg("tuple to be deleted was already modified by an operation triggered by the current command"),
errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
return NULL;
case TM_Deleted:
/* tuple already deleted; nothing to do */
return NULL;
default:
/*
* TM_Invisible should be impossible because we're
* waiting for updated row versions, and would
* already have errored out if the first version
* is invisible.
*
* TM_Updated should be impossible, because we're
* locking the latest version via
* TUPLE_LOCK_FLAG_FIND_LAST_VERSION.
*/
elog(ERROR, "unexpected table_tuple_lock status: %u",
result);
return NULL;
*epqreturnslot = epqslot;
return NULL;
}
Assert(false);
break;
else
goto ldelete;
}
case TM_Deleted:
@ -1660,7 +1649,8 @@ ldelete:
if (tupleDeleted)
*tupleDeleted = true;
ExecDeleteEpilogue(context, resultRelInfo, tupleid, oldtuple, changingPart);
ExecDeleteEpilogue(context, resultRelInfo, tupleid, oldtuple,
oldslot, changingPart);
/* Process RETURNING if present and if requested */
if (processReturning && resultRelInfo->ri_projectReturning)
@ -1678,17 +1668,13 @@ ldelete:
}
else
{
/* Copy old tuple to the returning slot */
slot = ExecGetReturningSlot(estate, resultRelInfo);
if (oldtuple != NULL)
{
ExecForceStoreHeapTuple(oldtuple, slot, false);
}
else
{
if (!table_tuple_fetch_row_version(resultRelationDesc, tupleid,
SnapshotAny, slot))
elog(ERROR, "failed to fetch deleted tuple for DELETE RETURNING");
}
ExecCopySlot(slot, oldslot);
Assert(!TupIsNull(slot));
}
rslot = ExecProcessReturning(resultRelInfo, slot, context->planSlot);
@ -1788,12 +1774,19 @@ ExecCrossPartitionUpdate(ModifyTableContext *context,
MemoryContextSwitchTo(oldcxt);
}
/*
* Make sure ri_oldTupleSlot is initialized. The old tuple is to be saved
* there by ExecDelete() to save effort on further re-fetching.
*/
if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
ExecInitUpdateProjection(mtstate, resultRelInfo);
/*
* Row movement, part 1. Delete the tuple, but skip RETURNING processing.
* We want to return rows from INSERT.
*/
ExecDelete(context, resultRelInfo,
tupleid, oldtuple,
tupleid, oldtuple, resultRelInfo->ri_oldTupleSlot,
false, /* processReturning */
true, /* changingPart */
false, /* canSetTag */
@ -1834,21 +1827,13 @@ ExecCrossPartitionUpdate(ModifyTableContext *context,
return true;
else
{
/* Fetch the most recent version of old tuple. */
TupleTableSlot *oldSlot;
/* ... but first, make sure ri_oldTupleSlot is initialized. */
if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
ExecInitUpdateProjection(mtstate, resultRelInfo);
oldSlot = resultRelInfo->ri_oldTupleSlot;
if (!table_tuple_fetch_row_version(resultRelInfo->ri_RelationDesc,
tupleid,
SnapshotAny,
oldSlot))
elog(ERROR, "failed to fetch tuple being updated");
/* and project the new tuple to retry the UPDATE with */
/*
* ExecDelete already fetches the most recent version of old tuple
* to resultRelInfo->ri_oldTupleSlot. So, just project the new
* tuple to retry the UPDATE with.
*/
*retry_slot = ExecGetUpdateNewTuple(resultRelInfo, epqslot,
oldSlot);
resultRelInfo->ri_oldTupleSlot);
return false;
}
}
@ -1967,7 +1952,8 @@ ExecUpdatePrepareSlot(ResultRelInfo *resultRelInfo,
static TM_Result
ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
bool canSetTag, UpdateContext *updateCxt)
bool canSetTag, int options, TupleTableSlot *oldSlot,
UpdateContext *updateCxt)
{
EState *estate = context->estate;
Relation resultRelationDesc = resultRelInfo->ri_RelationDesc;
@ -2059,7 +2045,8 @@ lreplace:
ExecCrossPartitionUpdateForeignKey(context,
resultRelInfo,
insert_destrel,
tupleid, slot,
tupleid,
resultRelInfo->ri_oldTupleSlot,
inserted_tuple);
return TM_Ok;
@ -2102,9 +2089,10 @@ lreplace:
estate->es_output_cid,
estate->es_snapshot,
estate->es_crosscheck_snapshot,
true /* wait for commit */ ,
options /* wait for commit */ ,
&context->tmfd, &updateCxt->lockmode,
&updateCxt->updateIndexes);
&updateCxt->updateIndexes,
oldSlot);
return result;
}
@ -2118,7 +2106,8 @@ lreplace:
static void
ExecUpdateEpilogue(ModifyTableContext *context, UpdateContext *updateCxt,
ResultRelInfo *resultRelInfo, ItemPointer tupleid,
HeapTuple oldtuple, TupleTableSlot *slot)
HeapTuple oldtuple, TupleTableSlot *slot,
TupleTableSlot *oldslot)
{
ModifyTableState *mtstate = context->mtstate;
List *recheckIndexes = NIL;
@ -2134,7 +2123,7 @@ ExecUpdateEpilogue(ModifyTableContext *context, UpdateContext *updateCxt,
/* AFTER ROW UPDATE Triggers */
ExecARUpdateTriggers(context->estate, resultRelInfo,
NULL, NULL,
tupleid, oldtuple, slot,
oldtuple, oldslot, slot,
recheckIndexes,
mtstate->operation == CMD_INSERT ?
mtstate->mt_oc_transition_capture :
@ -2223,7 +2212,7 @@ ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
/* Perform the root table's triggers. */
ExecARUpdateTriggers(context->estate,
rootRelInfo, sourcePartInfo, destPartInfo,
tupleid, NULL, newslot, NIL, NULL, true);
NULL, oldslot, newslot, NIL, NULL, true);
}
/* ----------------------------------------------------------------
@ -2246,6 +2235,7 @@ ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
* no relevant triggers.
*
* slot contains the new tuple value to be stored.
* oldslot is the slot to store the old tuple.
* planSlot is the output of the ModifyTable's subplan; we use it
* to access values from other input tables (for RETURNING),
* row-ID junk columns, etc.
@ -2256,7 +2246,7 @@ ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
static TupleTableSlot *
ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
bool canSetTag)
TupleTableSlot *oldslot, bool canSetTag, bool locked)
{
EState *estate = context->estate;
Relation resultRelationDesc = resultRelInfo->ri_RelationDesc;
@ -2309,6 +2299,16 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
}
else
{
int options = TABLE_MODIFY_WAIT | TABLE_MODIFY_FETCH_OLD_TUPLE;
/*
* Specify that we need to lock and fetch the last tuple version for
* EPQ on appropriate transaction isolation levels if the tuple isn't
* locked already.
*/
if (!locked && !IsolationUsesXactSnapshot())
options |= TABLE_MODIFY_LOCK_UPDATED;
/*
* If we generate a new candidate tuple after EvalPlanQual testing, we
* must loop back here to try again. (We don't need to redo triggers,
@ -2318,7 +2318,7 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
*/
redo_act:
result = ExecUpdateAct(context, resultRelInfo, tupleid, oldtuple, slot,
canSetTag, &updateCxt);
canSetTag, options, oldslot, &updateCxt);
/*
* If ExecUpdateAct reports that a cross-partition update was done,
@ -2369,88 +2369,32 @@ redo_act:
case TM_Updated:
{
TupleTableSlot *inputslot;
TupleTableSlot *epqslot;
TupleTableSlot *oldSlot;
if (IsolationUsesXactSnapshot())
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update")));
/* Shouldn't get here if the tuple was previously locked */
Assert(!locked);
/*
* Already know that we're going to need to do EPQ, so
* fetch tuple directly into the right slot.
* We need to do EPQ. The latest tuple is already found
* and locked as a result of TABLE_MODIFY_LOCK_UPDATED.
*/
inputslot = EvalPlanQualSlot(context->epqstate, resultRelationDesc,
resultRelInfo->ri_RangeTableIndex);
result = table_tuple_lock(resultRelationDesc, tupleid,
estate->es_snapshot,
inputslot, estate->es_output_cid,
updateCxt.lockmode, LockWaitBlock,
TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
&context->tmfd);
switch (result)
{
case TM_Ok:
Assert(context->tmfd.traversed);
epqslot = EvalPlanQual(context->epqstate,
resultRelationDesc,
resultRelInfo->ri_RangeTableIndex,
inputslot);
if (TupIsNull(epqslot))
/* Tuple not passing quals anymore, exiting... */
return NULL;
/* Make sure ri_oldTupleSlot is initialized. */
if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
ExecInitUpdateProjection(context->mtstate,
resultRelInfo);
/* Fetch the most recent version of old tuple. */
oldSlot = resultRelInfo->ri_oldTupleSlot;
if (!table_tuple_fetch_row_version(resultRelationDesc,
tupleid,
SnapshotAny,
oldSlot))
elog(ERROR, "failed to fetch tuple being updated");
slot = ExecGetUpdateNewTuple(resultRelInfo,
epqslot, oldSlot);
goto redo_act;
case TM_Deleted:
/* tuple already deleted; nothing to do */
return NULL;
case TM_SelfModified:
/*
* This can be reached when following an update
* chain from a tuple updated by another session,
* reaching a tuple that was already updated in
* this transaction. If previously modified by
* this command, ignore the redundant update,
* otherwise error out.
*
* See also TM_SelfModified response to
* table_tuple_update() above.
*/
if (context->tmfd.cmax != estate->es_output_cid)
ereport(ERROR,
(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
errmsg("tuple to be updated was already modified by an operation triggered by the current command"),
errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
return NULL;
default:
/* see table_tuple_lock call in ExecDelete() */
elog(ERROR, "unexpected table_tuple_lock status: %u",
result);
return NULL;
}
Assert(context->tmfd.traversed);
epqslot = EvalPlanQual(context->epqstate,
resultRelationDesc,
resultRelInfo->ri_RangeTableIndex,
oldslot);
if (TupIsNull(epqslot))
/* Tuple not passing quals anymore, exiting... */
return NULL;
slot = ExecGetUpdateNewTuple(resultRelInfo,
epqslot,
oldslot);
goto redo_act;
}
break;
@ -2474,7 +2418,7 @@ redo_act:
(estate->es_processed)++;
ExecUpdateEpilogue(context, &updateCxt, resultRelInfo, tupleid, oldtuple,
slot);
slot, oldslot);
/* Process RETURNING if present */
if (resultRelInfo->ri_projectReturning)
@ -2692,7 +2636,8 @@ ExecOnConflictUpdate(ModifyTableContext *context,
*returning = ExecUpdate(context, resultRelInfo,
conflictTid, NULL,
resultRelInfo->ri_onConflict->oc_ProjSlot,
canSetTag);
existing,
canSetTag, true);
/*
* Clear out existing tuple, as there might not be another conflict among
@ -2934,6 +2879,7 @@ lmerge_matched:
{
result = ExecUpdateAct(context, resultRelInfo, tupleid,
NULL, newslot, canSetTag,
TABLE_MODIFY_WAIT, NULL,
&updateCxt);
/*
@ -2956,7 +2902,8 @@ lmerge_matched:
if (result == TM_Ok)
{
ExecUpdateEpilogue(context, &updateCxt, resultRelInfo,
tupleid, NULL, newslot);
tupleid, NULL, newslot,
resultRelInfo->ri_oldTupleSlot);
mtstate->mt_merge_updated += 1;
}
break;
@ -2987,12 +2934,12 @@ lmerge_matched:
}
else
result = ExecDeleteAct(context, resultRelInfo, tupleid,
false);
false, TABLE_MODIFY_WAIT, NULL);
if (result == TM_Ok)
{
ExecDeleteEpilogue(context, resultRelInfo, tupleid, NULL,
false);
resultRelInfo->ri_oldTupleSlot, false);
mtstate->mt_merge_deleted += 1;
}
break;
@ -4006,12 +3953,18 @@ ExecModifyTable(PlanState *pstate)
/* Now apply the update. */
slot = ExecUpdate(&context, resultRelInfo, tupleid, oldtuple,
slot, node->canSetTag);
slot, resultRelInfo->ri_oldTupleSlot,
node->canSetTag, false);
break;
case CMD_DELETE:
/* Initialize slot for DELETE to fetch the old tuple */
if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
ExecInitDeleteTupleSlot(node, resultRelInfo);
slot = ExecDelete(&context, resultRelInfo, tupleid, oldtuple,
true, false, node->canSetTag, NULL, NULL, NULL);
resultRelInfo->ri_oldTupleSlot, true, false,
node->canSetTag, NULL, NULL, NULL);
break;
case CMD_MERGE:

View File

@ -284,19 +284,22 @@ extern void heap_multi_insert(Relation relation, struct TupleTableSlot **slots,
int ntuples, CommandId cid, int options,
BulkInsertState bistate);
extern TM_Result heap_delete(Relation relation, ItemPointer tid,
CommandId cid, Snapshot crosscheck, bool wait,
struct TM_FailureData *tmfd, bool changingPart);
CommandId cid, Snapshot crosscheck, int options,
struct TM_FailureData *tmfd, bool changingPart,
TupleTableSlot *oldSlot);
extern void heap_finish_speculative(Relation relation, ItemPointer tid);
extern void heap_abort_speculative(Relation relation, ItemPointer tid);
extern TM_Result heap_update(Relation relation, ItemPointer otid,
HeapTuple newtup,
CommandId cid, Snapshot crosscheck, bool wait,
CommandId cid, Snapshot crosscheck, int options,
struct TM_FailureData *tmfd, LockTupleMode *lockmode,
TU_UpdateIndexes *update_indexes);
extern TM_Result heap_lock_tuple(Relation relation, HeapTuple tuple,
CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy,
bool follow_updates,
Buffer *buffer, struct TM_FailureData *tmfd);
TU_UpdateIndexes *update_indexes,
TupleTableSlot *oldSlot);
extern TM_Result heap_lock_tuple(Relation relation, ItemPointer tid,
TupleTableSlot *slot,
CommandId cid, LockTupleMode mode,
LockWaitPolicy wait_policy, bool follow_updates,
struct TM_FailureData *tmfd);
extern void heap_inplace_update(Relation relation, HeapTuple tuple);
extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple,

View File

@ -259,6 +259,15 @@ typedef struct TM_IndexDeleteOp
/* Follow update chain and lock latest version of tuple */
#define TUPLE_LOCK_FLAG_FIND_LAST_VERSION (1 << 1)
/*
* "options" flag bits for table_tuple_update and table_tuple_delete.
*/
/* Wait for any conflicting update to commit/abort */
#define TABLE_MODIFY_WAIT 0x0001
/* Fetch the existing tuple into a dedicated slot */
#define TABLE_MODIFY_FETCH_OLD_TUPLE 0x0002
/* On concurrent update, follow the update chain and lock latest version of tuple */
#define TABLE_MODIFY_LOCK_UPDATED 0x0004
/* Typedef for callback function for table_index_build_scan */
typedef void (*IndexBuildCallback) (Relation index,
@ -528,9 +537,10 @@ typedef struct TableAmRoutine
CommandId cid,
Snapshot snapshot,
Snapshot crosscheck,
bool wait,
int options,
TM_FailureData *tmfd,
bool changingPart);
bool changingPart,
TupleTableSlot *oldSlot);
/* see table_tuple_update() for reference about parameters */
TM_Result (*tuple_update) (Relation rel,
@ -539,10 +549,11 @@ typedef struct TableAmRoutine
CommandId cid,
Snapshot snapshot,
Snapshot crosscheck,
bool wait,
int options,
TM_FailureData *tmfd,
LockTupleMode *lockmode,
TU_UpdateIndexes *update_indexes);
TU_UpdateIndexes *update_indexes,
TupleTableSlot *oldSlot);
/* see table_tuple_lock() for reference about parameters */
TM_Result (*tuple_lock) (Relation rel,
@ -1463,7 +1474,7 @@ table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
}
/*
* Delete a tuple.
* Delete a tuple (and optionally lock the last tuple version).
*
* NB: do not call this directly unless prepared to deal with
* concurrent-update conditions. Use simple_table_tuple_delete instead.
@ -1474,11 +1485,21 @@ table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
* cid - delete command ID (used for visibility test, and stored into
* cmax if successful)
* crosscheck - if not InvalidSnapshot, also check tuple against this
* wait - true if should wait for any conflicting update to commit/abort
* options:
* If TABLE_MODIFY_WAIT, wait for any conflicting update to commit/abort.
* If TABLE_MODIFY_FETCH_OLD_TUPLE option is given, the existing tuple is
* fetched into oldSlot when the deletion is successful.
* If TABLE_MODIFY_LOCK_UPDATED option is given and the tuple is
* concurrently updated, then the last tuple version is locked and fetched
* into oldSlot.
*
* Output parameters:
* tmfd - filled in failure cases (see below)
* changingPart - true iff the tuple is being moved to another partition
* table due to an update of the partition key. Otherwise, false.
* oldSlot - slot to save the deleted or locked tuple. Can be NULL if none of
* TABLE_MODIFY_FETCH_OLD_TUPLE or TABLE_MODIFY_LOCK_UPDATED options
* is specified.
*
* Normal, successful return value is TM_Ok, which means we did actually
* delete it. Failure return codes are TM_SelfModified, TM_Updated, and
@ -1490,16 +1511,18 @@ table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
*/
static inline TM_Result
table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid,
Snapshot snapshot, Snapshot crosscheck, bool wait,
TM_FailureData *tmfd, bool changingPart)
Snapshot snapshot, Snapshot crosscheck, int options,
TM_FailureData *tmfd, bool changingPart,
TupleTableSlot *oldSlot)
{
return rel->rd_tableam->tuple_delete(rel, tid, cid,
snapshot, crosscheck,
wait, tmfd, changingPart);
options, tmfd, changingPart,
oldSlot);
}
/*
* Update a tuple.
* Update a tuple (and optionally lock the last tuple version).
*
* NB: do not call this directly unless you are prepared to deal with
* concurrent-update conditions. Use simple_table_tuple_update instead.
@ -1511,13 +1534,23 @@ table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid,
* cid - update command ID (used for visibility test, and stored into
* cmax/cmin if successful)
* crosscheck - if not InvalidSnapshot, also check old tuple against this
* wait - true if should wait for any conflicting update to commit/abort
* options:
* If TABLE_MODIFY_WAIT, wait for any conflicting update to commit/abort.
* If TABLE_MODIFY_FETCH_OLD_TUPLE option is given, the existing tuple is
* fetched into oldSlot when the update is successful.
* If TABLE_MODIFY_LOCK_UPDATED option is given and the tuple is
* concurrently updated, then the last tuple version is locked and fetched
* into oldSlot.
*
* Output parameters:
* tmfd - filled in failure cases (see below)
* lockmode - filled with lock mode acquired on tuple
* update_indexes - in success cases this is set to true if new index entries
* are required for this tuple
* oldSlot - slot to save the old or locked tuple. Can be NULL if neither
* the TABLE_MODIFY_FETCH_OLD_TUPLE nor the TABLE_MODIFY_LOCK_UPDATED
* option is specified.
*
* Normal, successful return value is TM_Ok, which means we did actually
* update it. Failure return codes are TM_SelfModified, TM_Updated, and
* TM_BeingModified (the last only possible if TABLE_MODIFY_WAIT is not set
* in options).
@ -1535,13 +1568,15 @@ table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid,
static inline TM_Result
table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot,
CommandId cid, Snapshot snapshot, Snapshot crosscheck,
bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode,
TU_UpdateIndexes *update_indexes)
int options, TM_FailureData *tmfd, LockTupleMode *lockmode,
TU_UpdateIndexes *update_indexes,
TupleTableSlot *oldSlot)
{
return rel->rd_tableam->tuple_update(rel, otid, slot,
cid, snapshot, crosscheck,
wait, tmfd,
lockmode, update_indexes);
options, tmfd,
lockmode, update_indexes,
oldSlot);
}
/*
@ -2083,10 +2118,12 @@ table_scan_sample_next_tuple(TableScanDesc scan,
extern void simple_table_tuple_insert(Relation rel, TupleTableSlot *slot);
extern void simple_table_tuple_delete(Relation rel, ItemPointer tid,
Snapshot snapshot);
Snapshot snapshot,
TupleTableSlot *oldSlot);
extern void simple_table_tuple_update(Relation rel, ItemPointer otid,
TupleTableSlot *slot, Snapshot snapshot,
TU_UpdateIndexes *update_indexes);
TU_UpdateIndexes *update_indexes,
TupleTableSlot *oldSlot);
/* ----------------------------------------------------------------------------

View File

@ -216,8 +216,8 @@ extern bool ExecBRDeleteTriggers(EState *estate,
TM_FailureData *tmfd);
extern void ExecARDeleteTriggers(EState *estate,
ResultRelInfo *relinfo,
ItemPointer tupleid,
HeapTuple fdw_trigtuple,
TupleTableSlot *slot,
TransitionCaptureState *transition_capture,
bool is_crosspart_update);
extern bool ExecIRDeleteTriggers(EState *estate,
@ -240,8 +240,8 @@ extern void ExecARUpdateTriggers(EState *estate,
ResultRelInfo *relinfo,
ResultRelInfo *src_partinfo,
ResultRelInfo *dst_partinfo,
ItemPointer tupleid,
HeapTuple fdw_trigtuple,
TupleTableSlot *oldslot,
TupleTableSlot *newslot,
List *recheckIndexes,
TransitionCaptureState *transition_capture,