Allow a no-wait lock acquisition to succeed in more cases.

We don't determine the position at which a process waiting for a lock
should insert itself into the wait queue until we reach ProcSleep(),
and we may at that point discover that we must insert ourselves ahead
of everyone who wants a conflicting lock, in which case we obtain the
lock immediately. Up until now, a no-wait lock acquisition would fail
in such cases, erroneously claiming that the lock couldn't be obtained
immediately.  Fix that by trying ProcSleep even in the no-wait case.

No back-patch for now, because I'm treating this as an improvement to
the existing no-wait feature. It could instead be argued that it's a
bug fix, on the theory that there should never be any case whatsoever
where no-wait fails to obtain a lock that would have been obtained
immediately without no-wait, but I'm reluctant to interpret the
semantics of no-wait that strictly.

Robert Haas and Jingxian Li

Discussion: http://postgr.es/m/CA+TgmobCH-kMXGVpb0BB-iNMdtcNkTvcZ4JBxDJows3kYM+GDg@mail.gmail.com
This commit is contained in:
Robert Haas 2024-03-14 08:55:25 -04:00
parent c20d90a41c
commit 2346df6fc3
6 changed files with 128 additions and 53 deletions

View File

@ -360,7 +360,8 @@ static PROCLOCK *SetupLockInTable(LockMethod lockMethodTable, PGPROC *proc,
static void GrantLockLocal(LOCALLOCK *locallock, ResourceOwner owner);
static void BeginStrongLockAcquire(LOCALLOCK *locallock, uint32 fasthashcode);
static void FinishStrongLockAcquire(void);
static void WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner);
static void WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner,
bool dontWait);
static void ReleaseLockIfHeld(LOCALLOCK *locallock, bool sessionLock);
static void LockReassignOwner(LOCALLOCK *locallock, ResourceOwner parent);
static bool UnGrantLock(LOCK *lock, LOCKMODE lockmode,
@ -1024,50 +1025,15 @@ LockAcquireExtended(const LOCKTAG *locktag,
}
else
{
/*
* We can't acquire the lock immediately. If caller specified no
* blocking, remove useless table entries and return
* LOCKACQUIRE_NOT_AVAIL without waiting.
*/
if (dontWait)
{
AbortStrongLockAcquire();
if (proclock->holdMask == 0)
{
uint32 proclock_hashcode;
proclock_hashcode = ProcLockHashCode(&proclock->tag, hashcode);
dlist_delete(&proclock->lockLink);
dlist_delete(&proclock->procLink);
if (!hash_search_with_hash_value(LockMethodProcLockHash,
&(proclock->tag),
proclock_hashcode,
HASH_REMOVE,
NULL))
elog(PANIC, "proclock table corrupted");
}
else
PROCLOCK_PRINT("LockAcquire: NOWAIT", proclock);
lock->nRequested--;
lock->requested[lockmode]--;
LOCK_PRINT("LockAcquire: conditional lock failed", lock, lockmode);
Assert((lock->nRequested > 0) && (lock->requested[lockmode] >= 0));
Assert(lock->nGranted <= lock->nRequested);
LWLockRelease(partitionLock);
if (locallock->nLocks == 0)
RemoveLocalLock(locallock);
if (locallockp)
*locallockp = NULL;
return LOCKACQUIRE_NOT_AVAIL;
}
/*
* Set bitmask of locks this process already holds on this object.
*/
MyProc->heldLocks = proclock->holdMask;
/*
* Sleep till someone wakes me up.
* Sleep till someone wakes me up. We do this even in the dontWait
* case, beause while trying to go to sleep, we may discover that we
* can acquire the lock immediately after all.
*/
TRACE_POSTGRESQL_LOCK_WAIT_START(locktag->locktag_field1,
@ -1077,7 +1043,7 @@ LockAcquireExtended(const LOCKTAG *locktag,
locktag->locktag_type,
lockmode);
WaitOnLock(locallock, owner);
WaitOnLock(locallock, owner, dontWait);
TRACE_POSTGRESQL_LOCK_WAIT_DONE(locktag->locktag_field1,
locktag->locktag_field2,
@ -1093,17 +1059,63 @@ LockAcquireExtended(const LOCKTAG *locktag,
*/
/*
* Check the proclock entry status, in case something in the ipc
* communication doesn't work correctly.
* Check the proclock entry status. If dontWait = true, this is an
* expected case; otherwise, it will open happen if something in the
* ipc communication doesn't work correctly.
*/
if (!(proclock->holdMask & LOCKBIT_ON(lockmode)))
{
AbortStrongLockAcquire();
PROCLOCK_PRINT("LockAcquire: INCONSISTENT", proclock);
LOCK_PRINT("LockAcquire: INCONSISTENT", lock, lockmode);
/* Should we retry ? */
LWLockRelease(partitionLock);
elog(ERROR, "LockAcquire failed");
if (dontWait)
{
/*
* We can't acquire the lock immediately. If caller specified
* no blocking, remove useless table entries and return
* LOCKACQUIRE_NOT_AVAIL without waiting.
*/
if (proclock->holdMask == 0)
{
uint32 proclock_hashcode;
proclock_hashcode = ProcLockHashCode(&proclock->tag,
hashcode);
dlist_delete(&proclock->lockLink);
dlist_delete(&proclock->procLink);
if (!hash_search_with_hash_value(LockMethodProcLockHash,
&(proclock->tag),
proclock_hashcode,
HASH_REMOVE,
NULL))
elog(PANIC, "proclock table corrupted");
}
else
PROCLOCK_PRINT("LockAcquire: NOWAIT", proclock);
lock->nRequested--;
lock->requested[lockmode]--;
LOCK_PRINT("LockAcquire: conditional lock failed",
lock, lockmode);
Assert((lock->nRequested > 0) &&
(lock->requested[lockmode] >= 0));
Assert(lock->nGranted <= lock->nRequested);
LWLockRelease(partitionLock);
if (locallock->nLocks == 0)
RemoveLocalLock(locallock);
if (locallockp)
*locallockp = NULL;
return LOCKACQUIRE_NOT_AVAIL;
}
else
{
/*
* We should have gotten the lock, but somehow that didn't
* happen. If we get here, it's a bug.
*/
PROCLOCK_PRINT("LockAcquire: INCONSISTENT", proclock);
LOCK_PRINT("LockAcquire: INCONSISTENT", lock, lockmode);
LWLockRelease(partitionLock);
elog(ERROR, "LockAcquire failed");
}
}
PROCLOCK_PRINT("LockAcquire: granted", proclock);
LOCK_PRINT("LockAcquire: granted", lock, lockmode);
@ -1777,10 +1789,11 @@ MarkLockClear(LOCALLOCK *locallock)
* Caller must have set MyProc->heldLocks to reflect locks already held
* on the lockable object by this process.
*
* The appropriate partition lock must be held at entry.
* The appropriate partition lock must be held at entry, and will still be
* held at exit.
*/
static void
WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner)
WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner, bool dontWait)
{
LOCKMETHODID lockmethodid = LOCALLOCK_LOCKMETHOD(*locallock);
LockMethod lockMethodTable = LockMethods[lockmethodid];
@ -1813,8 +1826,14 @@ WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner)
*/
PG_TRY();
{
if (ProcSleep(locallock, lockMethodTable) != PROC_WAIT_STATUS_OK)
/*
* If dontWait = true, we handle success and failure in the same way
* here. The caller will be able to sort out what has happened.
*/
if (ProcSleep(locallock, lockMethodTable, dontWait) != PROC_WAIT_STATUS_OK
&& !dontWait)
{
/*
* We failed as a result of a deadlock, see CheckDeadLock(). Quit
* now.

View File

@ -1043,10 +1043,19 @@ AuxiliaryPidGetProc(int pid)
* Caller must have set MyProc->heldLocks to reflect locks already held
* on the lockable object by this process (under all XIDs).
*
* It's not actually guaranteed that we need to wait when this function is
* called, because it could be that when we try to find a position at which
* to insert ourself into the wait queue, we discover that we must be inserted
* ahead of everyone who wants a lock that conflict with ours. In that case,
* we get the lock immediately. Beause of this, it's sensible for this function
* to have a dontWait argument, despite the name.
*
* The lock table's partition lock must be held at entry, and will be held
* at exit.
*
* Result: PROC_WAIT_STATUS_OK if we acquired the lock, PROC_WAIT_STATUS_ERROR if not (deadlock).
* Result: PROC_WAIT_STATUS_OK if we acquired the lock, PROC_WAIT_STATUS_ERROR
* if not (if dontWait = true, this is a deadlock; if dontWait = false, we
* would have had to wait).
*
* ASSUME: that no one will fiddle with the queue until after
* we release the partition lock.
@ -1054,7 +1063,7 @@ AuxiliaryPidGetProc(int pid)
* NOTES: The process queue is now a priority queue for locking.
*/
ProcWaitStatus
ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable, bool dontWait)
{
LOCKMODE lockmode = locallock->tag.mode;
LOCK *lock = locallock->lock;
@ -1167,6 +1176,13 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
}
}
/*
* At this point we know that we'd really need to sleep. If we've been
* commanded not to do that, bail out.
*/
if (dontWait)
return PROC_WAIT_STATUS_ERROR;
/*
* Insert self into queue, at the position determined above.
*/

View File

@ -471,7 +471,9 @@ extern int GetStartupBufferPinWaitBufId(void);
extern bool HaveNFreeProcs(int n, int *nfree);
extern void ProcReleaseLocks(bool isCommit);
extern ProcWaitStatus ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable);
extern ProcWaitStatus ProcSleep(LOCALLOCK *locallock,
LockMethod lockMethodTable,
bool dontWait);
extern void ProcWakeup(PGPROC *proc, ProcWaitStatus waitStatus);
extern void ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock);
extern void CheckDeadLockAlert(void);

View File

@ -0,0 +1,9 @@
Parsed test spec with 2 sessions
starting permutation: s1a s2a s1b s1c s2c
step s1a: LOCK TABLE a1 IN ACCESS EXCLUSIVE MODE;
step s2a: LOCK TABLE a1 IN EXCLUSIVE MODE; <waiting ...>
step s1b: LOCK TABLE a1 IN SHARE ROW EXCLUSIVE MODE NOWAIT;
step s1c: COMMIT;
step s2a: <... completed>
step s2c: COMMIT;

View File

@ -111,3 +111,4 @@ test: serializable-parallel
test: serializable-parallel-2
test: serializable-parallel-3
test: matview-write-skew
test: lock-nowait

View File

@ -0,0 +1,28 @@
# While requesting nowait lock, if the lock requested should
# be inserted in front of some waiter, check to see if the lock
# conflicts with already-held locks or the requests before
# the waiter. If not, then just grant myself the requested
# lock immediately. Test this scenario.
setup
{
CREATE TABLE a1 ();
}
teardown
{
DROP TABLE a1;
}
session s1
setup { BEGIN; }
step s1a { LOCK TABLE a1 IN ACCESS EXCLUSIVE MODE; }
step s1b { LOCK TABLE a1 IN SHARE ROW EXCLUSIVE MODE NOWAIT; }
step s1c { COMMIT; }
session s2
setup { BEGIN; }
step s2a { LOCK TABLE a1 IN EXCLUSIVE MODE; }
step s2c { COMMIT; }
permutation s1a s2a s1b s1c s2c