Get rid of page-level locking in btrees.

LockBuffer is used to acquire read/write access
to index pages. Pages are released before leaving
index internals.
This commit is contained in:
Vadim B. Mikheev 1999-05-25 18:20:31 +00:00
parent 07842084fe
commit 7d443a85af
7 changed files with 73 additions and 132 deletions

View File

@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.40 1999/05/25 16:07:23 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.41 1999/05/25 18:20:28 vadim Exp $
*
*-------------------------------------------------------------------------
*/
@ -19,7 +19,6 @@
#include <access/nbtree.h>
#include <access/heapam.h>
#include <access/xact.h>
#include <storage/bufmgr.h>
#include <fmgr.h>
#ifndef HAVE_MEMMOVE
@ -64,14 +63,11 @@ _bt_doinsert(Relation rel, BTItem btitem, bool index_is_unique, Relation heapRel
/* find the page containing this key */
stack = _bt_search(rel, natts, itup_scankey, &buf);
blkno = BufferGetBlockNumber(buf);
/* trade in our read lock for a write lock */
_bt_relbuf(rel, buf, BT_READ);
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
LockBuffer(buf, BT_WRITE);
l1:
buf = _bt_getbuf(rel, blkno, BT_WRITE);
/*
* If the page was split between the time that we surrendered our read
* lock and acquired our write lock, then this page may no longer be
@ -81,6 +77,7 @@ l1:
*/
buf = _bt_moveright(rel, buf, natts, itup_scankey, BT_WRITE);
blkno = BufferGetBlockNumber(buf);
/* if we're not allowing duplicates, make sure the key isn't */
/* already in the node */
@ -99,13 +96,13 @@ l1:
/* key on the page before trying to compare it */
if (!PageIsEmpty(page) && offset <= maxoff)
{
TupleDesc itupdesc;
BTItem cbti;
HeapTupleData htup;
BTPageOpaque opaque;
Buffer nbuf;
BlockNumber blkno;
bool chtup = true;
TupleDesc itupdesc;
BTItem cbti;
HeapTupleData htup;
BTPageOpaque opaque;
Buffer nbuf;
BlockNumber nblkno;
bool chtup = true;
itupdesc = RelationGetDescr(rel);
nbuf = InvalidBuffer;
@ -157,7 +154,8 @@ l1:
_bt_relbuf(rel, nbuf, BT_READ);
_bt_relbuf(rel, buf, BT_WRITE);
XactLockTableWait(xwait);
goto l1;/* continue from the begin */
buf = _bt_getbuf(rel, blkno, BT_WRITE);
goto l1; /* continue from the begin */
}
elog(ERROR, "Cannot insert a duplicate key into a unique index");
}
@ -177,12 +175,12 @@ l1:
* min key of the right page is the same, ooh - so
* many dead duplicates...
*/
blkno = opaque->btpo_next;
nblkno = opaque->btpo_next;
if (nbuf != InvalidBuffer)
_bt_relbuf(rel, nbuf, BT_READ);
for (nbuf = InvalidBuffer;;)
for (nbuf = InvalidBuffer; ; )
{
nbuf = _bt_getbuf(rel, blkno, BT_READ);
nbuf = _bt_getbuf(rel, nblkno, BT_READ);
page = BufferGetPage(nbuf);
maxoff = PageGetMaxOffsetNumber(page);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
@ -193,10 +191,10 @@ l1:
}
else
{ /* Empty or "pseudo"-empty page - get next */
blkno = opaque->btpo_next;
nblkno = opaque->btpo_next;
_bt_relbuf(rel, nbuf, BT_READ);
nbuf = InvalidBuffer;
if (blkno == P_NONE)
if (nblkno == P_NONE)
break;
}
}

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.21 1999/05/25 16:07:26 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.22 1999/05/25 18:20:29 vadim Exp $
*
* NOTES
* Postgres btree pages look like ordinary relation pages. The opaque
@ -27,7 +27,6 @@
#include <storage/bufpage.h>
#include <access/nbtree.h>
#include <miscadmin.h>
#include <storage/bufmgr.h>
#include <storage/lmgr.h>
#ifndef HAVE_MEMMOVE
@ -36,26 +35,17 @@
#include <string.h>
#endif
static void _bt_setpagelock(Relation rel, BlockNumber blkno, int access);
static void _bt_unsetpagelock(Relation rel, BlockNumber blkno, int access);
#define BTREE_METAPAGE 0
#define BTREE_MAGIC 0x053162
#ifdef BTREE_VERSION_1
#define BTREE_VERSION 1
#else
#define BTREE_VERSION 0
#endif
/*
 * BTMetaPageData -- contents of the btree metapage, read and written
 * through BTPageGetMeta().
 *
 * NOTE(review): this span comes from a diff rendered without +/- markers;
 * the #ifdef BTREE_VERSION_1 / #endif lines around btm_level look like
 * lines the commit removes (making btm_level unconditional) -- confirm
 * against the real file.
 */
typedef struct BTMetaPageData
{
uint32 btm_magic; /* set to BTREE_MAGIC when the metapage is initialized */
uint32 btm_version; /* set to BTREE_VERSION when the metapage is initialized */
BlockNumber btm_root; /* block number of the root page; P_NONE until a root exists */
#ifdef BTREE_VERSION_1
int32 btm_level; /* 0 at init, 1 when the first root is created, updated by _bt_metaproot */
#endif
} BTMetaPageData;
#define BTPageGetMeta(p) \
@ -108,9 +98,7 @@ _bt_metapinit(Relation rel)
metad.btm_magic = BTREE_MAGIC;
metad.btm_version = BTREE_VERSION;
metad.btm_root = P_NONE;
#ifdef BTREE_VERSION_1
metad.btm_level = 0;
#endif
memmove((char *) BTPageGetMeta(pg), (char *) &metad, sizeof(metad));
op = (BTPageOpaque) PageGetSpecialPointer(pg);
@ -246,9 +234,7 @@ _bt_getroot(Relation rel, int access)
rootblkno = BufferGetBlockNumber(rootbuf);
rootpg = BufferGetPage(rootbuf);
metad->btm_root = rootblkno;
#ifdef BTREE_VERSION_1
metad->btm_level = 1;
#endif
_bt_pageinit(rootpg, BufferGetPageSize(rootbuf));
rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpg);
rootopaque->btpo_flags |= (BTP_LEAF | BTP_ROOT);
@ -257,8 +243,8 @@ _bt_getroot(Relation rel, int access)
/* swap write lock for read lock, if appropriate */
if (access != BT_WRITE)
{
_bt_setpagelock(rel, rootblkno, BT_READ);
_bt_unsetpagelock(rel, rootblkno, BT_WRITE);
LockBuffer(rootbuf, BUFFER_LOCK_UNLOCK);
LockBuffer(rootbuf, BT_READ);
}
/* okay, metadata is correct */
@ -322,31 +308,24 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
Buffer buf;
Page page;
/*
* If we want a new block, we can't set a lock of the appropriate type
* until we've instantiated the buffer.
*/
if (blkno != P_NEW)
{
if (access == BT_WRITE)
_bt_setpagelock(rel, blkno, BT_WRITE);
else
_bt_setpagelock(rel, blkno, BT_READ);
buf = ReadBuffer(rel, blkno);
LockBuffer(buf, access);
}
else
{
/*
* Extend bufmgr code is unclean and so we have to
* use locking here.
*/
LockPage(rel, 0, ExclusiveLock);
buf = ReadBuffer(rel, blkno);
UnlockPage(rel, 0, ExclusiveLock);
blkno = BufferGetBlockNumber(buf);
page = BufferGetPage(buf);
_bt_pageinit(page, BufferGetPageSize(buf));
if (access == BT_WRITE)
_bt_setpagelock(rel, blkno, BT_WRITE);
else
_bt_setpagelock(rel, blkno, BT_READ);
LockBuffer(buf, access);
}
/* ref count and lock type are correct */
@ -359,16 +338,7 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
/*
 * _bt_relbuf() -- give back a buffer that was obtained for index access.
 *
 * Drops the lock held on buf and then releases the buffer itself with
 * ReleaseBuffer().
 *
 * NOTE(review): this span comes from a diff rendered without +/- markers.
 * The blkno lookup and the _bt_unsetpagelock() calls look like the
 * pre-commit page-lock version, and LockBuffer(buf, BUFFER_LOCK_UNLOCK)
 * looks like its post-commit replacement -- confirm against the real file
 * before treating all of these statements as live together.
 */
void
_bt_relbuf(Relation rel, Buffer buf, int access)
{
BlockNumber blkno;
blkno = BufferGetBlockNumber(buf);
/* access had better be one of read or write */
if (access == BT_WRITE)
_bt_unsetpagelock(rel, blkno, BT_WRITE);
else
_bt_unsetpagelock(rel, blkno, BT_READ);
/* unlock the buffer first, then release it */
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buf);
}
@ -382,11 +352,8 @@ _bt_relbuf(Relation rel, Buffer buf, int access)
/*
 * _bt_wrtbuf() -- write out a modified buffer and drop the lock on it.
 *
 * NOTE(review): this span comes from a diff rendered without +/- markers.
 * The blkno lookup and the trailing _bt_unsetpagelock() call look like the
 * pre-commit page-lock version; LockBuffer(buf, BUFFER_LOCK_UNLOCK) placed
 * before WriteBuffer() looks like the post-commit replacement -- confirm
 * against the real file.
 */
void
_bt_wrtbuf(Relation rel, Buffer buf)
{
BlockNumber blkno;
/* remember the block number before the buffer is handed to WriteBuffer */
blkno = BufferGetBlockNumber(buf);
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
WriteBuffer(buf);
_bt_unsetpagelock(rel, blkno, BT_WRITE);
}
/*
@ -399,9 +366,6 @@ _bt_wrtbuf(Relation rel, Buffer buf)
/*
 * _bt_wrtnorelbuf() -- write out a buffer via WriteNoReleaseBuffer().
 *
 * No lock is touched here: unlike _bt_wrtbuf(), this routine makes no
 * LockBuffer() or page-lock call.
 *
 * NOTE(review): blkno is computed but never used in this span; in this
 * marker-less diff view those two lines look like ones the commit
 * removes -- confirm against the real file.
 */
void
_bt_wrtnorelbuf(Relation rel, Buffer buf)
{
BlockNumber blkno;
blkno = BufferGetBlockNumber(buf);
WriteNoReleaseBuffer(buf);
}
@ -452,12 +416,10 @@ _bt_metaproot(Relation rel, BlockNumber rootbknum, int level)
Assert(metaopaque->btpo_flags & BTP_META);
metad = BTPageGetMeta(metap);
metad->btm_root = rootbknum;
#ifdef BTREE_VERSION_1
if (level == 0) /* called from _do_insert */
if (level == 0) /* called from _do_insert */
metad->btm_level += 1;
else
metad->btm_level = level; /* called from btsort */
#endif
_bt_wrtbuf(rel, metabuf);
}
@ -582,32 +544,6 @@ _bt_getstackbuf(Relation rel, BTStack stack, int access)
}
}
/*
 * _bt_setpagelock() -- take a page-level lock on block blkno of rel.
 *
 * Nothing happens unless USELOCKING is true.  BT_WRITE access takes an
 * ExclusiveLock; every other access value takes a ShareLock.
 */
static void
_bt_setpagelock(Relation rel, BlockNumber blkno, int access)
{
	/* locking disabled: nothing to acquire */
	if (!(USELOCKING))
		return;

	if (access != BT_WRITE)
	{
		LockPage(rel, blkno, ShareLock);
		return;
	}

	LockPage(rel, blkno, ExclusiveLock);
}
/*
 * _bt_unsetpagelock() -- drop a page-level lock on block blkno of rel.
 *
 * Nothing happens unless USELOCKING is true.  BT_WRITE access releases an
 * ExclusiveLock; every other access value releases a ShareLock.
 */
static void
_bt_unsetpagelock(Relation rel, BlockNumber blkno, int access)
{
	/* locking disabled: nothing was acquired, so nothing to release */
	if (!(USELOCKING))
		return;

	if (access != BT_WRITE)
	{
		UnlockPage(rel, blkno, ShareLock);
		return;
	}

	UnlockPage(rel, blkno, ExclusiveLock);
}
void
_bt_pagedel(Relation rel, ItemPointer tid)
{

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.38 1999/05/25 16:07:27 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.39 1999/05/25 18:20:29 vadim Exp $
*
* NOTES
* This file contains only the public interface routines.
@ -394,7 +394,7 @@ btgettuple(IndexScanDesc scan, ScanDirection dir)
/*
* Restore scan position using heap TID returned by previous call
* to btgettuple().
* to btgettuple(). _bt_restscan() locks buffer.
*/
_bt_restscan(scan);
res = _bt_next(scan, dir);
@ -402,9 +402,15 @@ btgettuple(IndexScanDesc scan, ScanDirection dir)
else
res = _bt_first(scan, dir);
/* Save heap TID to use it in _bt_restscan */
/*
* Save heap TID to use it in _bt_restscan.
* Unlock buffer before leaving index !
*/
if (res)
{
((BTScanOpaque) scan->opaque)->curHeapIptr = res->heap_iptr;
LockBuffer(((BTScanOpaque) scan->opaque)->btso_curbuf, BUFFER_LOCK_UNLOCK);
}
return (char *) res;
}
@ -437,18 +443,18 @@ btrescan(IndexScanDesc scan, bool fromEnd, ScanKey scankey)
so = (BTScanOpaque) scan->opaque;
/* we hold a read lock on the current page in the scan */
/* we don't hold a read lock on the current page in the scan */
if (ItemPointerIsValid(iptr = &(scan->currentItemData)))
{
_bt_relbuf(scan->relation, so->btso_curbuf, BT_READ);
ReleaseBuffer(so->btso_curbuf);
so->btso_curbuf = InvalidBuffer;
ItemPointerSetInvalid(iptr);
}
/* and we hold a read lock on the last marked item in the scan */
/* and we don't hold a read lock on the last marked item in the scan */
if (ItemPointerIsValid(iptr = &(scan->currentMarkData)))
{
_bt_relbuf(scan->relation, so->btso_mrkbuf, BT_READ);
ReleaseBuffer(so->btso_mrkbuf);
so->btso_mrkbuf = InvalidBuffer;
ItemPointerSetInvalid(iptr);
}
@ -489,10 +495,10 @@ btmovescan(IndexScanDesc scan, Datum v)
so = (BTScanOpaque) scan->opaque;
/* release any locks we still hold */
/* we don't hold a read lock on the current page in the scan */
if (ItemPointerIsValid(iptr = &(scan->currentItemData)))
{
_bt_relbuf(scan->relation, so->btso_curbuf, BT_READ);
ReleaseBuffer(so->btso_curbuf);
so->btso_curbuf = InvalidBuffer;
ItemPointerSetInvalid(iptr);
}
@ -512,11 +518,11 @@ btendscan(IndexScanDesc scan)
so = (BTScanOpaque) scan->opaque;
/* release any locks we still hold */
/* we don't hold any read locks */
if (ItemPointerIsValid(iptr = &(scan->currentItemData)))
{
if (BufferIsValid(so->btso_curbuf))
_bt_relbuf(scan->relation, so->btso_curbuf, BT_READ);
ReleaseBuffer(so->btso_curbuf);
so->btso_curbuf = InvalidBuffer;
ItemPointerSetInvalid(iptr);
}
@ -524,7 +530,7 @@ btendscan(IndexScanDesc scan)
if (ItemPointerIsValid(iptr = &(scan->currentMarkData)))
{
if (BufferIsValid(so->btso_mrkbuf))
_bt_relbuf(scan->relation, so->btso_mrkbuf, BT_READ);
ReleaseBuffer(so->btso_mrkbuf);
so->btso_mrkbuf = InvalidBuffer;
ItemPointerSetInvalid(iptr);
}
@ -547,20 +553,19 @@ btmarkpos(IndexScanDesc scan)
so = (BTScanOpaque) scan->opaque;
/* release lock on old marked data, if any */
/* we don't hold any read locks */
if (ItemPointerIsValid(iptr = &(scan->currentMarkData)))
{
_bt_relbuf(scan->relation, so->btso_mrkbuf, BT_READ);
ReleaseBuffer(so->btso_mrkbuf);
so->btso_mrkbuf = InvalidBuffer;
ItemPointerSetInvalid(iptr);
}
/* bump lock on currentItemData and copy to currentMarkData */
/* bump pin on current buffer */
if (ItemPointerIsValid(&(scan->currentItemData)))
{
so->btso_mrkbuf = _bt_getbuf(scan->relation,
BufferGetBlockNumber(so->btso_curbuf),
BT_READ);
so->btso_mrkbuf = ReadBuffer(scan->relation,
BufferGetBlockNumber(so->btso_curbuf));
scan->currentMarkData = scan->currentItemData;
so->mrkHeapIptr = so->curHeapIptr;
}
@ -577,20 +582,19 @@ btrestrpos(IndexScanDesc scan)
so = (BTScanOpaque) scan->opaque;
/* release lock on current data, if any */
/* we don't hold any read locks */
if (ItemPointerIsValid(iptr = &(scan->currentItemData)))
{
_bt_relbuf(scan->relation, so->btso_curbuf, BT_READ);
ReleaseBuffer(so->btso_curbuf);
so->btso_curbuf = InvalidBuffer;
ItemPointerSetInvalid(iptr);
}
/* bump lock on currentMarkData and copy to currentItemData */
/* bump pin on marked buffer */
if (ItemPointerIsValid(&(scan->currentMarkData)))
{
so->btso_curbuf = _bt_getbuf(scan->relation,
BufferGetBlockNumber(so->btso_mrkbuf),
BT_READ);
so->btso_curbuf = ReadBuffer(scan->relation,
BufferGetBlockNumber(so->btso_mrkbuf));
scan->currentItemData = scan->currentMarkData;
so->curHeapIptr = so->mrkHeapIptr;
@ -623,6 +627,7 @@ _bt_restscan(IndexScanDesc scan)
BTItem item;
BlockNumber blkno;
LockBuffer(buf, BT_READ);
/*
* We use this as flag when first index tuple on page is deleted but
* we do not move left (this would slowdown vacuum) - so we set

View File

@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/Attic/nbtscan.c,v 1.21 1999/05/25 16:07:29 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/Attic/nbtscan.c,v 1.22 1999/05/25 18:20:30 vadim Exp $
*
*
* NOTES
@ -30,7 +30,6 @@
#include <postgres.h>
#include <storage/bufpage.h>
#include <storage/bufmgr.h>
#include <access/nbtree.h>
typedef struct BTScanListData
@ -134,6 +133,11 @@ _bt_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno)
ItemPointerSetInvalid(&(so->curHeapIptr));
else
{
/*
* We have to lock buffer before _bt_step
* and unlock it after that.
*/
LockBuffer(buf, BT_READ);
_bt_step(scan, &buf, BackwardScanDirection);
so->btso_curbuf = buf;
if (ItemPointerIsValid(current))
@ -143,6 +147,7 @@ _bt_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno)
PageGetItemId(pg, ItemPointerGetOffsetNumber(current)));
so->curHeapIptr = btitem->bti_itup.t_tid;
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
}
}
}
@ -169,6 +174,7 @@ _bt_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno)
so->btso_curbuf = so->btso_mrkbuf;
so->btso_mrkbuf = buf;
buf = so->btso_curbuf;
LockBuffer(buf, BT_READ); /* as above */
_bt_step(scan, &buf, BackwardScanDirection);
@ -184,6 +190,7 @@ _bt_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno)
PageGetItemId(pg, ItemPointerGetOffsetNumber(current)));
so->mrkHeapIptr = btitem->bti_itup.t_tid;
LockBuffer(buf, BUFFER_LOCK_UNLOCK); /* as above */
}
}
}

View File

@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.44 1999/05/25 16:07:31 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.45 1999/05/25 18:20:30 vadim Exp $
*
*-------------------------------------------------------------------------
*/
@ -17,7 +17,6 @@
#include <access/genam.h>
#include <fmgr.h>
#include <storage/bufpage.h>
#include <storage/bufmgr.h>
#include <access/nbtree.h>
#include <catalog/pg_proc.h>

View File

@ -5,7 +5,7 @@
*
*
* IDENTIFICATION
* $Id: nbtsort.c,v 1.39 1999/05/25 16:07:34 momjian Exp $
* $Id: nbtsort.c,v 1.40 1999/05/25 18:20:31 vadim Exp $
*
* NOTES
*
@ -52,7 +52,6 @@
#include "postgres.h"
#include "access/nbtree.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "utils/memutils.h"

View File

@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtutils.c,v 1.26 1999/04/13 17:18:29 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtutils.c,v 1.27 1999/05/25 18:20:31 vadim Exp $
*
*-------------------------------------------------------------------------
*/
@ -301,9 +301,6 @@ _bt_formitem(IndexTuple itup)
btitem = (BTItem) palloc(nbytes_btitem);
memmove((char *) &(btitem->bti_itup), (char *) itup, tuplen);
#ifndef BTREE_VERSION_1
btitem->bti_oid = newoid();
#endif
return btitem;
}