1. BTREE_VERSION_1: use bti_itup->t_tid as the unique identifier for a given
index tuple (its logical position within A LEVEL). bti_oid & bti_dummy
are removed from BTItemData.
2. Fix for multi-column indices (nbtsearch.c):
   _bt_binsrch() - for searches on internal pages where keysize <
	number of attrs, we point at the last item < the scankey, not at the
	first item = the scankey;
   _bt_moveright() - if keysize < number of attrs, we compare the scankey
	with the _last_ item on the current page to decide whether we should
	move right or not.
This commit is contained in:
Vadim B. Mikheev 1997-04-16 01:48:29 +00:00
parent afd9295786
commit 329fb11262
5 changed files with 170 additions and 102 deletions

View File

@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.11 1997/03/24 08:48:09 vadim Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.12 1997/04/16 01:48:11 vadim Exp $
*
*-------------------------------------------------------------------------
*/
@ -33,7 +33,7 @@ static OffsetNumber _bt_findsplitloc(Relation rel, Page page, OffsetNumber start
static void _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf);
static OffsetNumber _bt_pgaddtup(Relation rel, Buffer buf, int keysz, ScanKey itup_scankey, Size itemsize, BTItem btitem, BTItem afteritem);
static bool _bt_goesonpg(Relation rel, Buffer buf, Size keysz, ScanKey scankey, BTItem afteritem);
static void _bt_updateitem(Relation rel, Size keysz, Buffer buf, Oid bti_oid, BTItem newItem);
static void _bt_updateitem(Relation rel, Size keysz, Buffer buf, BTItem oldItem, BTItem newItem);
static bool _bt_isequal (TupleDesc itupdesc, Page page, OffsetNumber offnum, int keysz, ScanKey scankey);
/*
@ -357,7 +357,7 @@ _bt_insertonpg(Relation rel,
DOUBLEALIGN (IndexTupleDSize (stack->bts_btitem->bti_itup)) )
{
_bt_updateitem(rel, keysz, pbuf,
stack->bts_btitem->bti_oid, lowLeftItem);
stack->bts_btitem, lowLeftItem);
_bt_relbuf(rel, buf, BT_WRITE);
_bt_relbuf(rel, rbuf, BT_WRITE);
}
@ -644,23 +644,14 @@ _bt_findsplitloc(Relation rel,
OffsetNumber saferight;
ItemId nxtitemid, safeitemid;
BTItem safeitem, nxtitem;
IndexTuple safetup, nxttup;
Size nbytes;
TupleDesc itupdesc;
int natts;
int attno;
Datum attsafe;
Datum attnext;
bool null;
itupdesc = RelationGetTupleDescriptor(rel);
natts = rel->rd_rel->relnatts;
saferight = start;
safeitemid = PageGetItemId(page, saferight);
nbytes = ItemIdGetLength(safeitemid) + sizeof(ItemIdData);
safeitem = (BTItem) PageGetItem(page, safeitemid);
safetup = &(safeitem->bti_itup);
i = OffsetNumberNext(start);
@ -670,26 +661,17 @@ _bt_findsplitloc(Relation rel,
nxtitemid = PageGetItemId(page, i);
nbytes += (ItemIdGetLength(nxtitemid) + sizeof(ItemIdData));
nxtitem = (BTItem) PageGetItem(page, nxtitemid);
nxttup = &(nxtitem->bti_itup);
/* test against last known safe item */
for (attno = 1; attno <= natts; attno++) {
attsafe = index_getattr(safetup, attno, itupdesc, &null);
attnext = index_getattr(nxttup, attno, itupdesc, &null);
/*
* If the tuple we're looking at isn't equal to the last safe one
* we saw, then it's our new safe tuple.
*/
if (!_bt_invokestrat(rel, attno, BTEqualStrategyNumber,
attsafe, attnext)) {
safetup = nxttup;
saferight = i;
/* break is for the attno for loop */
break;
}
/*
* Test against last known safe item:
* if the tuple we're looking at isn't equal to the last safe
* one we saw, then it's our new safe tuple.
*/
if ( !_bt_itemcmp (rel, natts,
safeitem, nxtitem, BTEqualStrategyNumber) )
{
safeitem = nxtitem;
saferight = i;
}
i = OffsetNumberNext(i);
}
@ -753,7 +735,7 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
rbkno = BufferGetBlockNumber(rbuf);
lpage = BufferGetPage(lbuf);
rpage = BufferGetPage(rbuf);
/*
* step over the high key on the left page while building the
* left page pointer.
@ -793,7 +775,7 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
_bt_wrtbuf(rel, rootbuf);
/* update metadata page with new root block number */
_bt_metaproot(rel, rootbknum);
_bt_metaproot(rel, rootbknum, 0);
}
/*
@ -820,7 +802,6 @@ _bt_pgaddtup(Relation rel,
Page page;
BTPageOpaque opaque;
BTItem chkitem;
Oid afteroid;
page = BufferGetPage(buf);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
@ -829,14 +810,13 @@ _bt_pgaddtup(Relation rel,
if (afteritem == (BTItem) NULL) {
itup_off = _bt_binsrch(rel, buf, keysz, itup_scankey, BT_INSERTION);
} else {
afteroid = afteritem->bti_oid;
itup_off = first;
do {
chkitem =
(BTItem) PageGetItem(page, PageGetItemId(page, itup_off));
itup_off = OffsetNumberNext(itup_off);
} while (chkitem->bti_oid != afteroid);
} while ( ! BTItemSame (chkitem, afteritem) );
}
(void) PageAddItem(page, (Item) btitem, itemsize, itup_off, LP_USED);
@ -870,7 +850,6 @@ _bt_goesonpg(Relation rel,
BTPageOpaque opaque;
BTItem chkitem;
OffsetNumber offnum, maxoff;
Oid afteroid;
bool found;
page = BufferGetPage(buf);
@ -908,7 +887,6 @@ _bt_goesonpg(Relation rel,
return (false);
/* damn, have to work for it. i hate that. */
afteroid = afteritem->bti_oid;
maxoff = PageGetMaxOffsetNumber(page);
/*
@ -924,7 +902,8 @@ _bt_goesonpg(Relation rel,
offnum <= maxoff;
offnum = OffsetNumberNext(offnum)) {
chkitem = (BTItem) PageGetItem(page, PageGetItemId(page, offnum));
if (chkitem->bti_oid == afteroid) {
if ( BTItemSame (chkitem, afteritem) ) {
found = true;
break;
}
@ -1029,7 +1008,7 @@ static void
_bt_updateitem(Relation rel,
Size keysz,
Buffer buf,
Oid bti_oid,
BTItem oldItem,
BTItem newItem)
{
Page page;
@ -1050,10 +1029,10 @@ _bt_updateitem(Relation rel,
do {
item = (BTItem) PageGetItem(page, PageGetItemId(page, i));
i = OffsetNumberNext(i);
} while (i <= maxoff && item->bti_oid != bti_oid);
} while (i <= maxoff && ! BTItemSame (item, oldItem));
/* this should never happen (in theory) */
if (item->bti_oid != bti_oid) {
if ( ! BTItemSame (item, oldItem) ) {
elog(FATAL, "_bt_getstackbuf was lying!!");
}

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.6 1996/11/05 10:35:30 scrappy Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.7 1997/04/16 01:48:15 vadim Exp $
*
* NOTES
* Postgres btree pages look like ordinary relation pages. The opaque
@ -38,12 +38,20 @@
#define BTREE_METAPAGE 0
#define BTREE_MAGIC 0x053162
/*
 * On-disk format version of the btree.  Building with BTREE_VERSION_1
 * defined selects version 1; otherwise version 0 is used.  The value is
 * stored in btm_version by _bt_metapinit() and compared against the stored
 * value by _bt_getroot(), which reports a mismatch via elog(WARN).
 */
#ifdef BTREE_VERSION_1
#define BTREE_VERSION 1
#else
#define BTREE_VERSION 0
#endif
/*
 * BTMetaPageData -- contents of the btree metadata page.
 *
 * _bt_metapinit() fills this in when the metapage is initialized;
 * _bt_getroot() and _bt_metaproot() read and update it afterwards.
 */
typedef struct BTMetaPageData {
/* set to BTREE_MAGIC; _bt_getroot() complains if it differs */
uint32 btm_magic;
/* set to BTREE_VERSION; _bt_getroot() complains if it differs */
uint32 btm_version;
/* block number of the root page, P_NONE until a root has been created */
BlockNumber btm_root;
#ifdef BTREE_VERSION_1
/*
 * Version 1 only.  Initialized to 0, set to 1 when _bt_getroot() creates
 * the first root page, then either incremented or overwritten by
 * _bt_metaproot().  NOTE(review): presumably the number of levels in the
 * tree -- confirm against the callers of _bt_metaproot().
 */
int32 btm_level;
#endif
} BTMetaPageData;
#define BTPageGetMeta(p) \
@ -95,6 +103,9 @@ _bt_metapinit(Relation rel)
metad.btm_magic = BTREE_MAGIC;
metad.btm_version = BTREE_VERSION;
metad.btm_root = P_NONE;
#ifdef BTREE_VERSION_1
metad.btm_level = 0;
#endif
memmove((char *) BTPageGetMeta(pg), (char *) &metad, sizeof(metad));
op = (BTPageOpaque) PageGetSpecialPointer(pg);
@ -179,6 +190,17 @@ _bt_getroot(Relation rel, int access)
metaopaque = (BTPageOpaque) PageGetSpecialPointer(metapg);
Assert(metaopaque->btpo_flags & BTP_META);
metad = BTPageGetMeta(metapg);
if (metad->btm_magic != BTREE_MAGIC) {
elog(WARN, "Index %s is not a btree",
RelationGetRelationName(rel));
}
if (metad->btm_version != BTREE_VERSION) {
elog(WARN, "Version mismatch on %s: version %d file, version %d code",
RelationGetRelationName(rel),
metad->btm_version, BTREE_VERSION);
}
/* if no root page initialized yet, do it */
if (metad->btm_root == P_NONE) {
@ -209,6 +231,9 @@ _bt_getroot(Relation rel, int access)
rootblkno = BufferGetBlockNumber(rootbuf);
rootpg = BufferGetPage(rootbuf);
metad->btm_root = rootblkno;
#ifdef BTREE_VERSION_1
metad->btm_level = 1;
#endif
_bt_pageinit(rootpg, BufferGetPageSize(rootbuf));
rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpg);
rootopaque->btpo_flags |= (BTP_LEAF | BTP_ROOT);
@ -387,7 +412,7 @@ _bt_pageinit(Page page, Size size)
* a reference to or lock on the metapage.
*/
void
_bt_metaproot(Relation rel, BlockNumber rootbknum)
_bt_metaproot(Relation rel, BlockNumber rootbknum, int level)
{
Buffer metabuf;
Page metap;
@ -400,6 +425,12 @@ _bt_metaproot(Relation rel, BlockNumber rootbknum)
Assert(metaopaque->btpo_flags & BTP_META);
metad = BTPageGetMeta(metap);
metad->btm_root = rootbknum;
#ifdef BTREE_VERSION_1
if ( level == 0 ) /* called from _do_insert */
metad->btm_level += 1;
else
metad->btm_level = level; /* called from btsort */
#endif
_bt_wrtbuf(rel, metabuf);
}
@ -434,7 +465,7 @@ _bt_getstackbuf(Relation rel, BTStack stack, int access)
item = (BTItem) PageGetItem(page, itemid);
/* if the item is where we left it, we're done */
if (item->bti_oid == stack->bts_btitem->bti_oid)
if ( BTItemSame (item, stack->bts_btitem) )
return (buf);
/* if the item has just moved right on this page, we're done */
@ -445,7 +476,7 @@ _bt_getstackbuf(Relation rel, BTStack stack, int access)
item = (BTItem) PageGetItem(page, itemid);
/* if the item is where we left it, we're done */
if (item->bti_oid == stack->bts_btitem->bti_oid)
if ( BTItemSame (item, stack->bts_btitem) )
return (buf);
}
}
@ -471,7 +502,7 @@ _bt_getstackbuf(Relation rel, BTStack stack, int access)
offnum = OffsetNumberNext(offnum)) {
itemid = PageGetItemId(page, offnum);
item = (BTItem) PageGetItem(page, itemid);
if (item->bti_oid == stack->bts_btitem->bti_oid)
if ( BTItemSame (item, stack->bts_btitem) )
return (buf);
}
}

View File

@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.16 1997/03/24 08:48:12 vadim Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.17 1997/04/16 01:48:17 vadim Exp $
*
*-------------------------------------------------------------------------
*/
@ -160,7 +160,8 @@ _bt_moveright(Relation rel,
ItemId hikey;
ItemId itemid;
BlockNumber rblkno;
int natts = rel->rd_rel->relnatts;
page = BufferGetPage(buf);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
@ -178,22 +179,43 @@ _bt_moveright(Relation rel,
*/
if (_bt_skeycmp(rel, keysz, scankey, page, hikey,
BTGreaterEqualStrategyNumber)) {
BTGreaterEqualStrategyNumber))
{
/* move right as long as we need to */
do {
do
{
OffsetNumber offmax;
/*
* If this page consists of all duplicate keys (hikey and first
* key on the page have the same value), then we don't need to
* step right.
*
* NOTE for multi-column indices: a scan may use keys for
* only some of the attrs, but duplicates are handled using
* all attrs in the _bt_insert/_bt_spool code. So we have to
* compare the scankey with the _last_ item on this page in
* order not to lose "good" tuples when the number of attrs
* > keysize. Example: (2,0) is the last item on this page,
* (2,1) is the first item on the next page (hikey), and our
* scankey is x = 2. Scankey >= (2,1) because we compare
* first attrs only, but we shouldn't move right from
* here. - vadim 04/15/97
*/
if (PageGetMaxOffsetNumber(page) > P_HIKEY) {
if ( (offmax = PageGetMaxOffsetNumber(page)) > P_HIKEY)
{
itemid = PageGetItemId(page, P_FIRSTKEY);
if (_bt_skeycmp(rel, keysz, scankey, page, itemid,
BTEqualStrategyNumber)) {
/* break is for the "move right" while loop */
break;
}
else if ( natts > keysz )
{
itemid = PageGetItemId(page, offmax);
if (_bt_skeycmp(rel, keysz, scankey, page, itemid,
BTLessEqualStrategyNumber))
break;
}
}
/* step right one page */
@ -346,6 +368,7 @@ _bt_binsrch(Relation rel,
Page page;
BTPageOpaque opaque;
OffsetNumber low, mid, high;
int natts = rel->rd_rel->relnatts;
int result;
page = BufferGetPage(buf);
@ -379,55 +402,84 @@ _bt_binsrch(Relation rel,
else if (result < 0)
high = mid - 1;
else
return (_bt_firsteq(rel, itupdesc, page, keysz, scankey, mid));
{
mid = _bt_firsteq(rel, itupdesc, page, keysz, scankey, mid);
/*
* NOTE for multi-column indices: we may do scan using
* keys not for all attrs. But we handle duplicates using
* all attrs in _bt_insert/_bt_spool code. And so while
* searching on internal pages having number of attrs > keysize
* we want to point at the last item < the scankey, not at the
* first item = the scankey (!!!), and let _bt_moveright
* decide later whether to move right or not (see comments and
* example there). Note also that INSERTions are not affected
* by this code (natts == keysz). - vadim 04/15/97
*/
if ( natts == keysz || opaque->btpo_flags & BTP_LEAF )
return (mid);
low = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
if ( mid == low )
return (mid);
return (OffsetNumberPrev(mid));
}
}
/*
* We terminated because the endpoints got too close together. There
* are two cases to take care of.
*
* For non-insertion searches on internal pages, we want to point at
* the last key <, or first key =, the scankey on the page. This
* guarantees that we'll descend the tree correctly.
*
* For all other cases, we want to point at the first key >=
* the scankey on the page. This guarantees that scans and
* insertions will happen correctly.
*/
/*
* We terminated because the endpoints got too close together. There
* are two cases to take care of.
*
* For non-insertion searches on internal pages, we want to point at
* the last key <, or first key =, the scankey on the page. This
* guarantees that we'll descend the tree correctly.
* (NOTE comments above for multi-column indices).
*
* For all other cases, we want to point at the first key >=
* the scankey on the page. This guarantees that scans and
* insertions will happen correctly.
*/
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
if (!(opaque->btpo_flags & BTP_LEAF) && srchtype == BT_DESCENT) {
if (!(opaque->btpo_flags & BTP_LEAF) && srchtype == BT_DESCENT)
{ /*
* We want the last key <, or first key ==, the scan key.
*/
result = _bt_compare(rel, itupdesc, page, keysz, scankey, high);
if (result == 0)
{
mid = _bt_firsteq(rel, itupdesc, page, keysz, scankey, high);
/*
* We want the last key <, or first key ==, the scan key.
* If natts > keysz we want last item < the scan key.
* See comments above for multi-column indices.
*/
result = _bt_compare(rel, itupdesc, page, keysz, scankey, high);
if (result == 0) {
return (_bt_firsteq(rel, itupdesc, page, keysz, scankey, high));
} else if (result > 0) {
return (high);
} else {
return (low);
}
} else {
/* we want the first key >= the scan key */
result = _bt_compare(rel, itupdesc, page, keysz, scankey, low);
if (result <= 0) {
return (low);
} else {
if (low == high)
return (OffsetNumberNext(low));
result = _bt_compare(rel, itupdesc, page, keysz, scankey, high);
if (result <= 0)
return (high);
else
return (OffsetNumberNext(high));
}
if ( natts == keysz )
return (mid);
low = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
if ( mid == low )
return (mid);
return (OffsetNumberPrev(mid));
}
else if (result > 0)
return (high);
else
return (low);
}
else /* we want the first key >= the scan key */
{
result = _bt_compare(rel, itupdesc, page, keysz, scankey, low);
if (result <= 0)
return (low);
else
{
if (low == high)
return (OffsetNumberNext(low));
result = _bt_compare(rel, itupdesc, page, keysz, scankey, high);
if (result <= 0)
return (high);
else
return (OffsetNumberNext(high));
}
}
}
static OffsetNumber
@ -1107,7 +1159,7 @@ _bt_twostep(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
while (offnum <= maxoff) {
itemid = PageGetItemId(page, offnum);
btitem = (BTItem) PageGetItem(page, itemid);
if (btitem->bti_oid == svitem->bti_oid) {
if ( BTItemSame (btitem, svitem) ) {
pfree(svitem);
ItemPointerSet(current, blkno, offnum);
return (false);

View File

@ -5,7 +5,7 @@
*
*
* IDENTIFICATION
* $Id: nbtsort.c,v 1.13 1997/03/24 08:48:15 vadim Exp $
* $Id: nbtsort.c,v 1.14 1997/04/16 01:48:27 vadim Exp $
*
* NOTES
*
@ -1021,9 +1021,13 @@ _bt_buildadd(Relation index, void *pstate, BTItem bti, int flags)
}
#endif /* FASTBUILD_DEBUG && FASTBUILD_MERGE */
#endif
if (last_bti == (BTItem) NULL) {
if (last_bti == (BTItem) NULL)
{
first_off = P_FIRSTKEY;
} else if (!_bt_itemcmp(index, _bt_nattr, bti, last_bti, BTEqualStrategyNumber)) {
}
else if ( !_bt_itemcmp(index, _bt_nattr,
bti, last_bti, BTEqualStrategyNumber) )
{
first_off = off;
}
last_off = off;
@ -1061,7 +1065,7 @@ _bt_uppershutdown(Relation index, BTPageState *state)
if (s->btps_doupper) {
if (s->btps_next == (BTPageState *) NULL) {
opaque->btpo_flags |= BTP_ROOT;
_bt_metaproot(index, blkno);
_bt_metaproot(index, blkno, s->btps_level + 1);
} else {
bti = _bt_minitem(s->btps_page, blkno, 0);
(void) _bt_buildadd(index, s->btps_next, bti, 0);

View File

@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtutils.c,v 1.9 1997/03/24 08:48:16 vadim Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtutils.c,v 1.10 1997/04/16 01:48:29 vadim Exp $
*
*-------------------------------------------------------------------------
*/
@ -297,7 +297,9 @@ _bt_formitem(IndexTuple itup)
btitem = (BTItem) palloc(nbytes_btitem);
memmove((char *) &(btitem->bti_itup), (char *) itup, tuplen);
#ifndef BTREE_VERSION_1
btitem->bti_oid = newoid();
#endif
return (btitem);
}